mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
Initial commit
Change-Id: I4bf1707bd3dfeadf2c17b0a7daff372b1925ebbd
This commit is contained in:
92
.clang-format
Normal file
92
.clang-format
Normal file
@@ -0,0 +1,92 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: LLVM
|
||||
AccessModifierOffset: -2
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignConsecutiveDeclarations: false
|
||||
AlignEscapedNewlinesLeft: false
|
||||
AlignOperands: true
|
||||
AlignTrailingComments: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BraceWrapping:
|
||||
AfterClass: false
|
||||
AfterControlStatement: false
|
||||
AfterEnum: false
|
||||
AfterFunction: false
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
IndentBraces: false
|
||||
BreakBeforeBinaryOperators: false
|
||||
BreakBeforeBraces: Attach
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
# clang-format > v3.8.0: BreakAfterJavaFieldAnnotations: false
|
||||
# clang-format > v3.8.0: BreakStringLiterals: true
|
||||
ColumnLimit: 0
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: true
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
IncludeCategories:
|
||||
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
||||
Priority: 2
|
||||
- Regex: '^(<|"(gtest|isl|json)/)'
|
||||
Priority: 3
|
||||
- Regex: '.*'
|
||||
Priority: 1
|
||||
IndentCaseLabels: false
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
KeepEmptyLinesAtTheStartOfBlocks: true
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: None
|
||||
ObjCBlockIndentWidth: 2
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PenaltyBreakBeforeFirstCallParameter: 19
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 60
|
||||
PointerAlignment: Right
|
||||
ReflowComments: true
|
||||
SortIncludes: false
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 1
|
||||
SpacesInAngles: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
Standard: Cpp11
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
...
|
||||
|
||||
40
.clang-tidy
Normal file
40
.clang-tidy
Normal file
@@ -0,0 +1,40 @@
|
||||
---
|
||||
Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,readability-identifier-naming,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,-clang-analyzer-core.StackAddressEscape,-clang-analyzer-optin.performance.Padding,-clang-analyzer-cplusplus.NewDeleteLeaks'
|
||||
# WarningsAsErrors: '.*'
|
||||
HeaderFilterRegex: 'runtime/'
|
||||
AnalyzeTemporaryDtors: false
|
||||
CheckOptions:
|
||||
- key: google-readability-braces-around-statements.ShortStatementLines
|
||||
value: '1'
|
||||
- key: google-readability-function-size.StatementThreshold
|
||||
value: '800'
|
||||
- key: google-readability-namespace-comments.ShortNamespaceLines
|
||||
value: '10'
|
||||
- key: google-readability-namespace-comments.SpacesBeforeComments
|
||||
value: '2'
|
||||
- key: readability-identifier-naming.MethodCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.ParameterCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.StructMemberCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.ClassMemberCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.ClassMethodCase
|
||||
value: camelBack
|
||||
- key: modernize-loop-convert.MaxCopySize
|
||||
value: '16'
|
||||
- key: modernize-loop-convert.MinConfidence
|
||||
value: reasonable
|
||||
- key: modernize-loop-convert.NamingStyle
|
||||
value: CamelCase
|
||||
- key: modernize-pass-by-value.IncludeStyle
|
||||
value: llvm
|
||||
- key: modernize-replace-auto-ptr.IncludeStyle
|
||||
value: llvm
|
||||
- key: modernize-use-nullptr.NullMacros
|
||||
value: 'NULL'
|
||||
- key: modernize-use-default-member-init.UseAssignment
|
||||
value: '1'
|
||||
...
|
||||
|
||||
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@@ -0,0 +1 @@
|
||||
manifests/manifest.yml filter=repo_converter
|
||||
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
build/*
|
||||
build_linux/*
|
||||
550
CMakeLists.txt
Normal file
550
CMakeLists.txt
Normal file
@@ -0,0 +1,550 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# We require cmake 3.2.0 or later
|
||||
cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR)
|
||||
include(ExternalProject)
|
||||
|
||||
project(igdrcl)
|
||||
|
||||
if(TR_DEPRECATED)
|
||||
add_definitions(-D_SILENCE_TR1_NAMESPACE_DEPRECATION_WARNING=1)
|
||||
endif(TR_DEPRECATED)
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type: [Release, Release-Internal, Debug]")
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE-INTERNAL ${CMAKE_C_FLAGS_RELEASE})
|
||||
set(CMAKE_CXX_FLAGS_RELEASE-INTERNAL ${CMAKE_CXX_FLAGS_RELEASE})
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE-INTERNAL ${CMAKE_SHARED_LINKER_FLAGS_RELEASE})
|
||||
set(CMAKE_EXE_LINKER_FLAGS_RELEASE-INTERNAL ${CMAKE_EXE_LINKER_FLAGS_RELEASE})
|
||||
|
||||
string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_TYPE_lower)
|
||||
if("${BUILD_TYPE_lower}" STREQUAL "release-internal")
|
||||
add_definitions(-D_RELEASE_INTERNAL)
|
||||
endif("${BUILD_TYPE_lower}" STREQUAL "release-internal")
|
||||
|
||||
|
||||
message(STATUS "${CMAKE_BUILD_TYPE} build configuration")
|
||||
|
||||
# Set the runtime source directory
|
||||
if(NOT DEFINED IGDRCL_SOURCE_DIR)
|
||||
set(IGDRCL_SOURCE_DIR ${CMAKE_SOURCE_DIR})
|
||||
endif()
|
||||
|
||||
# Set our build directory
|
||||
if(NOT DEFINED IGDRCL_BUILD_DIR)
|
||||
set(IGDRCL_BUILD_DIR ${CMAKE_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
if(NOT IGDRCL_BINARY_DIR)
|
||||
set(IGDRCL_BINARY_DIR ${CMAKE_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
# we use c++11
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
if(NOT GTEST_SRC_DIR)
|
||||
set(GTEST_SRC_DIR_tmp "${CMAKE_SOURCE_DIR}/../gtest")
|
||||
get_filename_component(GTEST_SRC_DIR ${GTEST_SRC_DIR_tmp} ABSOLUTE)
|
||||
set(GMOCK_SRC_DIR_tmp "${CMAKE_SOURCE_DIR}/../gmock")
|
||||
get_filename_component(GMOCK_SRC_DIR ${GMOCK_SRC_DIR_tmp} ABSOLUTE)
|
||||
else(NOT GTEST_SRC_DIR)
|
||||
get_filename_component(GTEST_SRC_DIR ${GTEST_SRC_DIR} ABSOLUTE)
|
||||
set(GMOCK_SRC_DIR_tmp "${GTEST_SRC_DIR}/../gmock")
|
||||
get_filename_component(GMOCK_SRC_DIR ${GMOCK_SRC_DIR_tmp} ABSOLUTE)
|
||||
endif(NOT GTEST_SRC_DIR)
|
||||
set(GTEST_INCLUDE_DIR "${GTEST_SRC_DIR}/include")
|
||||
set(GMOCK_INCLUDE_DIR "${GMOCK_SRC_DIR}/include")
|
||||
message(STATUS "Google Test source dir: ${GTEST_SRC_DIR}")
|
||||
message(STATUS "Google Mock source dir: ${GMOCK_SRC_DIR}")
|
||||
add_subdirectory(${GMOCK_SRC_DIR} ${IGDRCL_BINARY_DIR}/gmock)
|
||||
set_target_properties(gtest PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON)
|
||||
set_target_properties(gmock PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set(gtest_lib gtest)
|
||||
set(gmock_lib gmock)
|
||||
|
||||
if("${BUILD_TYPE_lower}" STREQUAL "release-internal")
|
||||
set(gtest_lib ${IGDRCL_BINARY_DIR}/gmock/gtest/Release/gtest${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
set(gmock_lib ${IGDRCL_BINARY_DIR}/gmock/Release/gmock${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
endif("${BUILD_TYPE_lower}" STREQUAL "release-internal")
|
||||
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(NEO_BITS "64")
|
||||
set(NEO_ARCH "x64")
|
||||
else()
|
||||
set(NEO_BITS "32")
|
||||
set(NEO_ARCH "x86")
|
||||
endif()
|
||||
|
||||
if(NOT ARTIFACTS_DIR)
|
||||
get_filename_component(ARTIFACTS_DIR_tmp "${CMAKE_SOURCE_DIR}/../artifacts" ABSOLUTE)
|
||||
if(IS_DIRECTORY "${ARTIFACTS_DIR_tmp}")
|
||||
set(ARTIFACTS_DIR "${ARTIFACTS_DIR_tmp}")
|
||||
endif()
|
||||
endif(NOT ARTIFACTS_DIR)
|
||||
if(ARTIFACTS_DIR)
|
||||
message(STATUS "Artifact directory is ${ARTIFACTS_DIR}")
|
||||
endif(ARTIFACTS_DIR)
|
||||
|
||||
if(NOT GTPIN_HEADERS_DIR)
|
||||
if ((EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../internal/gtpin/gtpin_dx11_interface.h") AND (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../internal/gtpin/gtpin_driver_common.h"))
|
||||
get_filename_component(GTPIN_HEADERS_DIR "../internal/gtpin/" ABSOLUTE)
|
||||
message(STATUS "GT-Pin headers dir: ${GTPIN_HEADERS_DIR}")
|
||||
endif()
|
||||
endif(NOT GTPIN_HEADERS_DIR)
|
||||
|
||||
if(NOT LIBDRM_DIR)
|
||||
get_filename_component(LIBDRM_DIR "../libdrm/" ABSOLUTE)
|
||||
endif(NOT LIBDRM_DIR)
|
||||
message(STATUS "libdrm dir: ${LIBDRM_DIR}")
|
||||
|
||||
if(NOT KHRONOS_HEADERS_DIR)
|
||||
get_filename_component(KHRONOS_HEADERS_DIR "../khronos/opencl21/" ABSOLUTE)
|
||||
endif(NOT KHRONOS_HEADERS_DIR)
|
||||
message(STATUS "Khronos OpenCL headers dir: ${KHRONOS_HEADERS_DIR}")
|
||||
set(OCL_HEADERS_DIR ${KHRONOS_HEADERS_DIR})
|
||||
|
||||
if(NOT THIRD_PARTY_DIR)
|
||||
get_filename_component(THIRD_PARTY_DIR "../third_party/" ABSOLUTE)
|
||||
endif(NOT THIRD_PARTY_DIR)
|
||||
message(STATUS "Third party dir: ${THIRD_PARTY_DIR}")
|
||||
|
||||
if(ARTIFACTS_DIR)
|
||||
if(NOT IGC_PATH)
|
||||
if(WIN32)
|
||||
file(GLOB_RECURSE IGC_FILE_tmp "${ARTIFACTS_DIR}/igc32.dll")
|
||||
get_filename_component(IGC_PATH ${IGC_FILE_tmp} DIRECTORY)
|
||||
else(WIN32 )
|
||||
file(GLOB_RECURSE IGC_FILE_tmp "${ARTIFACTS_DIR}/libigdccl.so")
|
||||
# exclude those from the igdrcl location
|
||||
foreach (TMP_PATH ${IGC_FILE_tmp})
|
||||
string (FIND ${TMP_PATH} ${IGDRCL_BINARY_DIR} EXCLUDE_DIR_FOUND)
|
||||
if (NOT ${EXCLUDE_DIR_FOUND} EQUAL -1)
|
||||
list (REMOVE_ITEM IGC_FILE_tmp ${TMP_PATH})
|
||||
endif ()
|
||||
endforeach(TMP_PATH)
|
||||
get_filename_component(IGC_PATH ${IGC_FILE_tmp} DIRECTORY)
|
||||
endif(WIN32)
|
||||
endif(NOT IGC_PATH)
|
||||
|
||||
if(NOT TARGET igc_dll)
|
||||
add_library(igc_dll UNKNOWN IMPORTED)
|
||||
if(WIN32)
|
||||
set_property(TARGET igc_dll PROPERTY "IMPORTED_LOCATION" "${IGC_PATH}/${CMAKE_SHARED_LIBRARY_PREFIX}igc${IGDRCL_OPTION__BITS}${CMAKE_SHARED_LIBRARY_SUFFIX}")
|
||||
else(WIN32)
|
||||
set_property(TARGET igc_dll PROPERTY "IMPORTED_LOCATION" "${IGC_PATH}/${CMAKE_SHARED_LIBRARY_PREFIX}igdccl${IGDRCL_OPTION__BITS}${CMAKE_SHARED_LIBRARY_SUFFIX}")
|
||||
endif(WIN32)
|
||||
endif()
|
||||
list(APPEND IGDRCL__IGC_TARGETS "igc_dll")
|
||||
|
||||
if(NOT TARGET fcl_dll)
|
||||
add_library(fcl_dll UNKNOWN IMPORTED)
|
||||
set_property(TARGET fcl_dll PROPERTY "IMPORTED_LOCATION" "${IGC_PATH}/${CMAKE_SHARED_LIBRARY_PREFIX}igdfcl${IGDRCL_OPTION__BITS}${CMAKE_SHARED_LIBRARY_SUFFIX}")
|
||||
endif()
|
||||
list(APPEND IGDRCL__IGC_TARGETS "fcl_dll")
|
||||
|
||||
if(NOT TARGET iga_dll)
|
||||
add_library(iga_dll UNKNOWN IMPORTED)
|
||||
set_property(TARGET iga_dll PROPERTY "IMPORTED_LOCATION" "${IGC_PATH}/${CMAKE_SHARED_LIBRARY_PREFIX}iga${NEO_BITS}${CMAKE_SHARED_LIBRARY_SUFFIX}")
|
||||
endif()
|
||||
list(APPEND IGDRCL__IGC_TARGETS "iga_dll")
|
||||
|
||||
if(NOT TARGET common_clang_dll)
|
||||
add_library(common_clang_dll UNKNOWN IMPORTED)
|
||||
set_property(TARGET common_clang_dll PROPERTY "IMPORTED_LOCATION" "${IGC_PATH}/${CMAKE_SHARED_LIBRARY_PREFIX}common_clang${IGDRCL_OPTION__BITS}${CMAKE_SHARED_LIBRARY_SUFFIX}")
|
||||
endif()
|
||||
list(APPEND IGDRCL__IGC_TARGETS "common_clang_dll")
|
||||
|
||||
# select proper gmm from artifacts
|
||||
string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_lower)
|
||||
if(NOT GMM_LIB_PATHS)
|
||||
if(WIN32)
|
||||
set(GMM_LIB_PATHS "${ARTIFACTS_DIR}/windows/${CMAKE_BUILD_TYPE_lower}${IGDRCL_OPTION__BITS}/gmmocl")
|
||||
else(WIN32)
|
||||
set(GMM_LIB_PATHS "${ARTIFACTS_DIR}/linux/${CMAKE_BUILD_TYPE_lower}")
|
||||
endif(WIN32)
|
||||
endif()
|
||||
message(STATUS "GmmLib binary path: ${GMM_LIB_PATHS}")
|
||||
|
||||
if(GMM_SOURCE_DIR)
|
||||
get_filename_component(GMM_SOURCE_DIR "${GMM_SOURCE_DIR}" ABSOLUTE)
|
||||
else(GMM_SOURCE_DIR)
|
||||
get_filename_component(GMM_SOURCE_DIR "${CMAKE_SOURCE_DIR}/../gmmlib" ABSOLUTE)
|
||||
endif(GMM_SOURCE_DIR)
|
||||
|
||||
if(NOT IS_DIRECTORY "${GMM_SOURCE_DIR}")
|
||||
message(FATAL_ERROR "GmmLib public API not found!")
|
||||
endif()
|
||||
|
||||
set(GMM_INCLUDE_PATHS
|
||||
"${GMM_SOURCE_DIR}/GmmLib/inc"
|
||||
)
|
||||
set(UMKM_SHAREDDATA_INCLUDE_PATHS
|
||||
"${GMM_SOURCE_DIR}/inc"
|
||||
"${GMM_SOURCE_DIR}/inc/common"
|
||||
)
|
||||
set(IGDRCL__IGC_INCLUDE_DIR ${THIRD_PARTY_DIR})
|
||||
else(ARTIFACTS_DIR)
|
||||
if(GMM_SOURCE_DIR)
|
||||
get_filename_component(GMM_SOURCE_DIR "${GMM_SOURCE_DIR}" ABSOLUTE)
|
||||
else(GMM_SOURCE_DIR)
|
||||
get_filename_component(GMM_SOURCE_DIR_tmp "${CMAKE_SOURCE_DIR}/../gmmlib" ABSOLUTE)
|
||||
if(IS_DIRECTORY "${GMM_SOURCE_DIR_tmp}")
|
||||
set(GMM_SOURCE_DIR "${GMM_SOURCE_DIR_tmp}")
|
||||
endif()
|
||||
endif()
|
||||
if(NOT IS_DIRECTORY "${GMM_SOURCE_DIR}")
|
||||
message(FATAL_ERROR "GmmLib source not found!")
|
||||
endif()
|
||||
message(STATUS "GmmLib source dir is: ${GMM_SOURCE_DIR}")
|
||||
add_subdirectory("${GMM_SOURCE_DIR}" "${IGDRCL_BUILD_DIR}/gmmlib")
|
||||
set(UMKM_SHAREDDATA_INCLUDE_PATHS $<TARGET_PROPERTY:gmm_umd,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
if(IGC_DIR)
|
||||
get_filename_component(IGC_DIR "${IGC_DIR}" ABSOLUTE)
|
||||
else(IGC_DIR)
|
||||
get_filename_component(IGC_DIR_tmp "${CMAKE_SOURCE_DIR}/../igc" ABSOLUTE)
|
||||
if(IS_DIRECTORY "${IGC_DIR_tmp}")
|
||||
set(IGC_DIR "${IGC_DIR_tmp}")
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "IGC source dir is: ${IGC_DIR}")
|
||||
get_filename_component(IGC_PATH "${IGDRCL_BUILD_DIR}/igc" ABSOLUTE)
|
||||
if(IS_DIRECTORY ${IGC_DIR})
|
||||
set(IGC_OPTION__LIBRARY_NAME "igdccl")
|
||||
set(IGC_OPTION__OUTPUT_DIR "${IGC_PATH}")
|
||||
set(IGC_OPTION__INCLUDE_IGC_COMPILER_TOOLS OFF)
|
||||
add_subdirectory("${IGC_DIR}" "${IGDRCL_BUILD_DIR}/igc" EXCLUDE_FROM_ALL)
|
||||
|
||||
set(IGDRCL__IGC_TARGETS "${IGC__IGC_TARGETS}")
|
||||
|
||||
foreach(TARGET_tmp ${IGDRCL__IGC_TARGETS})
|
||||
list(APPEND IGDRCL__IGC_INCLUDE_DIR $<TARGET_PROPERTY:${TARGET_tmp},INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
endforeach(TARGET_tmp)
|
||||
message(STATUS "IGC Includes: ${IGDRCL__IGC_INCLUDE_DIR}")
|
||||
endif()
|
||||
endif(ARTIFACTS_DIR)
|
||||
|
||||
add_definitions(-DGMM_OCL)
|
||||
|
||||
if(IGC_PATH)
|
||||
get_filename_component(IGC_PATH "${IGC_PATH}" ABSOLUTE)
|
||||
message(STATUS "IGC binaries path: ${IGC_PATH}")
|
||||
endif(IGC_PATH)
|
||||
|
||||
# We want to organize our IDE targets into folders
|
||||
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
|
||||
|
||||
# Get available platforms
|
||||
include(platforms.cmake)
|
||||
|
||||
# Enable/Disable BuiltIns compilation during build
|
||||
set(COMPILE_BUILT_INS TRUE CACHE BOOL "Enable built-in kernels compilation")
|
||||
|
||||
# Changing the default executable and library output directories
|
||||
set(IGDRCL_OUTPUT_DIR "${IGDRCL_OPTION__OUTPUT_DIR}")
|
||||
|
||||
# set output paths
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${IGDRCL_BINARY_DIR}/bin)
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${IGDRCL_BINARY_DIR}/bin)
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${IGDRCL_BINARY_DIR}/lib)
|
||||
|
||||
# do not add rpath
|
||||
set(CMAKE_SKIP_RPATH YES CACHE BOOL "" FORCE )
|
||||
|
||||
# Set the configuration type
|
||||
set(CMAKE_CONFIGURATION_TYPES
|
||||
${CMAKE_BUILD_TYPE}
|
||||
)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG")
|
||||
|
||||
option(APPVERIFIER_ALLOWED "allow use of AppVerifier" TRUE)
|
||||
|
||||
option(CCACHE_ALLOWED "allow use of ccache" TRUE)
|
||||
find_program(CCACHE_EXE_FOUND ccache)
|
||||
if(CCACHE_EXE_FOUND AND CCACHE_ALLOWED)
|
||||
message(STATUS "Found ccache: ${CCACHE_EXE_FOUND}")
|
||||
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
|
||||
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
if(NOT WDK_DIR)
|
||||
get_filename_component(WDK_DIR "../wdk" ABSOLUTE)
|
||||
endif(NOT WDK_DIR)
|
||||
message(STATUS "WDK Directory: ${WDK_DIR}")
|
||||
else()
|
||||
find_package(PkgConfig)
|
||||
pkg_check_modules(LIBVA QUIET libva)
|
||||
if(LIBVA_FOUND)
|
||||
add_definitions(-DLIBVA)
|
||||
message(STATUS "Using libva")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Support for WUD
|
||||
# ENABLE_WUD()
# Takes no arguments and does nothing unless building with MSVC.
# On MSVC it:
#   - replaces the default C++ standard libraries with onecore.lib,
#   - adds /NODEFAULTLIB:<lib> for kernel32, user32, gdi32, advapi32 and ole32
#     to the exe, module and shared linker flags,
#   - defines UNICODE and _UNICODE,
#   - adds the WDK "um" library directory for the current NEO_ARCH to the
#     link search path.
# Because this is a macro (not a function), LINKER_FLAGS, IT and the CMAKE_*
# variables it sets are modified in the caller's scope.
# Reads WDK_DIR, WindowsTargetPlatformVersion and NEO_ARCH, which must already
# be set when the macro is invoked.
macro(ENABLE_WUD)
|
||||
if(MSVC)
|
||||
set(CMAKE_CXX_STANDARD_LIBRARIES "onecore.lib")
|
||||
# Accumulate one /NODEFAULTLIB switch per library that must not be linked implicitly
set(LINKER_FLAGS "")
|
||||
foreach(IT kernel32.lib;user32.lib;gdi32.lib;advapi32.lib;ole32.lib;)
|
||||
set(LINKER_FLAGS "${LINKER_FLAGS} /NODEFAULTLIB:${IT}")
|
||||
endforeach()
|
||||
# Apply the same exclusions to every kind of linked artifact
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LINKER_FLAGS}")
|
||||
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${LINKER_FLAGS}")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${LINKER_FLAGS}")
|
||||
add_definitions(-DUNICODE -D_UNICODE)
|
||||
link_directories("${WDK_DIR}/Win15/Lib/${WindowsTargetPlatformVersion}/um/${NEO_ARCH}/")
|
||||
endif(MSVC)
|
||||
endmacro(ENABLE_WUD)
|
||||
|
||||
# Misc options
|
||||
option(IGDRCL_GCOV "generate gcov report" OFF)
|
||||
option(HAVE_TBX_SERVER "Compile TBX server from TbxAccess library" OFF)
|
||||
option(USE_CL_CACHE "Use OpenCL program binary cache" ON)
|
||||
set(CL_CACHE_LOCATION "cl_cache" CACHE STRING "OpenCL program binary cache location")
|
||||
|
||||
if(NOT NEO_DRIVER_VERSION)
|
||||
set(NEO_DRIVER_VERSION 1.0)
|
||||
endif()
|
||||
|
||||
# Put profiling enable flag into define
|
||||
if(OCL_RUNTIME_PROFILING)
|
||||
add_definitions(-DOCL_RUNTIME_PROFILING=${OCL_RUNTIME_PROFILING})
|
||||
endif()
|
||||
|
||||
# We want to build with the static, multithreaded runtime libraries (as opposed
|
||||
# to the multithreaded runtime DLLs)
|
||||
if(MSVC)
|
||||
# Get WDK version from ${WDK_DIR}/Win15/WDKVersion.txt
|
||||
file(READ "${WDK_DIR}/Win15/WDKVersion.txt" WindowsTargetPlatformVersion)
|
||||
string(REPLACE " " ";" WindowsTargetPlatformVersion ${WindowsTargetPlatformVersion})
|
||||
list(LENGTH WindowsTargetPlatformVersion versionListLength)
|
||||
# The content of WDKVersion.txt is split on spaces above and its third field is
# used as the WDK version below, so exactly three fields are required.
# FATAL_ERROR stops configuration here; the previous "ERROR" is not a message()
# mode keyword, so it was printed as plain text and configuration carried on.
if(NOT versionListLength EQUAL 3)
|
||||
message(FATAL_ERROR "Error reading content of WDKVersion.txt file")
|
||||
endif(NOT versionListLength EQUAL 3)
|
||||
list(GET WindowsTargetPlatformVersion 2 WindowsTargetPlatformVersion)
|
||||
|
||||
message(STATUS "WDK Version is ${WindowsTargetPlatformVersion}")
|
||||
|
||||
set(WDK_INCLUDE_PATHS
|
||||
"${WDK_DIR}/Win15/Include/${WindowsTargetPlatformVersion}/um"
|
||||
"${WDK_DIR}/Win15/Include/${WindowsTargetPlatformVersion}/shared"
|
||||
"${WDK_DIR}/Win15/Include/${WindowsTargetPlatformVersion}/km"
|
||||
)
|
||||
# Force to treat warnings as errors
|
||||
if(NOT CMAKE_CXX_FLAGS MATCHES "/WX")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX")
|
||||
endif()
|
||||
MESSAGE(STATUS "WDK include paths: ${WDK_INCLUDE_PATHS}")
|
||||
|
||||
string(REPLACE "/MDd" "/MTd" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
||||
string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||
string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE-INTERNAL "${CMAKE_CXX_FLAGS_RELEASE-INTERNAL}")
|
||||
else()
|
||||
if(IGDRCL_GCOV)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage --coverage")
|
||||
endif()
|
||||
option(USE_ASAN "Link with address sanitization support" OFF)
|
||||
if(USE_ASAN)
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(ASAN_FLAGS " -fsanitize=address -fno-omit-frame-pointer")
|
||||
link_libraries(asan)
|
||||
else()
|
||||
message(STATUS "Address sanitization with clang not yet support")
|
||||
endif()
|
||||
endif()
|
||||
if(USE_TSAN)
|
||||
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
|
||||
set(TSAN_FLAGS " -fsanitize=thread")
|
||||
link_libraries(tsan)
|
||||
else()
|
||||
message(STATUS "Thread sanitization with gcc is not fully supported")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(CheckLibraryExists)
|
||||
CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME)
|
||||
if(HAVE_CLOCK_GETTIME)
|
||||
link_libraries(rt)
|
||||
endif(HAVE_CLOCK_GETTIME)
|
||||
endif(MSVC)
|
||||
|
||||
# setup variables needed for custom configuration type
|
||||
# generate PDB files even for release build on MSVC
|
||||
if(MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftemplate-depth=1024")
|
||||
endif(NOT MSVC)
|
||||
|
||||
# Compiler warning flags
|
||||
if(NOT MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wempty-body")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wignored-qualifiers")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wtype-limits")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
|
||||
|
||||
if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" )
|
||||
# clang only
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshorten-64-to-32")
|
||||
if(USE_SANITIZE_UB)
|
||||
message(STATUS "Enabling undefined behavior sanitizer")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined -fsanitize-recover=vptr -fno-rtti")
|
||||
endif(USE_SANITIZE_UB)
|
||||
if (NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 3.6))
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs")
|
||||
endif()
|
||||
if (NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 4.0))
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-register") # Added for htons()
|
||||
endif()
|
||||
else()
|
||||
# gcc only
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs -Wno-unused-but-set-variable")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wclobbered")
|
||||
if (CMAKE_C_COMPILER_VERSION VERSION_LESS 7.0)
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wimplicit-fallthrough=4")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-noexcept-type") # Added for gtest
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
|
||||
endif()
|
||||
|
||||
# Compile code with defenses enabled (settings to be used for production release code)
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
if(MSVC)
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GS")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /sdl")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /NXCompat")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DynamicBase")
|
||||
# Add /SafeSEH for 32-bit release builds only.
# NEO_ARCH is set to either "x64" or "x86" earlier in this file; the previous
# comparison against "x32" could never match, so the flag was never applied.
if("${NEO_ARCH}" STREQUAL "x86")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /SafeSEH")
|
||||
endif()
|
||||
else()
|
||||
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -D_FORTIFY_SOURCE=2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security")
|
||||
else()
|
||||
# gcc, g++ only
|
||||
if (CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -D_FORTIFY_SOURCE=2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,noexecstack")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,relro")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,now")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Project-wide include paths
|
||||
include_directories(${IGDRCL_SOURCE_DIR})
|
||||
include_directories(${IGDRCL_BUILD_DIR})
|
||||
|
||||
# Define where to put binaries
|
||||
if(MSVC)
|
||||
if ("${CMAKE_GENERATOR}" STREQUAL "Ninja")
|
||||
set(TargetDir ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
||||
else()
|
||||
set(TargetDir ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE})
|
||||
endif()
|
||||
else()
|
||||
set(TargetDir ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
||||
endif(MSVC)
|
||||
|
||||
add_subdirectory(offline_compiler ${IGDRCL_BUILD_DIR}/offline_compiler)
|
||||
target_compile_definitions(cloc PUBLIC MOCKABLE_VIRTUAL=)
|
||||
|
||||
# generate_runtime_lib(LIB_NAME MOCKABLE GENERATE_EXEC)
# Adds the "runtime" subdirectory once per call, building it into
# ${IGDRCL_BUILD_DIR}/${LIB_NAME}, and then sets the MOCKABLE_VIRTUAL compile
# definition on the resulting targets.
#   LIB_NAME      - name of the library target; also used as the binary dir name
#   MOCKABLE      - when true, MOCKABLE_VIRTUAL expands to "virtual" for
#                   LIB_NAME and its "_sharings_enable" companion, and the
#                   default test platform/family definitions are added;
#                   otherwise MOCKABLE_VIRTUAL expands to nothing
#   GENERATE_EXEC - stored in GENERATE_EXECUTABLE before the subdirectory is added
# This is a macro, so NEO_STATIC_LIB_NAME, SHARINGS_ENABLE_LIB_NAME and
# GENERATE_EXECUTABLE are set in the caller's scope; presumably they are read
# by runtime/CMakeLists.txt - TODO confirm.
# BUILTINS_SOURCES_LIB_NAME, BUILTINS_BINARIES_LIB_NAME and
# SCHEDULER_BINARY_LIB_NAME must be set before the macro is invoked.
macro(generate_runtime_lib LIB_NAME MOCKABLE GENERATE_EXEC)
|
||||
set(NEO_STATIC_LIB_NAME ${LIB_NAME})
|
||||
set(SHARINGS_ENABLE_LIB_NAME "${LIB_NAME}_sharings_enable")
|
||||
set(GENERATE_EXECUTABLE ${GENERATE_EXEC})
|
||||
|
||||
add_subdirectory(runtime "${IGDRCL_BUILD_DIR}/${LIB_NAME}")
|
||||
# Built-ins and scheduler targets always get an empty MOCKABLE_VIRTUAL
target_compile_definitions(${BUILTINS_SOURCES_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
|
||||
target_compile_definitions(${BUILTINS_BINARIES_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
|
||||
target_compile_definitions(${SCHEDULER_BINARY_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
|
||||
|
||||
|
||||
# The runtime library itself gets "virtual" only in the mockable flavour
if(${MOCKABLE})
|
||||
target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual)
|
||||
target_compile_definitions(${SHARINGS_ENABLE_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual)
|
||||
target_compile_definitions(${LIB_NAME} PUBLIC DEFAULT_TEST_PLATFORM=${DEFAULT_TESTED_PLATFORM} DEFAULT_TEST_FAMILY_NAME=${DEFAULT_TESTED_FAMILY_NAME})
|
||||
else()
|
||||
target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
|
||||
target_compile_definitions(${SHARINGS_ENABLE_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
|
||||
endif()
|
||||
endmacro(generate_runtime_lib)
|
||||
|
||||
set(NEO_MOCKABLE_LIB_NAME "igdrcl_lib_mockable") # Used by ULTS
|
||||
set(NEO_RELEASE_LIB_NAME "igdrcl_lib_release") # Used by dll/so
|
||||
set(NEO_DYNAMIC_LIB_NAME "igdrcl_dll") # single NEO dll (when WUD-crosscompilation is disabled)
|
||||
set(NEO_DLL_NAME_BASE "igdrcl")
|
||||
|
||||
set(BIKSIM_LIB_NAME "biksim")
|
||||
set(BUILTINS_SOURCES_LIB_NAME "builtins_sources")
|
||||
set(BUILTINS_BINARIES_LIB_NAME "builtins_binaries")
|
||||
set(SCHEDULER_BINARY_LIB_NAME "scheduler_binary")
|
||||
|
||||
add_subdirectory(elf)
|
||||
generate_runtime_lib(${NEO_RELEASE_LIB_NAME} FALSE TRUE)
|
||||
generate_runtime_lib(${NEO_MOCKABLE_LIB_NAME} TRUE FALSE)
|
||||
|
||||
# Add the sibling ../icd directory to the build when it is present.
# if(EXISTS) is only well-defined for full paths, so the check is anchored to
# the current source directory, which is also what the relative path passed to
# add_subdirectory() is resolved against.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../icd")
|
||||
add_subdirectory(../icd ${IGDRCL_BINARY_DIR}/icd)
|
||||
endif()
|
||||
|
||||
if(TARGET OpenCL)
|
||||
target_include_directories(OpenCL PRIVATE ${KHRONOS_HEADERS_DIR})
|
||||
endif()
|
||||
|
||||
if(DEFAULT_TESTED_PLATFORM)
|
||||
add_subdirectory(unit_tests ${IGDRCL_BUILD_DIR}/unit_tests)
|
||||
endif()
|
||||
|
||||
set(DONT_LINK_ELF_STATICALLY TRUE)
|
||||
if(EXISTS ${IGDRCL_SOURCE_DIR}/../internal)
|
||||
add_subdirectory(${IGDRCL_SOURCE_DIR}/../internal ${IGDRCL_BUILD_DIR}/internal)
|
||||
endif(EXISTS ${IGDRCL_SOURCE_DIR}/../internal)
|
||||
|
||||
set(CL_USE_DEPRECATED_OPENCL_1_1_APIS 1)
|
||||
set(CL_USE_DEPRECATED_OPENCL_1_2_APIS 1)
|
||||
set(CL_USE_DEPRECATED_OPENCL_2_0_APIS 1)
|
||||
set(_CRT_SECURE_NO_WARNINGS 1)
|
||||
|
||||
include(package.cmake)
|
||||
|
||||
configure_file(config.h.in ${IGDRCL_BUILD_DIR}/config.h)
|
||||
5
Jenkinsfile
vendored
Normal file
5
Jenkinsfile
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
#!groovy
|
||||
neoDependenciesRev='716918-671'
|
||||
strategy='EQUAL'
|
||||
allowedF=49
|
||||
allowedCD=381
|
||||
76
README.md
Normal file
76
README.md
Normal file
@@ -0,0 +1,76 @@
|
||||
# Intel(R) Graphics Compute Runtime for OpenCL(TM)
|
||||
|
||||
## Introduction
|
||||
|
||||
The Intel(R) Graphics Compute Runtime for OpenCL(TM) is an open source project to
|
||||
converge Intel's development efforts on OpenCL(TM) compute stacks supporting the
|
||||
GEN graphics hardware architecture.
|
||||
|
||||
Please refer to http://01.org/compute-runtime for additional details regarding Intel's
|
||||
motivation and intentions with respect to OpenCL support in open source.
|
||||
|
||||
## License
|
||||
|
||||
The Intel(R) Graphics Compute Runtime for OpenCL(TM) is distributed under the MIT License.
|
||||
|
||||
You may obtain a copy of the License at:
|
||||
|
||||
https://opensource.org/licenses/MIT
|
||||
|
||||
## Building
|
||||
|
||||
<TODO:insert instructions here>
|
||||
|
||||
|
||||
### Install
|
||||
|
||||
<TODO:insert instructions here>
|
||||
|
||||
## Supported Platforms
|
||||
|
||||
Intel Core Processors supporting Gen8 graphics devices - OpenCL 2.0
|
||||
Intel Core Processors supporting Gen9 graphics devices - OpenCL 2.1
|
||||
Intel Atom Processors supporting Gen9 graphics devices - OpenCL 1.2
|
||||
|
||||
## How to provide feedback
|
||||
|
||||
By default, please submit an issue using native github.com interface: https://github.com/intel/compute-runtime/issues.
|
||||
|
||||
|
||||
## How to contribute
|
||||
|
||||
Create a pull request on github.com with your patch. Make sure your change builds cleanly and passes ULTs.
|
||||
A maintainer will contact you if there are questions or concerns.
|
||||
|
||||
|
||||
## Known Issues and Limitations
|
||||
|
||||
OpenCL compliance of a driver built from open-source components should not be
|
||||
assumed by default. Intel will clearly designate / tag specific builds to
|
||||
indicate production quality including formal compliance. Other builds should be
|
||||
considered experimental.
|
||||
|
||||
The driver has the following functional delta compared to previously released drivers:
|
||||
* Intel's closed source SRB5.0 driver (aka Classic)
|
||||
https://software.intel.com/en-us/articles/opencl-drivers#latest_linux_driver
|
||||
* Intel's former open-source Beignet driver
|
||||
https://01.org/beignet
|
||||
|
||||
## Generic extensions
|
||||
* cl_khr_mipmap
|
||||
* cl_khr_mipmap_writes
|
||||
* cl_khr_priority_hints
|
||||
* cl_khr_throttle_hints
|
||||
* cl_khr_fp64
|
||||
## Preview extensions
|
||||
* cl_intelx_video_enhancement
|
||||
* cl_intelx_video_enhancement_camera_pipeline
|
||||
* cl_intelx_video_enhancement_color_pipeline
|
||||
* cl_intelx_hevc_pak
|
||||
## Other capabilities
|
||||
* OpenGL sharing with MESA driver
|
||||
* CL_MEM_SVM_FINE_GRAIN_BUFFER (if using unpatched i915)
|
||||
|
||||
|
||||
___(*) Other names and brands may be claimed as property of others.___
|
||||
|
||||
40
config.h.in
Normal file
40
config.h.in
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#cmakedefine CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#cmakedefine CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#cmakedefine CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#cmakedefine _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#cmakedefine USE_CL_CACHE
|
||||
#if defined(USE_CL_CACHE)
|
||||
static const bool clCacheEnabled = true;
|
||||
#else
|
||||
static const bool clCacheEnabled = false;
|
||||
#endif
|
||||
|
||||
#cmakedefine CL_CACHE_LOCATION "${CL_CACHE_LOCATION}"
|
||||
|
||||
#endif /* CONFIG_H */
|
||||
38
elf/CMakeLists.txt
Normal file
38
elf/CMakeLists.txt
Normal file
@@ -0,0 +1,38 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# CMake 3.2.0 is the oldest release this directory can be configured with.
cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR)

# Static library holding the ELF reader/writer helpers. CMakeLists.txt and the
# headers are listed as sources as well, exactly as before.
add_library(elflib STATIC
  ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
  ${CMAKE_CURRENT_SOURCE_DIR}/reader.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/reader.h
  ${CMAKE_CURRENT_SOURCE_DIR}/types.h
  ${CMAKE_CURRENT_SOURCE_DIR}/writer.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/writer.h
)

# This directory is exported to consumers; the runtime source root is only
# needed to build the library itself.
target_include_directories(elflib
  PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
  PRIVATE ${IGDRCL_SOURCE_DIR}
)

# IDE folder name, and position-independent code for the static archive.
set_target_properties(elflib PROPERTIES
  FOLDER "elflib"
  POSITION_INDEPENDENT_CODE ON
)
|
||||
254
elf/reader.cpp
Normal file
254
elf/reader.cpp
Normal file
@@ -0,0 +1,254 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "reader.h"
|
||||
#include <string.h>
|
||||
|
||||
namespace CLElfLib {
|
||||
|
||||
/******************************************************************************\
|
||||
Constructor: CElfReader::CElfReader
|
||||
\******************************************************************************/
|
||||
CElfReader::CElfReader(
|
||||
const char *pElfBinary,
|
||||
const size_t elfBinarySize) {
|
||||
m_pNameTable = NULL;
|
||||
m_nameTableSize = 0;
|
||||
m_pElfHeader = (SElf64Header *)pElfBinary;
|
||||
m_pBinary = pElfBinary;
|
||||
|
||||
// get a pointer to the string table
|
||||
if (m_pElfHeader) {
|
||||
getSectionData(
|
||||
m_pElfHeader->SectionNameTableIndex,
|
||||
m_pNameTable, m_nameTableSize);
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Destructor: CElfReader::~CElfReader
|
||||
\******************************************************************************/
|
||||
CElfReader::~CElfReader() {
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: CElfReader::Create
|
||||
\******************************************************************************/
|
||||
CElfReader *CElfReader::create(
|
||||
const char *pElfBinary,
|
||||
const size_t elfBinarySize) {
|
||||
CElfReader *pNewReader = NULL;
|
||||
|
||||
if (isValidElf64(pElfBinary, elfBinarySize)) {
|
||||
pNewReader = new CElfReader(pElfBinary, elfBinarySize);
|
||||
}
|
||||
|
||||
return pNewReader;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: CElfReader::Delete
|
||||
\******************************************************************************/
|
||||
void CElfReader::destroy(
|
||||
CElfReader *&pElfReader) {
|
||||
if (pElfReader) {
|
||||
delete pElfReader;
|
||||
pElfReader = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: IsValidElf64
|
||||
Description: Determines if a binary is in the ELF64 format checks for
|
||||
invalid offsets.
|
||||
\******************************************************************************/
|
||||
bool CElfReader::isValidElf64(
|
||||
const void *pBinary,
|
||||
const size_t binarySize) {
|
||||
bool retVal = false;
|
||||
SElf64Header *pElf64Header = NULL;
|
||||
SElf64SectionHeader *pSectionHeader = NULL;
|
||||
char *pNameTable = NULL;
|
||||
char *pEnd = NULL;
|
||||
size_t ourSize = 0;
|
||||
size_t entrySize = 0;
|
||||
size_t indexedSectionHeaderOffset = 0;
|
||||
|
||||
// validate header
|
||||
if (pBinary && (binarySize >= sizeof(SElf64Header))) {
|
||||
// calculate a pointer to the end
|
||||
pEnd = (char *)pBinary + binarySize;
|
||||
pElf64Header = (SElf64Header *)pBinary;
|
||||
|
||||
if ((pElf64Header->Identity[ID_IDX_MAGIC0] == ELF_MAG0) &&
|
||||
(pElf64Header->Identity[ID_IDX_MAGIC1] == ELF_MAG1) &&
|
||||
(pElf64Header->Identity[ID_IDX_MAGIC2] == ELF_MAG2) &&
|
||||
(pElf64Header->Identity[ID_IDX_MAGIC3] == ELF_MAG3) &&
|
||||
(pElf64Header->Identity[ID_IDX_CLASS] == EH_CLASS_64)) {
|
||||
ourSize += pElf64Header->ElfHeaderSize;
|
||||
retVal = true;
|
||||
}
|
||||
}
|
||||
|
||||
// validate sections
|
||||
if (retVal == true) {
|
||||
// get the section entry size
|
||||
entrySize = pElf64Header->SectionHeaderEntrySize;
|
||||
|
||||
// get an offset to the name table
|
||||
if (pElf64Header->SectionNameTableIndex <
|
||||
pElf64Header->NumSectionHeaderEntries) {
|
||||
indexedSectionHeaderOffset =
|
||||
(size_t)pElf64Header->SectionHeadersOffset +
|
||||
(pElf64Header->SectionNameTableIndex * entrySize);
|
||||
|
||||
if (((char *)pBinary + indexedSectionHeaderOffset) <= pEnd) {
|
||||
pNameTable = (char *)pBinary + indexedSectionHeaderOffset;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < pElf64Header->NumSectionHeaderEntries; i++) {
|
||||
indexedSectionHeaderOffset = (size_t)pElf64Header->SectionHeadersOffset +
|
||||
(i * entrySize);
|
||||
|
||||
// check section header offset
|
||||
if (((char *)pBinary + indexedSectionHeaderOffset) > pEnd) {
|
||||
retVal = false;
|
||||
break;
|
||||
}
|
||||
|
||||
pSectionHeader = (SElf64SectionHeader *)((char *)pBinary + indexedSectionHeaderOffset);
|
||||
|
||||
// check section data
|
||||
if (((char *)pBinary + pSectionHeader->DataOffset + pSectionHeader->DataSize) > pEnd) {
|
||||
retVal = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// check section name index
|
||||
if ((pNameTable + pSectionHeader->Name) > pEnd) {
|
||||
retVal = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// tally up the sizes
|
||||
ourSize += (size_t)pSectionHeader->DataSize;
|
||||
ourSize += (size_t)entrySize;
|
||||
}
|
||||
|
||||
if (ourSize != binarySize) {
|
||||
retVal = false;
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: GetElfHeader
|
||||
Description: Returns a pointer to the requested section header
|
||||
\******************************************************************************/
|
||||
const SElf64Header *CElfReader::getElfHeader() {
|
||||
return m_pElfHeader;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: GetSectionHeader
|
||||
Description: Returns a pointer to the requested section header
|
||||
\******************************************************************************/
|
||||
const SElf64SectionHeader *CElfReader::getSectionHeader(
|
||||
unsigned int sectionIndex) {
|
||||
SElf64SectionHeader *pSectionHeader = NULL;
|
||||
size_t indexedSectionHeaderOffset = 0;
|
||||
size_t entrySize = m_pElfHeader->SectionHeaderEntrySize;
|
||||
|
||||
if (sectionIndex < m_pElfHeader->NumSectionHeaderEntries) {
|
||||
indexedSectionHeaderOffset = (size_t)m_pElfHeader->SectionHeadersOffset +
|
||||
(sectionIndex * entrySize);
|
||||
|
||||
pSectionHeader = (SElf64SectionHeader *)((char *)m_pElfHeader + indexedSectionHeaderOffset);
|
||||
}
|
||||
|
||||
return pSectionHeader;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: GetSectionData
|
||||
Description: Returns a pointer to and size of the requested section's
|
||||
data
|
||||
\******************************************************************************/
|
||||
bool CElfReader::getSectionData(
|
||||
const unsigned int sectionIndex,
|
||||
char *&pData,
|
||||
size_t &dataSize) {
|
||||
const SElf64SectionHeader *pSectionHeader = getSectionHeader(sectionIndex);
|
||||
|
||||
if (pSectionHeader) {
|
||||
pData = (char *)m_pBinary + pSectionHeader->DataOffset;
|
||||
dataSize = (size_t)pSectionHeader->DataSize;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: GetSectionData
|
||||
Description: Returns a pointer to and size of the requested section's
|
||||
data
|
||||
\******************************************************************************/
|
||||
bool CElfReader::getSectionData(
|
||||
const char *pName,
|
||||
char *&pData,
|
||||
size_t &dataSize) {
|
||||
const char *pSectionName = NULL;
|
||||
|
||||
for (unsigned int i = 1; i < m_pElfHeader->NumSectionHeaderEntries; i++) {
|
||||
pSectionName = getSectionName(i);
|
||||
|
||||
if (pSectionName && (strcmp(pName, pSectionName) == 0)) {
|
||||
getSectionData(i, pData, dataSize);
|
||||
return true;
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: GetSectionName
|
||||
Description: Returns a pointer to a NULL terminated string
|
||||
\******************************************************************************/
|
||||
const char *CElfReader::getSectionName(
|
||||
unsigned int sectionIndex) {
|
||||
char *pName = NULL;
|
||||
const SElf64SectionHeader *pSectionHeader = getSectionHeader(sectionIndex);
|
||||
|
||||
if (pSectionHeader) {
|
||||
pName = m_pNameTable + pSectionHeader->Name;
|
||||
}
|
||||
|
||||
return pName;
|
||||
}
|
||||
|
||||
} // namespace CLElfLib
|
||||
85
elf/reader.h
Normal file
85
elf/reader.h
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
|
||||
// Calling convention applied to the ELF library's entry points:
// __stdcall on Windows builds, nothing elsewhere.
#ifdef _WIN32
#define ELF_CALL __stdcall
#else
#define ELF_CALL
#endif
|
||||
|
||||
namespace CLElfLib {
|
||||
/******************************************************************************\
|
||||
|
||||
Class: CElfReader
|
||||
|
||||
Description: Class to provide simpler interaction with the ELF standard
|
||||
binary object. SElf64Header defines the ELF header type and
|
||||
SElf64SectionHeader defines the section header type.
|
||||
|
||||
\******************************************************************************/
|
||||
class CElfReader {
|
||||
public:
|
||||
static CElfReader *ELF_CALL create(
|
||||
const char *pElfBinary,
|
||||
const size_t elfBinarySize);
|
||||
|
||||
static void ELF_CALL destroy(
|
||||
CElfReader *&pElfObject);
|
||||
|
||||
static bool ELF_CALL isValidElf64(
|
||||
const void *pBinary,
|
||||
const size_t binarySize);
|
||||
|
||||
const SElf64Header *ELF_CALL getElfHeader();
|
||||
|
||||
const SElf64SectionHeader *ELF_CALL getSectionHeader(
|
||||
unsigned int sectionIndex);
|
||||
|
||||
const char *ELF_CALL getSectionName(
|
||||
unsigned int sectionIndex);
|
||||
|
||||
bool ELF_CALL getSectionData(
|
||||
const unsigned int sectionIndex,
|
||||
char *&pData,
|
||||
size_t &dataSize);
|
||||
|
||||
bool ELF_CALL getSectionData(
|
||||
const char *sectionName,
|
||||
char *&pData,
|
||||
size_t &dataSize);
|
||||
|
||||
protected:
|
||||
ELF_CALL CElfReader(
|
||||
const char *pElfBinary,
|
||||
const size_t elfBinarySize);
|
||||
|
||||
ELF_CALL ~CElfReader();
|
||||
|
||||
SElf64Header *m_pElfHeader; // pointer to the ELF header
|
||||
const char *m_pBinary; // portable ELF binary
|
||||
char *m_pNameTable; // pointer to the string table
|
||||
size_t m_nameTableSize; // size of string table in bytes
|
||||
};
|
||||
} // namespace CLElfLib
|
||||
201
elf/types.h
Normal file
201
elf/types.h
Normal file
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Abstract: Defines the types used for ELF headers/sections.
|
||||
#pragma once
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
|
||||
namespace CLElfLib {
|
||||
/******************************************************************************\
|
||||
ELF Enumerates
|
||||
\******************************************************************************/
|
||||
|
||||
// E_ID_IDX - Byte positions inside the ELF header's Identity array
// (the bytes that mark a file as ELF and describe its class/ABI).
// ID_IDX_NUM_BYTES is the total length of that array.
enum E_ID_IDX {
    ID_IDX_MAGIC0 = 0,
    ID_IDX_MAGIC1 = 1,
    ID_IDX_MAGIC2 = 2,
    ID_IDX_MAGIC3 = 3,
    ID_IDX_CLASS = 4,
    ID_IDX_VERSION = 5,
    ID_IDX_OSABI = 6,
    ID_IDX_ABI_VERSION = 7,
    ID_IDX_PADDING = 8,
    ID_IDX_NUM_BYTES = 16,
};
|
||||
|
||||
// E_EH_CLASS - Value of Identity[ID_IDX_CLASS]; describes which data types
// (32-bit or 64-bit) the ELF structures use.
enum E_EH_CLASS {
    EH_CLASS_NONE = 0,
    EH_CLASS_32 = 1, // Use Elf32 data types
    EH_CLASS_64 = 2, // Use Elf64 data types
};
|
||||
|
||||
// E_EH_TYPE - List of pre-defined ELF header (object file) types.
// OS-specific codes start at 0xfe00 and run to 0xfeff.
// Processor-specific codes start at 0xff00 and end at 0xffff; the OpenCL
// container types below live in that range.
enum E_EH_TYPE {
    EH_TYPE_NONE = 0,
    EH_TYPE_RELOCATABLE = 1,
    EH_TYPE_EXECUTABLE = 2,
    EH_TYPE_DYNAMIC = 3,
    EH_TYPE_CORE = 4,
    EH_TYPE_OPENCL_SOURCE = 0xff01,     // format used to pass CL text sections to FE
    EH_TYPE_OPENCL_OBJECTS = 0xff02,    // format used to pass LLVM objects / store LLVM binary output
    EH_TYPE_OPENCL_LIBRARY = 0xff03,    // format used to store LLVM archive output
    EH_TYPE_OPENCL_EXECUTABLE = 0xff04, // format used to store executable output
    EH_TYPE_OPENCL_DEBUG = 0xff05,      // format used to store debug output
};
|
||||
|
||||
// E_EH_MACHINE - List of pre-defined machine types.
// OpenCL currently does not need this information, so only the "none"
// value is defined; the reserved range is left commented out.
enum E_EH_MACHINE {
    EH_MACHINE_NONE = 0,
    //EHT_MACHINE_LO_RSVD  = 1,   // Beginning of range of reserved types.
    //EHT_MACHINE_HI_RSVD  = 200, // End of range of reserved types.
};
|
||||
|
||||
// E_EHT_VERSION - Allowed values of the ELF header version field.
enum E_EHT_VERSION {
    EH_VERSION_INVALID = 0,
    EH_VERSION_CURRENT = 1,
};
|
||||
|
||||
// E_SH_TYPE - List of pre-defined section header types.
// The 0xff0000xx values are the OpenCL-specific section kinds used by this
// library; the small values are the generic ELF section types.
enum E_SH_TYPE {
    // generic ELF section types
    SH_TYPE_NULL = 0,
    SH_TYPE_PROG_BITS = 1,
    SH_TYPE_SYM_TBL = 2,
    SH_TYPE_STR_TBL = 3,
    SH_TYPE_RELO_ADDS = 4,
    SH_TYPE_HASH = 5,
    SH_TYPE_DYN = 6,
    SH_TYPE_NOTE = 7,
    SH_TYPE_NOBITS = 8,
    SH_TYPE_RELO_NO_ADDS = 9,
    SH_TYPE_SHLIB = 10,
    SH_TYPE_DYN_SYM_TBL = 11,
    SH_TYPE_INIT = 14,
    SH_TYPE_FINI = 15,
    SH_TYPE_PRE_INIT = 16,
    SH_TYPE_GROUP = 17,
    SH_TYPE_SYMTBL_SHNDX = 18,

    // OpenCL section types
    SH_TYPE_OPENCL_SOURCE = 0xff000000,           // CL source to link into LLVM binary
    SH_TYPE_OPENCL_HEADER = 0xff000001,           // CL header to link into LLVM binary
    SH_TYPE_OPENCL_LLVM_TEXT = 0xff000002,        // LLVM text
    SH_TYPE_OPENCL_LLVM_BINARY = 0xff000003,      // LLVM byte code
    SH_TYPE_OPENCL_LLVM_ARCHIVE = 0xff000004,     // LLVM archives(s)
    SH_TYPE_OPENCL_DEV_BINARY = 0xff000005,       // Device binary (coherent by default)
    SH_TYPE_OPENCL_OPTIONS = 0xff000006,          // CL Options
    SH_TYPE_OPENCL_PCH = 0xff000007,              // PCH (pre-compiled headers)
    SH_TYPE_OPENCL_DEV_DEBUG = 0xff000008,        // Device debug
    SH_TYPE_SPIRV = 0xff000009,                   // SPIRV
    SH_TYPE_NON_COHERENT_DEV_BINARY = 0xff00000a, // Non-coherent Device binary
};
|
||||
|
||||
// E_SH_FLAG - List of section header flags (bit mask values).
// NOTE(review): SH_FLAG_MERGE, SH_FLAG_STRINGS, SH_FLAG_INFO_LINK and
// SH_FLAG_LINK_ORDER are one bit lower than the System V gABI values
// (SHF_MERGE 0x10, SHF_STRINGS 0x20, SHF_INFO_LINK 0x40, SHF_LINK_ORDER 0x80).
// The values are kept as they are; confirm this is intentional before
// relying on them for interoperability with other ELF tools.
enum E_SH_FLAG {
    SH_FLAG_WRITE = 0x1,
    SH_FLAG_ALLOC = 0x2,
    SH_FLAG_EXEC_INSTR = 0x4,
    SH_FLAG_MERGE = 0x8,
    SH_FLAG_STRINGS = 0x10,
    SH_FLAG_INFO_LINK = 0x20,
    SH_FLAG_LINK_ORDER = 0x40,
    SH_FLAG_OS_NONCONFORM = 0x100,
    SH_FLAG_GROUP = 0x200,
    SH_FLAG_TLS = 0x400,
    SH_FLAG_MASK_OS = 0x0ff00000,
    SH_FLAG_MASK_PROC = 0xf0000000,
};
|
||||
|
||||
/******************************************************************************\
 ELF-64 Data Types
 Fixed-width integer aliases used by the ELF-64 structures. MSVC builds use
 the compiler's __intN types. Other builds use <inttypes.h> types, and skip
 the names that the Linux UAPI ELF header provides when it has already been
 included (detected through its _UAPI_LINUX_ELF_H include guard).
\******************************************************************************/
#if defined(_MSC_VER) // && (_MSC_VER < 1700)
using Elf64_Addr = unsigned __int64;
using Elf64_Off = unsigned __int64;
using Elf64_Short = unsigned __int16; // Renaming Elf64_Half to Elf64_Short to avoid a conflict with Android
using Elf64_Word = unsigned __int32;
using Elf64_Sword = __int32;
using Elf64_Xword = unsigned __int64;
#else
using Elf64_Short = uint16_t; // Renaming Elf64_Half to Elf64_Short to avoid a conflict with Android
#if !defined(_UAPI_LINUX_ELF_H)
using Elf64_Addr = uint64_t;
using Elf64_Off = uint64_t;
using Elf64_Word = uint32_t;
using Elf64_Sword = int32_t;
using Elf64_Xword = uint64_t;
#endif
#endif
|
||||
|
||||
/******************************************************************************\
 ELF Constants
 The four magic bytes expected at Identity[ID_IDX_MAGIC0..ID_IDX_MAGIC3],
 plus the alignment constant used by the library.
\******************************************************************************/
static constexpr unsigned char ELF_MAG0 = 0x7f;      // ELFHeader.Identity[ID_IDX_MAGIC0]
static constexpr unsigned char ELF_MAG1 = 'E';       // ELFHeader.Identity[ID_IDX_MAGIC1]
static constexpr unsigned char ELF_MAG2 = 'L';       // ELFHeader.Identity[ID_IDX_MAGIC2]
static constexpr unsigned char ELF_MAG3 = 'F';       // ELFHeader.Identity[ID_IDX_MAGIC3]
static constexpr unsigned int ELF_ALIGN_BYTES = 16;  // Alignment set to 16-bytes
|
||||
|
||||
/******************************************************************************\
|
||||
ELF-64 Header
|
||||
\******************************************************************************/
|
||||
struct SElf64Header {
|
||||
unsigned char Identity[ID_IDX_NUM_BYTES];
|
||||
Elf64_Short Type;
|
||||
Elf64_Short Machine;
|
||||
Elf64_Word Version;
|
||||
Elf64_Addr EntryAddress;
|
||||
Elf64_Off ProgramHeadersOffset;
|
||||
Elf64_Off SectionHeadersOffset;
|
||||
Elf64_Word Flags;
|
||||
Elf64_Short ElfHeaderSize;
|
||||
Elf64_Short ProgramHeaderEntrySize;
|
||||
Elf64_Short NumProgramHeaderEntries;
|
||||
Elf64_Short SectionHeaderEntrySize;
|
||||
Elf64_Short NumSectionHeaderEntries;
|
||||
Elf64_Short SectionNameTableIndex;
|
||||
};
|
||||
|
||||
/******************************************************************************\
|
||||
ELF-64 Section Header
|
||||
\******************************************************************************/
|
||||
struct SElf64SectionHeader {
|
||||
Elf64_Word Name;
|
||||
Elf64_Word Type;
|
||||
Elf64_Xword Flags;
|
||||
Elf64_Addr Address;
|
||||
Elf64_Off DataOffset;
|
||||
Elf64_Xword DataSize;
|
||||
Elf64_Word Link;
|
||||
Elf64_Word Info;
|
||||
Elf64_Xword Alignment;
|
||||
Elf64_Xword EntrySize;
|
||||
};
|
||||
|
||||
} // namespace CLElfLib
|
||||
285
elf/writer.cpp
Normal file
285
elf/writer.cpp
Normal file
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "writer.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include <cstring>
|
||||
|
||||
namespace CLElfLib {
|
||||
/******************************************************************************\
|
||||
Constructor: CElfWriter::CElfWriter
|
||||
\******************************************************************************/
|
||||
CElfWriter::CElfWriter(
|
||||
E_EH_TYPE type,
|
||||
E_EH_MACHINE machine,
|
||||
Elf64_Xword flags) {
|
||||
m_type = type;
|
||||
m_machine = machine;
|
||||
m_flags = flags;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Destructor: CElfWriter::~CElfWriter
|
||||
\******************************************************************************/
|
||||
CElfWriter::~CElfWriter() {
|
||||
SSectionNode *pNode = NULL;
|
||||
|
||||
// Walk through the section nodes
|
||||
while (m_nodeQueue.empty() == false) {
|
||||
pNode = m_nodeQueue.front();
|
||||
m_nodeQueue.pop();
|
||||
|
||||
// delete the node and it's data
|
||||
if (pNode) {
|
||||
if (pNode->pData) {
|
||||
delete[] pNode->pData;
|
||||
pNode->pData = NULL;
|
||||
}
|
||||
|
||||
delete pNode;
|
||||
pNode = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: CElfWriter::Create
|
||||
\******************************************************************************/
|
||||
CElfWriter *CElfWriter::create(
|
||||
E_EH_TYPE type,
|
||||
E_EH_MACHINE machine,
|
||||
Elf64_Xword flags) {
|
||||
CElfWriter *pWriter = new CElfWriter(type, machine, flags);
|
||||
|
||||
if (!pWriter->initialize()) {
|
||||
destroy(pWriter);
|
||||
}
|
||||
|
||||
return pWriter;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: CElfWriter::Delete
|
||||
\******************************************************************************/
|
||||
void CElfWriter::destroy(
|
||||
CElfWriter *&pWriter) {
|
||||
if (pWriter) {
|
||||
delete pWriter;
|
||||
pWriter = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: CElfWriter::AddSection
|
||||
\******************************************************************************/
|
||||
bool CElfWriter::addSection(
|
||||
SSectionNode *pSectionNode) {
|
||||
bool retVal = true;
|
||||
SSectionNode *pNode = NULL;
|
||||
size_t nameSize = 0;
|
||||
unsigned int dataSize = 0;
|
||||
|
||||
// The section header must be non-NULL
|
||||
if (pSectionNode) {
|
||||
pNode = new SSectionNode();
|
||||
if (!pNode)
|
||||
return false;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
pNode->Flags = pSectionNode->Flags;
|
||||
pNode->Type = pSectionNode->Type;
|
||||
|
||||
nameSize = pSectionNode->Name.size() + 1;
|
||||
dataSize = pSectionNode->DataSize;
|
||||
|
||||
pNode->Name = pSectionNode->Name;
|
||||
|
||||
// ok to have NULL data
|
||||
if (dataSize > 0) {
|
||||
pNode->pData = new char[dataSize];
|
||||
if (pNode->pData) {
|
||||
memcpy_s(pNode->pData, dataSize, pSectionNode->pData, dataSize);
|
||||
pNode->DataSize = dataSize;
|
||||
} else {
|
||||
retVal = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (retVal) {
|
||||
// push the node onto the queue
|
||||
m_nodeQueue.push(pNode);
|
||||
|
||||
// increment the sizes for each section
|
||||
m_dataSize += dataSize;
|
||||
m_stringTableSize += nameSize;
|
||||
m_numSections++;
|
||||
} else {
|
||||
delete pNode;
|
||||
pNode = nullptr;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: CElfWriter::ResolveBinary
|
||||
\******************************************************************************/
|
||||
bool CElfWriter::resolveBinary(
|
||||
char *const pBinary,
|
||||
size_t &binarySize) {
|
||||
bool retVal = true;
|
||||
SSectionNode *pNode = NULL;
|
||||
SElf64SectionHeader *pCurSectionHeader = NULL;
|
||||
char *pData = NULL;
|
||||
char *pStringTable = NULL;
|
||||
char *pCurString = NULL;
|
||||
|
||||
m_totalBinarySize =
|
||||
sizeof(SElf64Header) +
|
||||
((m_numSections + 1) * sizeof(SElf64SectionHeader)) + // +1 to account for string table entry
|
||||
m_dataSize +
|
||||
m_stringTableSize;
|
||||
|
||||
if (pBinary) {
|
||||
// get a pointer to the first section header
|
||||
pCurSectionHeader = (SElf64SectionHeader *)(pBinary + sizeof(SElf64Header));
|
||||
|
||||
// get a pointer to the data
|
||||
pData = pBinary +
|
||||
sizeof(SElf64Header) +
|
||||
((m_numSections + 1) * sizeof(SElf64SectionHeader)); // +1 to account for string table entry
|
||||
|
||||
// get a pointer to the string table
|
||||
pStringTable = pBinary + sizeof(SElf64Header) +
|
||||
((m_numSections + 1) * sizeof(SElf64SectionHeader)) + // +1 to account for string table entry
|
||||
m_dataSize;
|
||||
|
||||
pCurString = pStringTable;
|
||||
|
||||
// Walk through the section nodes
|
||||
while (m_nodeQueue.empty() == false) {
|
||||
pNode = m_nodeQueue.front();
|
||||
|
||||
if (pNode) {
|
||||
m_nodeQueue.pop();
|
||||
|
||||
// Copy data into the section header
|
||||
memset(pCurSectionHeader, 0, sizeof(SElf64SectionHeader));
|
||||
pCurSectionHeader->Type = pNode->Type;
|
||||
pCurSectionHeader->Flags = pNode->Flags;
|
||||
pCurSectionHeader->DataSize = pNode->DataSize;
|
||||
pCurSectionHeader->DataOffset = pData - pBinary;
|
||||
pCurSectionHeader->Name = (Elf64_Word)(pCurString - pStringTable);
|
||||
pCurSectionHeader = (SElf64SectionHeader *)((unsigned char *)pCurSectionHeader + sizeof(SElf64SectionHeader));
|
||||
|
||||
// copy the data, move the data pointer
|
||||
memcpy_s(pData, pNode->DataSize, pNode->pData, pNode->DataSize);
|
||||
pData += pNode->DataSize;
|
||||
|
||||
// copy the name into the string table, move the string pointer
|
||||
if (pNode->Name.size() > 0) {
|
||||
memcpy_s(pCurString, pNode->Name.size(), pNode->Name.c_str(), pNode->Name.size());
|
||||
pCurString += pNode->Name.size();
|
||||
}
|
||||
*(pCurString++) = '\0';
|
||||
|
||||
// delete the node and it's data
|
||||
if (pNode->pData) {
|
||||
delete[] pNode->pData;
|
||||
pNode->pData = NULL;
|
||||
}
|
||||
|
||||
delete pNode;
|
||||
pNode = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// add the string table section header
|
||||
SElf64SectionHeader stringSectionHeader = {0};
|
||||
stringSectionHeader.Type = SH_TYPE_STR_TBL;
|
||||
stringSectionHeader.Flags = 0;
|
||||
stringSectionHeader.DataOffset = pStringTable - pBinary;
|
||||
stringSectionHeader.DataSize = m_stringTableSize;
|
||||
stringSectionHeader.Name = 0;
|
||||
|
||||
// Copy into the last section header
|
||||
memcpy_s(pCurSectionHeader, sizeof(SElf64SectionHeader),
|
||||
&stringSectionHeader, sizeof(SElf64SectionHeader));
|
||||
|
||||
// Add to our section number
|
||||
m_numSections++;
|
||||
|
||||
// patch up the ELF header
|
||||
retVal = patchElfHeader(pBinary);
|
||||
}
|
||||
|
||||
if (retVal) {
|
||||
binarySize = m_totalBinarySize;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: CElfWriter::initialize
|
||||
\******************************************************************************/
|
||||
bool CElfWriter::initialize() {
|
||||
SSectionNode emptySection;
|
||||
|
||||
// Add an empty section 0 (points to "no-bits")
|
||||
return addSection(&emptySection);
|
||||
}
|
||||
|
||||
/******************************************************************************\
|
||||
Member Function: CElfWriter::patchElfHeader
|
||||
\******************************************************************************/
|
||||
bool CElfWriter::patchElfHeader(char *const pBinary) {
|
||||
SElf64Header *pElfHeader = (SElf64Header *)pBinary;
|
||||
|
||||
if (pElfHeader) {
|
||||
// Setup the identity
|
||||
memset(pElfHeader, 0x00, sizeof(SElf64Header));
|
||||
pElfHeader->Identity[ID_IDX_MAGIC0] = ELF_MAG0;
|
||||
pElfHeader->Identity[ID_IDX_MAGIC1] = ELF_MAG1;
|
||||
pElfHeader->Identity[ID_IDX_MAGIC2] = ELF_MAG2;
|
||||
pElfHeader->Identity[ID_IDX_MAGIC3] = ELF_MAG3;
|
||||
pElfHeader->Identity[ID_IDX_CLASS] = EH_CLASS_64;
|
||||
pElfHeader->Identity[ID_IDX_VERSION] = EH_VERSION_CURRENT;
|
||||
|
||||
// Add other non-zero info
|
||||
pElfHeader->Type = m_type;
|
||||
pElfHeader->Machine = m_machine;
|
||||
pElfHeader->Flags = (unsigned int)m_flags;
|
||||
pElfHeader->ElfHeaderSize = sizeof(SElf64Header);
|
||||
pElfHeader->SectionHeaderEntrySize = sizeof(SElf64SectionHeader);
|
||||
pElfHeader->NumSectionHeaderEntries = (Elf64_Short)m_numSections;
|
||||
pElfHeader->SectionHeadersOffset = (unsigned int)(sizeof(SElf64Header));
|
||||
pElfHeader->SectionNameTableIndex = m_numSections - 1; // last index
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace CLElfLib
|
||||
105
elf/writer.h
Normal file
105
elf/writer.h
Normal file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
#include <queue>
|
||||
#include <string>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define ELF_CALL __stdcall
|
||||
#else
|
||||
#define ELF_CALL
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace CLElfLib {
|
||||
static const unsigned int g_scElfHeaderAlignment = 16; // allocation alignment restriction
|
||||
static const unsigned int g_scInitialElfSize = 2048; // initial elf size (in bytes)
|
||||
static const unsigned int g_scInitNumSectionHeaders = 8;
|
||||
|
||||
struct SSectionNode {
|
||||
E_SH_TYPE Type;
|
||||
unsigned int Flags;
|
||||
string Name;
|
||||
char *pData;
|
||||
unsigned int DataSize;
|
||||
|
||||
SSectionNode() {
|
||||
Type = SH_TYPE_NULL;
|
||||
Flags = 0;
|
||||
pData = NULL;
|
||||
DataSize = 0;
|
||||
}
|
||||
|
||||
~SSectionNode() {
|
||||
}
|
||||
};
|
||||
|
||||
/******************************************************************************\
|
||||
|
||||
Class: CElfWriter
|
||||
|
||||
Description: Class to provide simpler interaction with the ELF standard
|
||||
binary object. SElf64Header defines the ELF header type and
|
||||
SElf64SectionHeader defines the section header type.
|
||||
|
||||
\******************************************************************************/
|
||||
class CElfWriter {
|
||||
public:
|
||||
static CElfWriter *ELF_CALL create(
|
||||
E_EH_TYPE type,
|
||||
E_EH_MACHINE machine,
|
||||
Elf64_Xword flags);
|
||||
|
||||
static void ELF_CALL destroy(CElfWriter *&pElfWriter);
|
||||
|
||||
bool ELF_CALL addSection(
|
||||
SSectionNode *pSectionNode);
|
||||
|
||||
bool ELF_CALL resolveBinary(
|
||||
char *const pBinary,
|
||||
size_t &dataSize);
|
||||
|
||||
bool ELF_CALL initialize();
|
||||
bool ELF_CALL patchElfHeader(char *const pBinary);
|
||||
|
||||
protected:
|
||||
ELF_CALL CElfWriter(
|
||||
E_EH_TYPE type,
|
||||
E_EH_MACHINE machine,
|
||||
Elf64_Xword flags);
|
||||
|
||||
ELF_CALL ~CElfWriter();
|
||||
|
||||
E_EH_TYPE m_type = EH_TYPE_NONE;
|
||||
E_EH_MACHINE m_machine = EH_MACHINE_NONE;
|
||||
Elf64_Xword m_flags = 0U;
|
||||
|
||||
std::queue<SSectionNode *> m_nodeQueue;
|
||||
|
||||
unsigned int m_dataSize = 0U;
|
||||
unsigned int m_numSections = 0U;
|
||||
size_t m_stringTableSize = 0U;
|
||||
size_t m_totalBinarySize = 0U;
|
||||
};
|
||||
} // namespace CLElfLib
|
||||
61
manifests/manifest.yml
Normal file
61
manifests/manifest.yml
Normal file
@@ -0,0 +1,61 @@
|
||||
components:
|
||||
gmmlib:
|
||||
branch: gmmlib
|
||||
clean_on_sync: true
|
||||
dest_dir: gmmlib
|
||||
repository: https://github.com/intel/gmmlib.git
|
||||
revision: 9a261a60bd990b237fe14138b7aaf5eaee342ff8
|
||||
type: git
|
||||
gmock:
|
||||
branch: master
|
||||
clean_on_sync: true
|
||||
dest_dir: gmock
|
||||
repository: https://github.com/google/googlemock.git
|
||||
revision: c440c8fafc6f60301197720617ce64028e09c79d
|
||||
type: git
|
||||
gtest:
|
||||
branch: master
|
||||
clean_on_sync: true
|
||||
dest_dir: gtest
|
||||
repository: https://github.com/google/googletest.git
|
||||
revision: c99458533a9b4c743ed51537e25989ea55944908
|
||||
type: git
|
||||
igc:
|
||||
branch: igc
|
||||
clean_on_sync: true
|
||||
dest_dir: igc
|
||||
repository: https://github.com/intel/intelgraphicscompiler
|
||||
revision: d6379492df107094d0642f0ecf75a6f20ae573b2-2
|
||||
type: git
|
||||
infra:
|
||||
branch: infra
|
||||
clean_on_sync: true
|
||||
dest_dir: infra
|
||||
revision: c81cf66d7995e55cb8f11b24d3776e3fc013a809
|
||||
type: git
|
||||
internal:
|
||||
branch: master
|
||||
dest_dir: internal
|
||||
revision: 93d1c17c98d8c051bcd4368686bc9cf2eddd8f8e
|
||||
type: git
|
||||
khronos:
|
||||
branch: master
|
||||
clean_on_sync: true
|
||||
dest_dir: khronos
|
||||
repository: https://github.com/KhronosGroup/OpenCL-Headers.git
|
||||
revision: f039db6764d52388658ef15c30b2237bbda49803
|
||||
type: git
|
||||
libdrm:
|
||||
branch: libdrm-2.4.84
|
||||
clean_on_sync: true
|
||||
dest_dir: libdrm
|
||||
repository: https://anongit.freedesktop.org/git/mesa/drm.git
|
||||
revision: 290d29d9794813a2fe0578dbb905ad09bc810516
|
||||
type: git
|
||||
wdk:
|
||||
branch: wdk
|
||||
clean_on_sync: true
|
||||
dest_dir: wdk
|
||||
revision: c67a2fa209d3ad3c3ab05f6f10e2234fd81fcebc
|
||||
type: git
|
||||
version: '1'
|
||||
139
offline_compiler/CMakeLists.txt
Normal file
139
offline_compiler/CMakeLists.txt
Normal file
@@ -0,0 +1,139 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR)

project(cloc)

# Sources of the offline compiler that are also exported to the parent scope
# (see the end of this file).
set(CLOC_SRCS_LIB
  ${IGDRCL_SOURCE_DIR}/offline_compiler/offline_compiler.cpp
  ${IGDRCL_SOURCE_DIR}/offline_compiler/offline_compiler.h
  ${IGDRCL_SOURCE_DIR}/offline_compiler/options.cpp
  ${IGDRCL_SOURCE_DIR}/offline_compiler/helper.cpp
  ${IGDRCL_SOURCE_DIR}/runtime/compiler_interface/create_main.cpp
  ${IGDRCL_SOURCE_DIR}/runtime/helpers/hw_info.cpp
  ${IGDRCL_SOURCE_DIR}/runtime/helpers/file_io.cpp
  ${IGDRCL_SOURCE_DIR}/runtime/helpers/abort.cpp
  ${IGDRCL_SOURCE_DIR}/runtime/helpers/debug_helpers.cpp
)

# OS-specific implementations
if(WIN32)
  list(APPEND CLOC_SRCS_LIB
    ${IGDRCL_SOURCE_DIR}/runtime/os_interface/windows/os_library.cpp
    ${IGDRCL_SOURCE_DIR}/runtime/os_interface/windows/options.cpp
  )
endif()

if(UNIX)
  list(APPEND CLOC_SRCS_LIB
    ${IGDRCL_SOURCE_DIR}/runtime/os_interface/linux/os_library.cpp
    ${IGDRCL_SOURCE_DIR}/runtime/os_interface/linux/options.cpp
  )
endif()

list(APPEND HW_SRC_INCLUDES ${IGDRCL_SOURCE_DIR}/runtime/gen_common)

set(CLOC_LIB_FLAGS_DEFINITIONS
  -DCIF_HEADERS_ONLY_BUILD
)

# Per-gen files that are added only when they exist on disk
set(OPTIONAL_RUNTIME_GENX_FILES
  hw_info.cpp
)

# Collect the hardware-info sources for every gen that has supported platforms
foreach(GEN_NUM RANGE 0 ${MAX_GEN} 1)
  GEN_CONTAINS_PLATFORMS("SUPPORTED" ${GEN_NUM} GENX_HAS_PLATFORMS)
  if(${GENX_HAS_PLATFORMS})
    foreach(SRC_IT ${OPTIONAL_RUNTIME_GENX_FILES})
      set(SRC_FILE ${IGDRCL_SOURCE_DIR}/runtime/gen${GEN_NUM}/${SRC_IT})
      if(EXISTS ${SRC_FILE})
        list(APPEND CLOC_SRCS_LIB ${SRC_FILE})
      endif()
    endforeach()

    GET_PLATFORMS_FOR_GEN("SUPPORTED" ${GEN_NUM} SUPPORTED_GENX_PLATFORMS)
    foreach(PLATFORM_IT ${SUPPORTED_GENX_PLATFORMS})
      string(TOLOWER ${PLATFORM_IT} PLATFORM_IT_LOWER)
      list(APPEND CLOC_SRCS_LIB
        ${IGDRCL_SOURCE_DIR}/runtime/gen${GEN_NUM}/hw_info_${PLATFORM_IT_LOWER}.cpp
        ${IGDRCL_SOURCE_DIR}/runtime/gen${GEN_NUM}/enable_${PLATFORM_IT_LOWER}.cpp
      )
    endforeach()
  endif()
endforeach()

# The executable adds main.cpp (and this script) on top of the shared sources
set(CLOC_SRCS
  ${CLOC_SRCS_LIB}
  main.cpp
  ${IGDRCL_SOURCE_DIR}/offline_compiler/CMakeLists.txt
)

add_executable(cloc ${CLOC_SRCS})

if(IGC_OCL_ADAPTOR_DIR) # IGC/AdaptorOCL
  target_include_directories(cloc PUBLIC "${IGC_OCL_ADAPTOR_DIR}")
endif()

if(CIF_BASE_DIR)
  target_include_directories(cloc PUBLIC "${CIF_BASE_DIR}")
endif()

# Include directories exported to the parent scope
set(CLOC_INCLUDES
  "${HW_SRC_INCLUDES}"
  "${UMKM_SHAREDDATA_INCLUDE_PATHS}"
  "${KHRONOS_HEADERS_DIR}"
  "${IGDRCL__IGC_INCLUDE_DIR}"
  "${THIRD_PARTY_DIR}"
  PARENT_SCOPE
)

target_include_directories(cloc BEFORE PRIVATE
  "${HW_SRC_INCLUDES}"
  "${UMKM_SHAREDDATA_INCLUDE_PATHS}"
  "${KHRONOS_HEADERS_DIR}"
  "${IGDRCL__IGC_INCLUDE_DIR}"
  "${THIRD_PARTY_DIR}"
)
target_compile_definitions(cloc PUBLIC ${CLOC_LIB_FLAGS_DEFINITIONS} ${SUPPORTED_GEN_FLAGS_DEFINITONS} DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM})

if(UNIX)
  target_link_libraries(cloc dl pthread)
endif()

target_link_libraries(cloc elflib)

source_group("source files" FILES ${CLOC_SRCS})
set_target_properties(cloc PROPERTIES FOLDER "offline_compiler")
set_property(TARGET cloc APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS})

# Copies every IGC target file next to the cloc executable
add_custom_target(copy_compiler_files DEPENDS ${IGDRCL__IGC_TARGETS})
set_target_properties(copy_compiler_files PROPERTIES FOLDER "opencl runtime")

foreach(TARGET_tmp ${IGDRCL__IGC_TARGETS})
  add_custom_command(
    TARGET copy_compiler_files
    PRE_BUILD
    COMMAND echo copying $<TARGET_FILE:${TARGET_tmp}> to "$<TARGET_FILE_DIR:cloc>"
    COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:${TARGET_tmp}> $<TARGET_FILE_DIR:cloc>
  )
endforeach()

# Export the shared source list and definitions to the parent scope
set(CLOC_SRCS_LIB ${CLOC_SRCS_LIB} PARENT_SCOPE)
set(CLOC_LIB_FLAGS_DEFINITIONS ${CLOC_LIB_FLAGS_DEFINITIONS} PARENT_SCOPE)
|
||||
42
offline_compiler/create_command_stream.cpp
Normal file
42
offline_compiler/create_command_stream.cpp
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "hw_info.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
|
||||
namespace OCLRT {
|
||||
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
|
||||
|
||||
// Creates a command stream receiver for the given hardware.
// Returns nullptr when no creation function is registered in the selected slot.
CommandStreamReceiver *createCommandStream(const HardwareInfo *pHwInfo) {
    DEBUG_BREAK_IF(nullptr == pHwInfo->pPlatform);

    // The factory table holds 2 * IGFX_MAX_CORE entries; this lookup is offset
    // by IGFX_MAX_CORE, i.e. it selects from the second half of the table.
    const auto factoryIndex = IGFX_MAX_CORE + pHwInfo->pPlatform->eRenderCoreFamily;
    auto funcCreate = commandStreamReceiverFactory[factoryIndex];

    if (!funcCreate) {
        return nullptr;
    }
    return funcCreate(*pHwInfo);
}
|
||||
|
||||
// Device enumeration as built into this translation unit: reports zero
// devices, clears the output pointer and always succeeds.
bool getDevices(HardwareInfo **hwInfo, size_t &numDevicesReturned) {
    numDevicesReturned = 0;
    *hwInfo = nullptr;

    return true;
}
|
||||
}
|
||||
63
offline_compiler/helper.cpp
Normal file
63
offline_compiler/helper.cpp
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <DebugFunctionalityLevel DebugLevel>
|
||||
DebugSettingsManager<DebugLevel>::DebugSettingsManager() {
|
||||
}
|
||||
|
||||
template <DebugFunctionalityLevel DebugLevel>
|
||||
DebugSettingsManager<DebugLevel>::~DebugSettingsManager() {
|
||||
}
|
||||
|
||||
template <DebugFunctionalityLevel DebugLevel>
|
||||
void DebugSettingsManager<DebugLevel>::writeToFile(std::string filename, const char *str, size_t length, std::ios_base::openmode mode) {
|
||||
std::ofstream outFile(filename, mode);
|
||||
if (outFile.is_open()) {
|
||||
outFile.write(str, length);
|
||||
outFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
// Global Debug Settings Manager
|
||||
DebugSettingsManager<globalDebugFunctionalityLevel> DebugManager;
|
||||
|
||||
// Global table of hardware prefixes
|
||||
const char *hardwarePrefix[IGFX_MAX_PRODUCT] = {
|
||||
nullptr,
|
||||
};
|
||||
|
||||
// Global table of family names
|
||||
const char *familyName[IGFX_MAX_CORE] = {
|
||||
nullptr,
|
||||
};
|
||||
|
||||
// Global table of per-family enabled flags
|
||||
bool familyEnabled[IGFX_MAX_CORE] = {
|
||||
false,
|
||||
};
|
||||
|
||||
} // namespace OCLRT
|
||||
54
offline_compiler/main.cpp
Normal file
54
offline_compiler/main.cpp
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "offline_compiler/offline_compiler.h"
|
||||
#include "runtime/os_interface/os_library.h"
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
int main(int numArgs, const char *argv[]) {
|
||||
int retVal = CL_SUCCESS;
|
||||
OfflineCompiler *pCompiler = OfflineCompiler::create(numArgs, argv, retVal);
|
||||
|
||||
if (retVal == CL_SUCCESS) {
|
||||
retVal = pCompiler->build();
|
||||
|
||||
std::string buildLog = pCompiler->getBuildLog();
|
||||
if (buildLog.empty() == false) {
|
||||
printf("%s\n", buildLog.c_str());
|
||||
}
|
||||
|
||||
if (retVal == CL_SUCCESS) {
|
||||
if (!pCompiler->isQuiet())
|
||||
printf("Build succeeded.\n");
|
||||
} else {
|
||||
printf("Build failed with error code: %d\n", retVal);
|
||||
}
|
||||
}
|
||||
|
||||
delete pCompiler;
|
||||
return retVal;
|
||||
}
|
||||
781
offline_compiler/offline_compiler.cpp
Normal file
781
offline_compiler/offline_compiler.cpp
Normal file
@@ -0,0 +1,781 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "cif/common/cif_main.h"
|
||||
#include "cif/helpers/error.h"
|
||||
#include "cif/import/library_api.h"
|
||||
#include "ocl_igc_interface/code_type.h"
|
||||
#include "ocl_igc_interface/fcl_ocl_device_ctx.h"
|
||||
#include "ocl_igc_interface/igc_ocl_device_ctx.h"
|
||||
#include "ocl_igc_interface/platform_helper.h"
|
||||
#include "offline_compiler.h"
|
||||
#include "igfxfmid.h"
|
||||
#include "runtime/helpers/file_io.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
#include "runtime/os_interface/os_inc.h"
|
||||
#include "runtime/os_interface/os_library.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "elf/writer.h"
|
||||
#include <iomanip>
|
||||
#include <list>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#define MakeDirectory _mkdir
|
||||
#define GetCurrentWorkingDirectory _getcwd
|
||||
#else
|
||||
#include <sys/stat.h>
|
||||
#define MakeDirectory(dir) mkdir(dir, 0777)
|
||||
#define GetCurrentWorkingDirectory getcwd
|
||||
#endif
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
CIF::CIFMain *createMainNoSanitize(CIF::CreateCIFMainFunc_t createFunc);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// StringsAreEqual
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Compares two C strings for exact (case-sensitive) equality.
// A null pointer on either side never compares equal, so callers may pass
// entries of sparsely populated lookup tables or an unset argument directly.
bool stringsAreEqual(const char *string1, const char *string2) {
    // strcmp() has undefined behavior for null arguments - guard both sides
    if (string1 == nullptr || string2 == nullptr) {
        return false;
    }
    return (strcmp(string1, string2) == 0);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// convertToPascalCase
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Converts an underscore-separated identifier to PascalCase.
//
// Underscores are dropped. The first alphabetic character of the string and
// the first alphabetic character following an underscore are upper-cased;
// every other character is copied unchanged. Non-alphabetic characters do not
// consume the pending capitalization ("1abc" becomes "1Abc").
std::string convertToPascalCase(const std::string &inString) {
    std::string outString;
    outString.reserve(inString.length());
    bool capitalize = true;

    for (const char ch : inString) {
        // <ctype.h> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behavior.
        const unsigned char uch = static_cast<unsigned char>(ch);

        if (capitalize && isalpha(uch)) {
            outString += static_cast<char>(toupper(uch));
            capitalize = false;
        } else if (ch == '_') {
            capitalize = true;
        } else {
            outString += ch;
        }
    }
    return outString;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// ctor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
OfflineCompiler::OfflineCompiler() = default;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// dtor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Releases the binaries owned by the compiler. The three buffers are
// independent of each other; delete[] on a null pointer is a no-op.
OfflineCompiler::~OfflineCompiler() {
    delete[] elfBinary;
    delete[] genBinary;
    delete[] llvmBinary;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Create
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Creates and initializes an offline compiler from command-line arguments.
//
// retVal receives CL_SUCCESS or the error code reported by initialize().
// Returns the new instance on success; on failure the instance is destroyed
// and nullptr is returned.
OfflineCompiler *OfflineCompiler::create(uint32_t numArgs, const char **argv, int &retVal) {
    retVal = CL_SUCCESS;

    OfflineCompiler *pCompiler = new OfflineCompiler();
    if (pCompiler != nullptr) {
        retVal = pCompiler->initialize(numArgs, argv);
    }

    if (retVal == CL_SUCCESS) {
        return pCompiler;
    }

    delete pCompiler;
    return nullptr;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// buildSourceCode
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int OfflineCompiler::buildSourceCode() {
|
||||
int retVal = CL_SUCCESS;
|
||||
|
||||
do {
|
||||
if (strcmp(sourceCode.c_str(), "") == 0) {
|
||||
retVal = CL_INVALID_PROGRAM;
|
||||
break;
|
||||
}
|
||||
UNRECOVERABLE_IF(fclDeviceCtx == nullptr);
|
||||
UNRECOVERABLE_IF(igcDeviceCtx == nullptr);
|
||||
|
||||
IGC::CodeType::CodeType_t intermediateRepresentation = useLlvmText ? IGC::CodeType::llvmLl : IGC::CodeType::llvmBc;
|
||||
auto fclSrc = CIF::Builtins::CreateConstBuffer(fclMain.get(), sourceCode.c_str(), sourceCode.size());
|
||||
auto fclOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), options.c_str(), options.size());
|
||||
auto fclInternalOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), internalOptions.c_str(), internalOptions.size());
|
||||
|
||||
auto fclTranslationCtx = fclDeviceCtx->CreateTranslationCtx(IGC::CodeType::oclC, intermediateRepresentation);
|
||||
auto igcTranslationCtx = igcDeviceCtx->CreateTranslationCtx(intermediateRepresentation, IGC::CodeType::oclGenBin);
|
||||
|
||||
if (false == OCLRT::areNotNullptr(fclSrc.get(), fclOptions.get(), fclInternalOptions.get(),
|
||||
fclTranslationCtx.get(), igcTranslationCtx.get())) {
|
||||
retVal = CL_OUT_OF_HOST_MEMORY;
|
||||
break;
|
||||
}
|
||||
|
||||
auto fclOutput = fclTranslationCtx->Translate(fclSrc.get(), fclOptions.get(),
|
||||
fclInternalOptions.get(), nullptr, 0);
|
||||
|
||||
if (fclOutput == nullptr) {
|
||||
retVal = CL_OUT_OF_HOST_MEMORY;
|
||||
break;
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(fclOutput->GetBuildLog() == nullptr);
|
||||
UNRECOVERABLE_IF(fclOutput->GetOutput() == nullptr);
|
||||
|
||||
if (fclOutput->Successful() == false) {
|
||||
updateBuildLog(fclOutput->GetBuildLog()->GetMemory<char>(), fclOutput->GetBuildLog()->GetSizeRaw());
|
||||
retVal = CL_BUILD_PROGRAM_FAILURE;
|
||||
break;
|
||||
}
|
||||
|
||||
storeBinary(llvmBinary, llvmBinarySize, fclOutput->GetOutput()->GetMemory<char>(), fclOutput->GetOutput()->GetSizeRaw());
|
||||
updateBuildLog(fclOutput->GetBuildLog()->GetMemory<char>(), fclOutput->GetBuildLog()->GetSizeRaw());
|
||||
|
||||
auto igcOutput = igcTranslationCtx->Translate(fclOutput->GetOutput(), fclOptions.get(),
|
||||
fclInternalOptions.get(),
|
||||
nullptr, 0);
|
||||
if (igcOutput == nullptr) {
|
||||
retVal = CL_OUT_OF_HOST_MEMORY;
|
||||
break;
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(igcOutput->GetBuildLog() == nullptr);
|
||||
UNRECOVERABLE_IF(igcOutput->GetOutput() == nullptr);
|
||||
storeBinary(genBinary, genBinarySize, igcOutput->GetOutput()->GetMemory<char>(), igcOutput->GetOutput()->GetSizeRaw());
|
||||
updateBuildLog(igcOutput->GetBuildLog()->GetMemory<char>(), igcOutput->GetBuildLog()->GetSizeRaw());
|
||||
retVal = igcOutput->Successful() ? CL_SUCCESS : CL_BUILD_PROGRAM_FAILURE;
|
||||
} while (0);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// build
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int OfflineCompiler::build() {
|
||||
int retVal = CL_SUCCESS;
|
||||
|
||||
retVal = buildSourceCode();
|
||||
|
||||
if (retVal == CL_SUCCESS) {
|
||||
generateElfBinary();
|
||||
writeOutAllFiles();
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// updateBuildLog
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void OfflineCompiler::updateBuildLog(const char *pErrorString, const size_t errorStringSize) {
|
||||
std::string errorString = (errorStringSize && pErrorString) ? std::string(pErrorString, pErrorString + errorStringSize) : "";
|
||||
if (errorString[0] != '\0') {
|
||||
if (buildLog.empty()) {
|
||||
buildLog.assign(errorString);
|
||||
} else {
|
||||
buildLog.append("\n" + errorString);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// getBuildLog
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
std::string &OfflineCompiler::getBuildLog() {
|
||||
return buildLog;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// getHardwareInfo
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int OfflineCompiler::getHardwareInfo(const char *pDeviceName) {
|
||||
int retVal = CL_INVALID_DEVICE;
|
||||
|
||||
for (unsigned int productId = 0; productId < IGFX_MAX_PRODUCT; ++productId) {
|
||||
if (stringsAreEqual(pDeviceName, hardwarePrefix[productId])) {
|
||||
if (hardwareInfoTable[productId]) {
|
||||
hwInfo = hardwareInfoTable[productId];
|
||||
retVal = CL_SUCCESS;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// getStringWithinDelimiters
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Extracts the text enclosed in a raw-string wrapper of the form
//   R"===( ... )==="
//   src - text expected to contain the wrapper
// Returns the characters strictly between the opening delimiter and the first
// closing delimiter that follows it.
//
// Degenerate inputs still trigger DEBUG_BREAK_IF, but no longer rely on
// unsigned wrap-around:
//   - opening delimiter missing: src is returned unchanged;
//   - closing delimiter missing: everything after the opening delimiter is
//     returned.
std::string OfflineCompiler::getStringWithinDelimiters(const std::string &src) {
    static const char openingDelimiter[] = "R\"===(";
    static const char closingDelimiter[] = ")===\"";

    const size_t opening = src.find(openingDelimiter);
    DEBUG_BREAK_IF(std::string::npos == opening);
    if (opening == std::string::npos) {
        return src;
    }

    // Search for the terminator only after the opening delimiter, so that a
    // stray ")===\"" earlier in the text cannot produce a negative length.
    const size_t start = opening + strlen(openingDelimiter);
    const size_t stop = src.find(closingDelimiter, start);
    DEBUG_BREAK_IF(std::string::npos == stop);
    if (stop == std::string::npos) {
        return src.substr(start);
    }

    return src.substr(start, stop - start);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Initialize
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int OfflineCompiler::initialize(uint32_t numArgs, const char **argv) {
|
||||
int retVal = CL_SUCCESS;
|
||||
const char *pSource = nullptr;
|
||||
void *pSourceFromFile = nullptr;
|
||||
size_t sourceFromFileSize = 0;
|
||||
|
||||
retVal = parseCommandLine(numArgs, argv);
|
||||
if (retVal != CL_SUCCESS) {
|
||||
return retVal;
|
||||
}
|
||||
|
||||
parseDebugSettings();
|
||||
|
||||
if (options.empty()) {
|
||||
// try to read options from file if not provided by commandline
|
||||
std::string optionsFileName = inputFile;
|
||||
size_t ext_start = optionsFileName.find(".cl");
|
||||
if (ext_start != std::string::npos) {
|
||||
optionsFileName.replace(ext_start, strlen(".cl"), "_options.txt");
|
||||
void *pOptions = nullptr;
|
||||
size_t optionsSize = loadDataFromFile(optionsFileName.c_str(), pOptions);
|
||||
if (optionsSize > 0) {
|
||||
options = (char *)pOptions;
|
||||
// Remove comment containing copyright header
|
||||
size_t commentBegin = options.find_first_of("/*");
|
||||
size_t commentEnd = options.find_last_of("*/");
|
||||
if (commentBegin != std::string::npos && commentEnd != std::string::npos) {
|
||||
options = options.replace(commentBegin, commentEnd - commentBegin + 1, "");
|
||||
size_t optionsBegin = options.find_first_not_of(" \t\n\r");
|
||||
if (optionsBegin != std::string::npos) {
|
||||
options = options.substr(optionsBegin, options.length());
|
||||
}
|
||||
}
|
||||
auto trimPos = options.find_last_not_of(" \n\r");
|
||||
options = options.substr(0, trimPos + 1);
|
||||
if (!isQuiet())
|
||||
printf("Building with options:\n%s\n", options.c_str());
|
||||
}
|
||||
deleteDataReadFromFile(pOptions);
|
||||
}
|
||||
}
|
||||
|
||||
// set up the device inside the program
|
||||
sourceFromFileSize = loadDataFromFile(inputFile.c_str(), pSourceFromFile);
|
||||
struct Helper {
|
||||
static void deleter(void *ptr) { deleteDataReadFromFile(ptr); }
|
||||
};
|
||||
auto sourceRaii = std::unique_ptr<void, decltype(&Helper::deleter)>{pSourceFromFile, Helper::deleter};
|
||||
if (sourceFromFileSize == 0) {
|
||||
retVal = INVALID_FILE;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
// we also accept files used as runtime builtins
|
||||
pSource = strstr((const char *)pSourceFromFile, "R\"===(");
|
||||
sourceCode = (pSource != nullptr) ? getStringWithinDelimiters((char *)pSourceFromFile) : (char *)pSourceFromFile;
|
||||
|
||||
this->fclLib.reset(OsLibrary::load(Os::frontEndDllName));
|
||||
if (this->fclLib == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
auto fclCreateMain = reinterpret_cast<CIF::CreateCIFMainFunc_t>(this->fclLib->getProcAddress(CIF::CreateCIFMainFuncName));
|
||||
if (fclCreateMain == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
this->fclMain = CIF::RAII::UPtr(createMainNoSanitize(fclCreateMain));
|
||||
if (this->fclMain == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
if (false == this->fclMain->IsCompatible<IGC::FclOclDeviceCtx>()) {
|
||||
// given FCL is not compatible
|
||||
DEBUG_BREAK_IF(true);
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
this->fclDeviceCtx = this->fclMain->CreateInterface<IGC::FclOclDeviceCtxTagOCL>();
|
||||
if (this->fclDeviceCtx == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
fclDeviceCtx->SetOclApiVersion(hwInfo->capabilityTable.clVersionSupport * 10);
|
||||
|
||||
this->igcLib.reset(OsLibrary::load(Os::igcDllName));
|
||||
if (this->igcLib == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
auto igcCreateMain = reinterpret_cast<CIF::CreateCIFMainFunc_t>(this->igcLib->getProcAddress(CIF::CreateCIFMainFuncName));
|
||||
if (igcCreateMain == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
this->igcMain = CIF::RAII::UPtr(createMainNoSanitize(igcCreateMain));
|
||||
if (this->igcMain == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
if (false == this->igcMain->IsCompatible<IGC::IgcOclDeviceCtx>()) {
|
||||
// given IGC is not compatible
|
||||
DEBUG_BREAK_IF(true);
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
this->igcDeviceCtx = this->igcMain->CreateInterface<IGC::IgcOclDeviceCtxTagOCL>();
|
||||
if (this->igcDeviceCtx == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
this->igcDeviceCtx->SetProfilingTimerResolution(static_cast<float>(hwInfo->capabilityTable.defaultProfilingTimerResolution));
|
||||
auto igcPlatform = this->igcDeviceCtx->GetPlatformHandle();
|
||||
auto igcGtSystemInfo = this->igcDeviceCtx->GetGTSystemInfoHandle();
|
||||
auto igcFeWa = this->igcDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle();
|
||||
if ((igcPlatform == nullptr) || (igcGtSystemInfo == nullptr) || (igcFeWa == nullptr)) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
IGC::PlatformHelper::PopulateInterfaceWith(*igcPlatform.get(), *hwInfo->pPlatform);
|
||||
IGC::GtSysInfoHelper::PopulateInterfaceWith(*igcGtSystemInfo.get(), *hwInfo->pSysInfo);
|
||||
// populate with features
|
||||
igcFeWa.get()->SetFtrDesktop(hwInfo->pSkuTable->ftrDesktop);
|
||||
igcFeWa.get()->SetFtrChannelSwizzlingXOREnabled(hwInfo->pSkuTable->ftrChannelSwizzlingXOREnabled);
|
||||
|
||||
igcFeWa.get()->SetFtrGtBigDie(hwInfo->pSkuTable->ftrGtBigDie);
|
||||
igcFeWa.get()->SetFtrGtMediumDie(hwInfo->pSkuTable->ftrGtMediumDie);
|
||||
igcFeWa.get()->SetFtrGtSmallDie(hwInfo->pSkuTable->ftrGtSmallDie);
|
||||
|
||||
igcFeWa.get()->SetFtrGT1(hwInfo->pSkuTable->ftrGT1);
|
||||
igcFeWa.get()->SetFtrGT1_5(hwInfo->pSkuTable->ftrGT1_5);
|
||||
igcFeWa.get()->SetFtrGT2(hwInfo->pSkuTable->ftrGT2);
|
||||
igcFeWa.get()->SetFtrGT3(hwInfo->pSkuTable->ftrGT3);
|
||||
igcFeWa.get()->SetFtrGT4(hwInfo->pSkuTable->ftrGT4);
|
||||
|
||||
igcFeWa.get()->SetFtrIVBM0M1Platform(hwInfo->pSkuTable->ftrIVBM0M1Platform);
|
||||
igcFeWa.get()->SetFtrGTL(hwInfo->pSkuTable->ftrGT1);
|
||||
igcFeWa.get()->SetFtrGTM(hwInfo->pSkuTable->ftrGT2);
|
||||
igcFeWa.get()->SetFtrGTH(hwInfo->pSkuTable->ftrGT3);
|
||||
|
||||
igcFeWa.get()->SetFtrSGTPVSKUStrapPresent(hwInfo->pSkuTable->ftrSGTPVSKUStrapPresent);
|
||||
igcFeWa.get()->SetFtrGTA(hwInfo->pSkuTable->ftrGTA);
|
||||
igcFeWa.get()->SetFtrGTC(hwInfo->pSkuTable->ftrGTC);
|
||||
igcFeWa.get()->SetFtrGTX(hwInfo->pSkuTable->ftrGTX);
|
||||
igcFeWa.get()->SetFtr5Slice(hwInfo->pSkuTable->ftr5Slice);
|
||||
|
||||
igcFeWa.get()->SetFtrGpGpuMidThreadLevelPreempt(hwInfo->pSkuTable->ftrGpGpuMidThreadLevelPreempt);
|
||||
igcFeWa.get()->SetFtrIoMmuPageFaulting(hwInfo->pSkuTable->ftrIoMmuPageFaulting);
|
||||
igcFeWa.get()->SetFtrWddm2Svm(hwInfo->pSkuTable->ftrWddm2Svm);
|
||||
igcFeWa.get()->SetFtrPooledEuEnabled(hwInfo->pSkuTable->ftrPooledEuEnabled);
|
||||
|
||||
igcFeWa.get()->SetFtrResourceStreamer(hwInfo->pSkuTable->ftrResourceStreamer);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// ParseCommandLine
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int OfflineCompiler::parseCommandLine(uint32_t numArgs, const char **argv) {
|
||||
int retVal = CL_SUCCESS;
|
||||
bool compile32 = false;
|
||||
bool compile64 = false;
|
||||
|
||||
if (numArgs < 2) {
|
||||
printUsage();
|
||||
retVal = PRINT_USAGE;
|
||||
}
|
||||
|
||||
for (uint32_t argIndex = 1; argIndex < numArgs; argIndex++) {
|
||||
if ((stringsAreEqual(argv[argIndex], "-file")) &&
|
||||
(argIndex + 1 < numArgs)) {
|
||||
inputFile = argv[argIndex + 1];
|
||||
argIndex++;
|
||||
} else if (stringsAreEqual(argv[argIndex], "-32")) {
|
||||
compile32 = true;
|
||||
internalOptions.append(" -m32 ");
|
||||
} else if (stringsAreEqual(argv[argIndex], "-64")) {
|
||||
compile64 = true;
|
||||
internalOptions.append(" -m64 ");
|
||||
} else if (stringsAreEqual(argv[argIndex], "-cl-intel-greater-than-4GB-buffer-required")) {
|
||||
internalOptions.append(" -cl-intel-greater-than-4GB-buffer-required ");
|
||||
} else if ((stringsAreEqual(argv[argIndex], "-device")) &&
|
||||
(argIndex + 1 < numArgs)) {
|
||||
deviceName = argv[argIndex + 1];
|
||||
argIndex++;
|
||||
} else if (stringsAreEqual(argv[argIndex], "-llvm_text")) {
|
||||
useLlvmText = true;
|
||||
} else if (stringsAreEqual(argv[argIndex], "-cpp_file")) {
|
||||
useCppFile = true;
|
||||
} else if ((stringsAreEqual(argv[argIndex], "-options")) &&
|
||||
(argIndex + 1 < numArgs)) {
|
||||
options = argv[argIndex + 1];
|
||||
argIndex++;
|
||||
} else if (stringsAreEqual(argv[argIndex], "-options_name")) {
|
||||
useOptionsSuffix = true;
|
||||
} else if ((stringsAreEqual(argv[argIndex], "-out_dir")) &&
|
||||
(argIndex + 1 < numArgs)) {
|
||||
outputDirectory = argv[argIndex + 1];
|
||||
argIndex++;
|
||||
} else if (stringsAreEqual(argv[argIndex], "-q")) {
|
||||
quiet = true;
|
||||
} else if (stringsAreEqual(argv[argIndex], "-?")) {
|
||||
printUsage();
|
||||
retVal = PRINT_USAGE;
|
||||
} else {
|
||||
printf("Invalid option (arg %d): %s\n", argIndex, argv[argIndex]);
|
||||
retVal = INVALID_COMMAND_LINE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (retVal == CL_SUCCESS) {
|
||||
if (compile32 && compile64) {
|
||||
printf("Error: Cannot compile for 32-bit and 64-bit, please choose one.\n");
|
||||
retVal = INVALID_COMMAND_LINE;
|
||||
} else if (inputFile.empty()) {
|
||||
printf("Error: Input file name missing.\n");
|
||||
retVal = INVALID_COMMAND_LINE;
|
||||
} else if (deviceName.empty()) {
|
||||
printf("Error: Device name missing.\n");
|
||||
retVal = INVALID_COMMAND_LINE;
|
||||
} else if (!fileExists(inputFile)) {
|
||||
printf("Error: Input file %s missing.\n", inputFile.c_str());
|
||||
retVal = INVALID_FILE;
|
||||
} else {
|
||||
retVal = getHardwareInfo(deviceName.c_str());
|
||||
if (retVal != CL_SUCCESS) {
|
||||
printf("Error: Cannot get HW Info for device %s.\n", deviceName.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// ParseDebugSettings
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void OfflineCompiler::parseDebugSettings() {
|
||||
if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get()) {
|
||||
internalOptions += "-cl-intel-has-buffer-offset-arg ";
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// ParseBinAsCharArray
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
std::string OfflineCompiler::parseBinAsCharArray(uint8_t *binary, size_t size, std::string &deviceName, std::string &fileName) {
|
||||
std::string builtinName = convertToPascalCase(fileName);
|
||||
std::ostringstream out;
|
||||
|
||||
// Convert binary to cpp
|
||||
out << "#include <cstddef>\n";
|
||||
out << "#include <cstdint>\n\n";
|
||||
out << "size_t " << builtinName << "BinarySize_" << deviceName << " = " << size << ";\n";
|
||||
out << "uint32_t " << builtinName << "Binary_" << deviceName << "[" << (size + 3) / 4 << "] = {"
|
||||
<< std::endl
|
||||
<< " ";
|
||||
|
||||
uint32_t *binaryUint = (uint32_t *)binary;
|
||||
for (size_t i = 0; i < (size + 3) / 4; i++) {
|
||||
if (i != 0) {
|
||||
out << ", ";
|
||||
if (i % 8 == 0) {
|
||||
out << std::endl
|
||||
<< " ";
|
||||
}
|
||||
}
|
||||
if (i < size / 4) {
|
||||
out << "0x" << std::hex << std::setw(8) << std::setfill('0') << binaryUint[i];
|
||||
} else {
|
||||
uint32_t lastBytes = size & 0x3;
|
||||
uint32_t lastUint = 0;
|
||||
uint8_t *pLastUint = (uint8_t *)&lastUint;
|
||||
for (uint32_t j = 0; j < lastBytes; j++) {
|
||||
pLastUint[sizeof(uint32_t) - 1 - j] = binary[i * 4 + j];
|
||||
}
|
||||
out << "0x" << std::hex << std::setw(8) << std::setfill('0') << lastUint;
|
||||
}
|
||||
}
|
||||
out << "};" << std::endl;
|
||||
|
||||
out << std::endl
|
||||
<< "#include \"runtime/built_ins/registry/built_ins_registry.h\"\n"
|
||||
<< std::endl;
|
||||
out << "namespace OCLRT {" << std::endl;
|
||||
out << "static RegisterEmbeddedResource register" << builtinName << "Bin(" << std::endl;
|
||||
out << " createBuiltinResourceName(" << std::endl;
|
||||
out << " EBuiltInOps::" << builtinName << "," << std::endl;
|
||||
out << " BuiltinCode::getExtension(BuiltinCode::ECodeType::Binary), \"" << deviceName << "\", 0)" << std::endl;
|
||||
out << " .c_str()," << std::endl;
|
||||
out << " (const char *)" << builtinName << "Binary"
|
||||
<< "_" << deviceName << "," << std::endl;
|
||||
out << " " << builtinName << "BinarySize_" << deviceName << ");" << std::endl;
|
||||
out << "}" << std::endl;
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// GetFileNameTrunk
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns the file name without directory and without extension.
//   filePath - path using '/' or '\\' as separators
// The trunk starts after the last separator and ends before the last '.'.
// A '.' that belongs to a directory component (i.e. precedes the last
// separator) is not treated as an extension.
std::string OfflineCompiler::getFileNameTrunk(std::string &filePath) {
    // When no separator exists find_last_of yields npos, and npos + 1 wraps to 0.
    const size_t nameBegin = filePath.find_last_of("\\/") + 1;

    size_t nameEnd = filePath.find_last_of('.');
    if (nameEnd == std::string::npos || nameEnd < nameBegin) {
        nameEnd = filePath.size();
    }

    return filePath.substr(nameBegin, nameEnd - nameBegin);
}
|
||||
//
|
||||
std::string getDevicesTypes() {
|
||||
std::list<std::string> prefixes;
|
||||
for (int j = 0; j < IGFX_MAX_PRODUCT; j++) {
|
||||
if (hardwarePrefix[j] == nullptr)
|
||||
continue;
|
||||
prefixes.push_back(hardwarePrefix[j]);
|
||||
}
|
||||
|
||||
ostringstream os;
|
||||
for (auto it = prefixes.begin(); it != prefixes.end(); it++) {
|
||||
if (it != prefixes.begin())
|
||||
os << ",";
|
||||
os << *it;
|
||||
}
|
||||
|
||||
return os.str();
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// PrintUsage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Prints the command-line help to stdout.
// The option names shown here match the ones accepted by parseCommandLine().
void OfflineCompiler::printUsage() {

    printf("Compiles CL files into llvm (.bc or .ll), gen isa (.gen), and binary files (.bin)\n\n");
    printf("cloc -file <filename> -device <device_type> [-out_dir <output_dir>]\n\n");
    printf(" -file <filename> Indicates the CL kernel file to be compiled.\n");
    printf(" -device <device_type> Indicates which device for which we will compile.\n");
    printf(" <device_type> can be: %s\n", getDevicesTypes().c_str());
    printf(" -out_dir <output_dir> Indicates the directory into which the compiled files\n");
    printf(" will be placed.\n");
    printf(" -llvm_text Readable LLVM text will be output in a .ll file instead of\n");
    printf(" through the default llvm binary (.bc) file.\n");
    printf(" -cpp_file Cpp file with scheduler program binary will be generated.\n");
    printf(" -options <options> Compiler options.\n");
    printf(" -options_name Add suffix with compile options to filename\n");
    printf(" -32 Force compile to 32-bit binary.\n");
    printf(" -64 Force compile to 64-bit binary.\n");
    printf(" -q Be more quiet. print only warnings and errors.\n");
    printf(" -? Print this usage message.\n");
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// StoreBinary
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void OfflineCompiler::storeBinary(
|
||||
char *&pDst,
|
||||
size_t &dstSize,
|
||||
const void *pSrc,
|
||||
const size_t srcSize) {
|
||||
dstSize = 0;
|
||||
|
||||
DEBUG_BREAK_IF(!(pSrc && srcSize > 0));
|
||||
|
||||
delete[] pDst;
|
||||
pDst = new char[srcSize];
|
||||
|
||||
dstSize = (cl_uint)srcSize;
|
||||
memcpy_s(pDst, dstSize, pSrc, srcSize);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// GenerateElfBinary
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool OfflineCompiler::generateElfBinary() {
|
||||
bool retVal = true;
|
||||
CLElfLib::CElfWriter *pElfWriter = nullptr;
|
||||
|
||||
if (!genBinary || !genBinarySize) {
|
||||
retVal = false;
|
||||
}
|
||||
|
||||
if (retVal) {
|
||||
pElfWriter = CLElfLib::CElfWriter::create(CLElfLib::EH_TYPE_OPENCL_EXECUTABLE, CLElfLib::EH_MACHINE_NONE, 0);
|
||||
|
||||
if (pElfWriter) {
|
||||
CLElfLib::SSectionNode sectionNode;
|
||||
|
||||
// Always add the options string
|
||||
sectionNode.Name = "BuildOptions";
|
||||
sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_OPTIONS;
|
||||
sectionNode.pData = (char *)options.c_str();
|
||||
sectionNode.DataSize = (uint32_t)(strlen(options.c_str()) + 1);
|
||||
|
||||
retVal = pElfWriter->addSection(§ionNode);
|
||||
|
||||
if (retVal) {
|
||||
sectionNode.Name = "Intel(R) OpenCL LLVM Object";
|
||||
sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY;
|
||||
sectionNode.pData = llvmBinary;
|
||||
sectionNode.DataSize = (uint32_t)llvmBinarySize;
|
||||
retVal = pElfWriter->addSection(§ionNode);
|
||||
}
|
||||
|
||||
// Add the device binary if it exists
|
||||
if (retVal && genBinary) {
|
||||
sectionNode.Name = "Intel(R) OpenCL Device Binary";
|
||||
sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_DEV_BINARY;
|
||||
sectionNode.pData = genBinary;
|
||||
sectionNode.DataSize = (uint32_t)genBinarySize;
|
||||
|
||||
retVal = pElfWriter->addSection(§ionNode);
|
||||
}
|
||||
|
||||
if (retVal) {
|
||||
// get the size
|
||||
retVal = pElfWriter->resolveBinary(elfBinary, elfBinarySize);
|
||||
}
|
||||
|
||||
if (retVal) {
|
||||
// allocate the binary
|
||||
elfBinary = new char[elfBinarySize];
|
||||
|
||||
retVal = pElfWriter->resolveBinary(elfBinary, elfBinarySize);
|
||||
}
|
||||
} else {
|
||||
retVal = false;
|
||||
}
|
||||
|
||||
CLElfLib::CElfWriter::destroy(pElfWriter);
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WriteOutAllFiles
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void OfflineCompiler::writeOutAllFiles() {
|
||||
std::string fileTrunk = getFileNameTrunk(inputFile);
|
||||
std::string fileBase = fileTrunk + "_" + deviceName;
|
||||
|
||||
if (outputDirectory != "") {
|
||||
std::list<std::string> dirList;
|
||||
std::string tmp = outputDirectory;
|
||||
size_t pos = outputDirectory.size() + 1;
|
||||
|
||||
do {
|
||||
dirList.push_back(tmp);
|
||||
pos = tmp.find_last_of("/\\", pos);
|
||||
tmp = tmp.substr(0, pos);
|
||||
} while (pos != std::string::npos);
|
||||
|
||||
while (!dirList.empty()) {
|
||||
MakeDirectory(dirList.back().c_str());
|
||||
dirList.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
if (llvmBinary) {
|
||||
std::string llvmOutputFile = (outputDirectory == "") ? "" : outputDirectory + "/";
|
||||
(useLlvmText == true) ? llvmOutputFile.append(fileBase + ".ll") : llvmOutputFile.append(fileBase + ".bc");
|
||||
|
||||
if (useOptionsSuffix) {
|
||||
std::string opts(options.c_str());
|
||||
std::replace(opts.begin(), opts.end(), ' ', '_');
|
||||
llvmOutputFile.append(opts);
|
||||
}
|
||||
|
||||
writeDataToFile(
|
||||
llvmOutputFile.c_str(),
|
||||
llvmBinary,
|
||||
llvmBinarySize);
|
||||
}
|
||||
|
||||
if (genBinary) {
|
||||
std::string genOutputFile = (outputDirectory == "") ? "" : outputDirectory + "/";
|
||||
genOutputFile.append(fileBase + ".gen");
|
||||
|
||||
if (useOptionsSuffix) {
|
||||
std::string opts(options.c_str());
|
||||
std::replace(opts.begin(), opts.end(), ' ', '_');
|
||||
genOutputFile.append(opts);
|
||||
}
|
||||
|
||||
writeDataToFile(
|
||||
genOutputFile.c_str(),
|
||||
genBinary,
|
||||
genBinarySize);
|
||||
|
||||
if (useCppFile) {
|
||||
std::string cppOutputFile = (outputDirectory == "") ? "" : outputDirectory + "/";
|
||||
cppOutputFile.append(fileBase + ".cpp");
|
||||
std::string cpp = parseBinAsCharArray((uint8_t *)genBinary, genBinarySize, deviceName, fileTrunk);
|
||||
writeDataToFile(cppOutputFile.c_str(), cpp.c_str(), cpp.size());
|
||||
}
|
||||
}
|
||||
|
||||
if (elfBinary) {
|
||||
std::string elfOutputFile = (outputDirectory == "") ? "" : outputDirectory + "/";
|
||||
|
||||
elfOutputFile.append(fileBase + ".bin");
|
||||
|
||||
if (useOptionsSuffix) {
|
||||
std::string opts(options.c_str());
|
||||
std::replace(opts.begin(), opts.end(), ' ', '_');
|
||||
elfOutputFile.append(opts);
|
||||
}
|
||||
|
||||
writeDataToFile(
|
||||
elfOutputFile.c_str(),
|
||||
elfBinary,
|
||||
elfBinarySize);
|
||||
}
|
||||
}
|
||||
} // namespace OCLRT
|
||||
106
offline_compiler/offline_compiler.h
Normal file
106
offline_compiler/offline_compiler.h
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "cif/common/cif_main.h"
|
||||
#include "ocl_igc_interface/igc_ocl_device_ctx.h"
|
||||
#include "ocl_igc_interface/fcl_ocl_device_ctx.h"
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
struct HardwareInfo;
|
||||
class OsLibrary;
|
||||
|
||||
std::string convertToPascalCase(const std::string &inString);
|
||||
|
||||
// Status codes specific to the offline compiler. They are returned through
// the same int channel as the OpenCL CL_* codes.
enum ErrorCode {
    INVALID_COMMAND_LINE = -5150,          // unknown or inconsistent arguments
    INVALID_FILE = INVALID_COMMAND_LINE - 1, // -5151: input file missing or empty
    PRINT_USAGE = INVALID_FILE - 1,          // -5152: usage text was printed
};
|
||||
|
||||
class OfflineCompiler {
|
||||
public:
|
||||
static OfflineCompiler *create(uint32_t numArgs, const char **argv, int &retVal);
|
||||
int build();
|
||||
std::string &getBuildLog();
|
||||
void printUsage();
|
||||
|
||||
OfflineCompiler &operator=(const OfflineCompiler &) = delete;
|
||||
OfflineCompiler(const OfflineCompiler &) = delete;
|
||||
~OfflineCompiler();
|
||||
|
||||
bool isQuiet() const {
|
||||
return quiet;
|
||||
}
|
||||
|
||||
std::string parseBinAsCharArray(uint8_t *binary, size_t size, std::string &deviceName, std::string &fileName);
|
||||
|
||||
protected:
|
||||
OfflineCompiler();
|
||||
|
||||
int getHardwareInfo(const char *pDeviceName);
|
||||
std::string getFileNameTrunk(std::string &filePath);
|
||||
std::string getStringWithinDelimiters(const std::string &src);
|
||||
int initialize(uint32_t numArgs, const char **argv);
|
||||
int parseCommandLine(uint32_t numArgs, const char **argv);
|
||||
void parseDebugSettings();
|
||||
void storeBinary(char *&pDst, size_t &dstSize, const void *pSrc, const size_t srcSize);
|
||||
int buildSourceCode();
|
||||
void updateBuildLog(const char *pErrorString, const size_t errorStringSize);
|
||||
bool generateElfBinary();
|
||||
void writeOutAllFiles();
|
||||
const HardwareInfo *hwInfo = nullptr;
|
||||
|
||||
std::string deviceName;
|
||||
std::string inputFile;
|
||||
std::string outputFile;
|
||||
std::string outputDirectory;
|
||||
std::string options;
|
||||
std::string internalOptions;
|
||||
std::string sourceCode;
|
||||
std::string buildLog;
|
||||
|
||||
bool useLlvmText = false;
|
||||
bool useCppFile = false;
|
||||
bool useOptionsSuffix = false;
|
||||
bool quiet = false;
|
||||
|
||||
char *elfBinary = nullptr;
|
||||
size_t elfBinarySize = 0;
|
||||
char *genBinary = nullptr;
|
||||
size_t genBinarySize = 0;
|
||||
char *llvmBinary = nullptr;
|
||||
size_t llvmBinarySize = 0;
|
||||
|
||||
std::unique_ptr<OsLibrary> igcLib = nullptr;
|
||||
CIF::RAII::UPtr_t<CIF::CIFMain> igcMain = nullptr;
|
||||
CIF::RAII::UPtr_t<IGC::IgcOclDeviceCtxTagOCL> igcDeviceCtx = nullptr;
|
||||
|
||||
std::unique_ptr<OsLibrary> fclLib = nullptr;
|
||||
CIF::RAII::UPtr_t<CIF::CIFMain> fclMain = nullptr;
|
||||
CIF::RAII::UPtr_t<IGC::FclOclDeviceCtxTagOCL> fclDeviceCtx = nullptr;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
43
offline_compiler/options.cpp
Normal file
43
offline_compiler/options.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/helpers/array_count.h"
|
||||
#include <cstddef>
|
||||
|
||||
namespace OCLRT {
|
||||
// AUB file folder location
|
||||
const char *folderAUB = "aub_out";
|
||||
|
||||
// Initial value for HW tag
|
||||
uint32_t initialHardwareTag = (uint32_t)-1;
|
||||
|
||||
// Number of devices in the platform
|
||||
static const HardwareInfo *DefaultPlatformDevices[] =
|
||||
{
|
||||
&DEFAULT_PLATFORM::hwInfo,
|
||||
};
|
||||
|
||||
size_t numPlatformDevices = ARRAY_COUNT(DefaultPlatformDevices);
|
||||
const HardwareInfo **platformDevices = DefaultPlatformDevices;
|
||||
} // namespace OCLRT
|
||||
125
package.cmake
Normal file
125
package.cmake
Normal file
@@ -0,0 +1,125 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# Packaging rules (Linux only): installs the runtime libraries, generates the
# ld.so / ICD configuration files and the post-install scripts, and configures
# CPack for the "igdrcl" component.
if(UNIX)
  set(package_input_dir ${IGDRCL_BINARY_DIR}/packageinput)
  set(package_output_dir ${IGDRCL_BINARY_DIR}/packages)

  # Default version is 1.0-0 unless provided by the caller
  if(NOT NEO_VERSION_MAJOR)
    set(NEO_VERSION_MAJOR 1)
  endif()
  if(NOT NEO_VERSION_MINOR)
    set(NEO_VERSION_MINOR 0)
  endif()
  if(NOT NEO_VERSION_BUILD)
    set(NEO_VERSION_BUILD 0)
  endif()

  set(NEO_BINARY_INSTALL_DIR /opt/intel/opencl)
  set(CMAKE_INSTALL_PREFIX ${NEO_BINARY_INSTALL_DIR})

  install(FILES
    ${IGDRCL_BINARY_DIR}/bin/libigdrcl.so
    ${IGDRCL_BINARY_DIR}/bin/libigdccl.so
    ${IGDRCL_BINARY_DIR}/bin/libigdfcl.so
    ${IGDRCL_BINARY_DIR}/bin/libiga64.so
    ${IGDRCL_BINARY_DIR}/bin/libcommon_clang.so
    DESTINATION ${NEO_BINARY_INSTALL_DIR}
    COMPONENT igdrcl
  )

  # Files generated at install time: ld.so configuration, ICD vendor file and
  # the package post-install / post-remove scripts
  set(OCL_ICD_RUNTIME_NAME libigdrcl.so)
  install(
    CODE "file( WRITE ${IGDRCL_BINARY_DIR}/libintelopencl.conf \"/opt/intel/opencl\n\" )"
    CODE "file( WRITE ${IGDRCL_BINARY_DIR}/intel.icd \"/opt/intel/opencl/${OCL_ICD_RUNTIME_NAME}\n\" )"
    CODE "file( WRITE ${IGDRCL_BINARY_DIR}/postinst \"echo /opt/intel/opencl >> /etc/ld.so.conf\n\" )"
    CODE "file( APPEND ${IGDRCL_BINARY_DIR}/postinst \"/sbin/ldconfig\n\" )"
    CODE "file( WRITE ${IGDRCL_BINARY_DIR}/postrm \"sed -i '/\\\\/opt\\\\/intel\\\\/opencl.*$/d' /etc/ld.so.conf\n\" )"
    CODE "file( APPEND ${IGDRCL_BINARY_DIR}/postrm \"/sbin/ldconfig\n\" )"
    COMPONENT igdrcl
  )
  install(FILES ${IGDRCL_BINARY_DIR}/libintelopencl.conf DESTINATION /etc/ld.so.conf.d COMPONENT igdrcl)
  install(FILES ${IGDRCL_BINARY_DIR}/intel.icd DESTINATION /etc/OpenCL/vendors/ COMPONENT igdrcl)

  # Add Khronos ICD loader - if available
  if(NOT ICD_LIB_DIR)
    # Try to find ICD in upper level directory
    if(EXISTS ${IGDRCL_SOURCE_DIR}/../OpenCL-ICD-Loader/build/lib/libOpenCL.so)
      set(ICD_LIB_DIR ${IGDRCL_SOURCE_DIR}/../OpenCL-ICD-Loader/build/lib)
      message(STATUS "Taking ICD library from ${ICD_LIB_DIR}")
    else()
      get_filename_component(IGDRCL_PARENT_DIR ${IGDRCL_SOURCE_DIR} DIRECTORY)
      message(WARNING "Missing Khronos ICD library. Generated package (.rpm, .deb, .tar.xz) may be incomplete.\nPlease download Khronos ICD loader to ${IGDRCL_PARENT_DIR} and build it, or point to the directory containing libOpenCL.so using ICD_LIB_DIR.")
    endif()
  endif()

  if(ICD_LIB_DIR)
    get_filename_component(ICD_LIB_DIR ${ICD_LIB_DIR} ABSOLUTE)
    set(ICD_LIB_NAME "libOpenCL.so*")
    install(
      CODE "if(NOT((EXISTS ${ICD_LIB_DIR}/libOpenCL.so) OR (IS_SYMLINK ${ICD_LIB_DIR}/libOpenCL.so)))\n execute_process( COMMAND ln -s ${NEO_BINARY_INSTALL_DIR}/libOpenCL.so.1 ${ICD_LIB_DIR}/libOpenCL.so)\n endif()\n"
      CODE "file( GLOB _NeoIcdLibFiles \"${ICD_LIB_DIR}/${ICD_LIB_NAME}\" )"
      CODE "if(NOT _NeoIcdLibFiles)\n message(FATAL_ERROR \"${ICD_LIB_NAME} cannot be found in ${ICD_LIB_DIR}\")\nendif()"
      CODE "file( INSTALL \${_NeoIcdLibFiles} DESTINATION \"${NEO_BINARY_INSTALL_DIR}\" )"
      COMPONENT igdrcl
    )
  endif()

  if(NEO_CPACK_GENERATOR)
    set(CPACK_GENERATOR "${NEO_CPACK_GENERATOR}")
  else()
    # If the generator list was not defined, build the native package for the current distro
    if(EXISTS "/etc/debian_version")
      set(CPACK_GENERATOR "DEB")
    elseif(EXISTS "/etc/redhat-release")
      set(CPACK_GENERATOR "RPM")
    else()
      set(CPACK_GENERATOR "TXZ")
    endif()
  endif()

  set(CPACK_SET_DESTDIR TRUE)
  set(CPACK_PACKAGE_RELOCATABLE FALSE)
  set(CPACK_PACKAGE_NAME "intel-opencl")
  set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Intel OpenCL GPU driver")
  set(CPACK_PACKAGE_VENDOR "Intel")
  set(CPACK_PACKAGE_VERSION_MAJOR ${NEO_VERSION_MAJOR})
  set(CPACK_PACKAGE_VERSION_MINOR ${NEO_VERSION_MINOR})
  set(CPACK_PACKAGE_VERSION_PATCH ${NEO_VERSION_BUILD})
  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64")
  set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "postinst;postrm")
  set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
  set(CPACK_RPM_COMPRESSION_TYPE "xz")
  set(CPACK_RPM_PACKAGE_DESCRIPTION "Intel OpenCL GPU driver")
  set(CPACK_RPM_PACKAGE_GROUP "System Environment/Libraries")
  set(CPACK_RPM_PACKAGE_LICENSE "MIT")
  set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${IGDRCL_BINARY_DIR}/postinst")
  set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${IGDRCL_BINARY_DIR}/postrm")
  set(CPACK_PACKAGE_INSTALL_DIRECTORY "/opt/intel/opencl")
  set(CPACK_PACKAGE_CONTACT "Intel Corporation")
  set(CPACK_PACKAGE_FILE_NAME "intel-opencl-${NEO_VERSION_MAJOR}.${NEO_VERSION_MINOR}-${NEO_VERSION_BUILD}.${CPACK_RPM_PACKAGE_ARCHITECTURE}")
  set(CPACK_DEB_COMPONENT_INSTALL ON)
  set(CPACK_RPM_COMPONENT_INSTALL ON)
  set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
  set(CPACK_COMPONENTS_ALL igdrcl)

  include(CPack)

endif(UNIX)
|
||||
255
platforms.cmake
Normal file
255
platforms.cmake
Normal file
@@ -0,0 +1,255 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# We require cmake 3.2.0 or later
|
||||
cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR)
|
||||
|
||||
set(MAX_GEN 64)
|
||||
|
||||
# INIT_LIST(<LIST_TYPE> <ELEMENT_TYPE>)
# Appends MAX_GEN + 1 placeholder entries (one space each) to
# ALL_<ELEMENT_TYPE>_<LIST_TYPE>, giving the list one slot per index
# in the range 0..MAX_GEN.
macro(INIT_LIST LIST_TYPE ELEMENT_TYPE)
  # A single-value RANGE iterates 0..MAX_GEN inclusive with step 1.
  foreach(IT RANGE ${MAX_GEN})
    list(APPEND ALL_${ELEMENT_TYPE}_${LIST_TYPE} " ")
  endforeach()
endmacro()
|
||||
|
||||
# GET_LIST_FOR_GEN(<LIST_TYPE> <ELEMENT_TYPE> <GEN_NUMBER> <OUT_LIST>)
# Fetches entry GEN_NUMBER of ALL_<ELEMENT_TYPE>_<LIST_TYPE> and stores it in
# OUT_LIST with every "_" replaced by ";", so the "_"-joined items of that
# entry become separate list elements.
# The unmodified entry is left in GEN_X_<LIST_TYPE> as a side effect.
macro(GET_LIST_FOR_GEN LIST_TYPE ELEMENT_TYPE GEN_NUMBER OUT_LIST)
  list(GET ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${GEN_NUMBER} GEN_X_${LIST_TYPE})
  string(REPLACE "_" ";" ${OUT_LIST} ${GEN_X_${LIST_TYPE}})
endmacro()
|
||||
|
||||
# ADD_ITEM_FOR_GEN(<LIST_TYPE> <ELEMENT_TYPE> <GEN_NUMBER> <ITEM>)
# Adds ITEM to the entry at index GEN_NUMBER of ALL_<ELEMENT_TYPE>_<LIST_TYPE>.
# Items that share one entry are joined with "_".
macro(ADD_ITEM_FOR_GEN LIST_TYPE ELEMENT_TYPE GEN_NUMBER ITEM)
  list(GET ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${GEN_NUMBER} GEN_X_LIST)
  # Drop spaces so that an entry holding only the " " placeholder reads as empty.
  string(REPLACE " " "" GEN_X_LIST ${GEN_X_LIST})
  if(NOT "${GEN_X_LIST}" STREQUAL "")
    # Entry already has items: append with the "_" separator.
    set(GEN_X_LIST "${GEN_X_LIST}_${ITEM}")
  else()
    set(GEN_X_LIST "${ITEM}")
  endif()
  # Replace the old entry with the updated one at the same index.
  list(REMOVE_AT ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${GEN_NUMBER})
  list(INSERT ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${GEN_NUMBER} ${GEN_X_LIST})
endmacro()
|
||||
|
||||
# GEN_CONTAINS_PLATFORMS(<TYPE> <GEN_NUMBER> <OUT_FLAG>)
# Sets OUT_FLAG to TRUE when the "PLATFORMS" entry of kind TYPE for
# GEN_NUMBER holds anything besides spaces, and to FALSE otherwise.
# GEN_X_PLATFORMS is overwritten as a side effect.
macro(GEN_CONTAINS_PLATFORMS TYPE GEN_NUMBER OUT_FLAG)
  GET_LIST_FOR_GEN("PLATFORMS" ${TYPE} ${GEN_NUMBER} GEN_X_PLATFORMS)
  # Remove spaces so that a placeholder-only entry compares equal to "".
  string(REPLACE " " "" GEN_X_PLATFORMS ${GEN_X_PLATFORMS})
  if(NOT "${GEN_X_PLATFORMS}" STREQUAL "")
    set(${OUT_FLAG} TRUE)
  else()
    set(${OUT_FLAG} FALSE)
  endif()
endmacro()
|
||||
|
||||
# GET_AVAILABLE_PLATFORMS(<TYPE> <FLAG_NAME> <OUT_STR>)
# Walks the generations from MAX_GEN down to 0 and, for each generation that
# has platforms of kind TYPE:
#   - appends <FLAG_NAME>_GEN<n> to <TYPE>_GEN_FLAGS_DEFINITONS,
#   - appends the generation's platforms to <TYPE>_PLATFORM_LIST,
#   - sets DEFAULT_<TYPE>_PLATFORM to the first collected platform and
#     DEFAULT_<TYPE>_GEN<n>_PLATFORM to the generation's first platform,
#     each only when the variable does not already evaluate to true.
# Afterwards every collected platform is appended (space separated) to OUT_STR
# and <FLAG_NAME>_<platform> is appended to <TYPE>_GEN_FLAGS_DEFINITONS.
# Note: the spelling "DEFINITONS" is kept on purpose; it is the variable name
# this macro has always produced.
macro(GET_AVAILABLE_PLATFORMS TYPE FLAG_NAME OUT_STR)
  set(${TYPE}_PLATFORM_LIST)
  set(${TYPE}_GEN_FLAGS_DEFINITONS)
  foreach(GEN_NUM RANGE ${MAX_GEN} 0 -1)
    GEN_CONTAINS_PLATFORMS(${TYPE} ${GEN_NUM} GENX_HAS_PLATFORMS)
    if(${GENX_HAS_PLATFORMS})
      list(APPEND ${TYPE}_GEN_FLAGS_DEFINITONS ${FLAG_NAME}_GEN${GEN_NUM})
      GET_LIST_FOR_GEN("PLATFORMS" ${TYPE} ${GEN_NUM} ${TYPE}_GENX_PLATFORMS)
      list(APPEND ${TYPE}_PLATFORM_LIST ${${TYPE}_GENX_PLATFORMS})
      if(NOT DEFAULT_${TYPE}_PLATFORM)
        # list(GET) takes the output variable as its last argument. The former
        # trailing ${PLATFORM_IT} only worked while that variable expanded to
        # nothing; otherwise DEFAULT_<TYPE>_PLATFORM was parsed as an index.
        list(GET ${TYPE}_PLATFORM_LIST 0 DEFAULT_${TYPE}_PLATFORM)
      endif()
      if(NOT DEFAULT_${TYPE}_GEN${GEN_NUM}_PLATFORM)
        list(GET ${TYPE}_GENX_PLATFORMS 0 DEFAULT_${TYPE}_GEN${GEN_NUM}_PLATFORM)
      endif()
    endif()
  endforeach()
  foreach(PLATFORM_IT ${${TYPE}_PLATFORM_LIST})
    set(${OUT_STR} "${${OUT_STR}} ${PLATFORM_IT}")
    list(APPEND ${TYPE}_GEN_FLAGS_DEFINITONS ${FLAG_NAME}_${PLATFORM_IT})
  endforeach()
endmacro(GET_AVAILABLE_PLATFORMS)
|
||||
|
||||
# GET_PLATFORMS_FOR_GEN(<TYPE> <GEN_NUMBER> <OUT_LIST>)
# Convenience wrapper: forwards to GET_LIST_FOR_GEN with the list type fixed
# to "PLATFORMS" and stores the result in OUT_LIST.
macro(GET_PLATFORMS_FOR_GEN TYPE GEN_NUMBER OUT_LIST)
  GET_LIST_FOR_GEN("PLATFORMS" ${TYPE} ${GEN_NUMBER} ${OUT_LIST})
endmacro()
|
||||
|
||||
# GET_TEST_CONFIGURATIONS_FOR_PLATFORM(<TYPE> <GEN_NUMBER> <PLATFORM> <OUT_LIST>)
# Collects into OUT_LIST every "CONFIGURATIONS" item of kind TYPE for
# GEN_NUMBER whose first "/"-separated field equals PLATFORM in lower case
# (e.g. "skl/1/3/8" matches PLATFORM "SKL").
# OUT_LIST is cleared first; it stays empty when nothing matches.
macro(GET_TEST_CONFIGURATIONS_FOR_PLATFORM TYPE GEN_NUMBER PLATFORM OUT_LIST)
  set(${OUT_LIST})
  string(TOLOWER "${PLATFORM}" PLATFORM_LOWER)
  GET_LIST_FOR_GEN("CONFIGURATIONS" ${TYPE} ${GEN_NUMBER} ALL_CONFIGURATIONS_FOR_GEN)
  foreach(CONFIGURATION ${ALL_CONFIGURATIONS_FOR_GEN})
    # Split "platform/a/b/c" into a list to read the platform field.
    string(REPLACE "/" ";" CONFIGURATION_PARAMS "${CONFIGURATION}")
    list(GET CONFIGURATION_PARAMS 0 CONFIGURATION_PLATFORM)
    # Operands are quoted so that an empty field cannot produce a malformed
    # if() and so that the values are compared as strings rather than being
    # looked up as variable names.
    if("${CONFIGURATION_PLATFORM}" STREQUAL "${PLATFORM_LOWER}")
      list(APPEND ${OUT_LIST} ${CONFIGURATION})
    endif()
  endforeach()
endmacro(GET_TEST_CONFIGURATIONS_FOR_PLATFORM)
|
||||
|
||||
# PLATFORM_HAS_2_0(<GEN_NUMBER> <PLATFORM_NAME> <OUT_FLAG>)
# Sets OUT_FLAG to TRUE when PLATFORM_NAME is found in the "SUPPORTED_2_0"
# platform list of GEN_NUMBER, and to FALSE otherwise.
# GEN_X_PLATFORMS and PLATFORM_EXISTS are overwritten as a side effect.
macro(PLATFORM_HAS_2_0 GEN_NUMBER PLATFORM_NAME OUT_FLAG)
  GET_LIST_FOR_GEN("PLATFORMS" "SUPPORTED_2_0" ${GEN_NUMBER} GEN_X_PLATFORMS)
  # list(FIND) yields -1 when the platform is absent.
  list(FIND GEN_X_PLATFORMS ${PLATFORM_NAME} PLATFORM_EXISTS)
  if(NOT "${PLATFORM_EXISTS}" LESS 0)
    set(${OUT_FLAG} TRUE)
  else()
    set(${OUT_FLAG} FALSE)
  endif()
endmacro()
|
||||
|
||||
# PLATFORM_TESTED_WITH_APPVERIFIER(<GEN_NUMBER> <PLATFORM_NAME> <OUT_FLAG>)
# Sets OUT_FLAG to TRUE when PLATFORM_NAME is found in the
# "TESTED_APPVERIFIER" platform list of GEN_NUMBER, and to FALSE otherwise.
# GEN_X_PLATFORMS and PLATFORM_EXISTS are overwritten as a side effect.
macro(PLATFORM_TESTED_WITH_APPVERIFIER GEN_NUMBER PLATFORM_NAME OUT_FLAG)
  GET_LIST_FOR_GEN("PLATFORMS" "TESTED_APPVERIFIER" ${GEN_NUMBER} GEN_X_PLATFORMS)
  # list(FIND) yields -1 when the platform is absent.
  list(FIND GEN_X_PLATFORMS ${PLATFORM_NAME} PLATFORM_EXISTS)
  if(NOT "${PLATFORM_EXISTS}" LESS 0)
    set(${OUT_FLAG} TRUE)
  else()
    set(${OUT_FLAG} FALSE)
  endif()
endmacro()
|
||||
|
||||
# default flag for GenX devices support
|
||||
set(SUPPORT_GEN_DEFAULT TRUE CACHE BOOL "default value for SUPPORT_GENx")
|
||||
# default flag for platform support
|
||||
set(SUPPORT_PLATFORM_DEFAULT TRUE CACHE BOOL "default value for support platform")
|
||||
|
||||
# Define the hardware configurations we support
|
||||
set(SUPPORT_GEN8 ${SUPPORT_GEN_DEFAULT} CACHE BOOL "Support Gen8 devices")
|
||||
set(SUPPORT_GEN9 ${SUPPORT_GEN_DEFAULT} CACHE BOOL "Support Gen9 devices")
|
||||
|
||||
# Define the hardware configurations we test
|
||||
set(TESTS_GEN8 ${SUPPORT_GEN8} CACHE BOOL "Build ULTs for Gen8 devices")
|
||||
set(TESTS_GEN9 ${SUPPORT_GEN9} CACHE BOOL "Build ULTs for Gen9 devices")
|
||||
|
||||
if(SUPPORT_GEN9)
|
||||
set(SUPPORT_SKL ${SUPPORT_PLATFORM_DEFAULT} CACHE BOOL "Support SKL")
|
||||
set(SUPPORT_KBL ${SUPPORT_PLATFORM_DEFAULT} CACHE BOOL "Support KBL")
|
||||
set(SUPPORT_BXT ${SUPPORT_PLATFORM_DEFAULT} CACHE BOOL "Support BXT")
|
||||
set(SUPPORT_GLK ${SUPPORT_PLATFORM_DEFAULT} CACHE BOOL "Support GLK")
|
||||
endif()
|
||||
|
||||
if(TESTS_GEN9)
|
||||
if(SUPPORT_SKL)
|
||||
set(TESTS_SKL ${TESTS_GEN9} CACHE BOOL "Build ULTs for SKL")
|
||||
endif()
|
||||
if(SUPPORT_KBL)
|
||||
set(TESTS_KBL ${TESTS_GEN9} CACHE BOOL "Build ULTs for KBL")
|
||||
endif()
|
||||
if(SUPPORT_GLK)
|
||||
set(TESTS_GLK ${TESTS_GEN9} CACHE BOOL "Build ULTs for GLK")
|
||||
endif()
|
||||
if(SUPPORT_BXT)
|
||||
set(TESTS_BXT ${TESTS_GEN9} CACHE BOOL "Build ULTs for BXT")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Init lists
|
||||
INIT_LIST("FAMILY_NAME" "TESTED")
|
||||
INIT_LIST("PLATFORMS" "SUPPORTED")
|
||||
INIT_LIST("PLATFORMS" "SUPPORTED_2_0")
|
||||
INIT_LIST("PLATFORMS" "TESTED")
|
||||
INIT_LIST("PLATFORMS" "TESTED_APPVERIFIER")
|
||||
INIT_LIST("CONFIGURATIONS" "UNIT_TESTS")
|
||||
INIT_LIST("CONFIGURATIONS" "AUB_TESTS")
|
||||
INIT_LIST("CONFIGURATIONS" "MT_TESTS")
|
||||
|
||||
# Add supported and tested platforms
|
||||
if(SUPPORT_GEN8)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED" 8 "BDW")
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED_2_0" 8 "BDW")
|
||||
if(TESTS_GEN8)
|
||||
ADD_ITEM_FOR_GEN("FAMILY_NAME" "TESTED" 8 "BDWFamily")
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" 8 "BDW")
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED_APPVERIFIER" 8 "BDW")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "AUB_TESTS" 8 "bdw/1/3/8")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "MT_TESTS" 8 "bdw/1/3/8")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "UNIT_TESTS" 8 "bdw/1/3/8")
|
||||
endif()
|
||||
endif(SUPPORT_GEN8)
|
||||
|
||||
if(SUPPORT_GEN9)
|
||||
if(TESTS_GEN9)
|
||||
ADD_ITEM_FOR_GEN("FAMILY_NAME" "TESTED" 9 "SKLFamily")
|
||||
endif()
|
||||
if(SUPPORT_SKL)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED" 9 "SKL")
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED_2_0" 9 "SKL")
|
||||
if(TESTS_SKL)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" 9 "SKL")
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED_APPVERIFIER" 9 "SKL")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "AUB_TESTS" 9 "skl/1/3/8")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "MT_TESTS" 9 "skl/1/3/8")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "UNIT_TESTS" 9 "skl/1/3/8")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(SUPPORT_KBL)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED" 9 "KBL")
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED_2_0" 9 "KBL")
|
||||
if(TESTS_KBL)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" 9 "KBL")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "UNIT_TESTS" 9 "kbl/1/3/6")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(SUPPORT_GLK)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED" 9 "GLK")
|
||||
if(TESTS_GLK)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" 9 "GLK")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "UNIT_TESTS" 9 "glk/1/3/6")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(SUPPORT_BXT)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED" 9 "BXT")
|
||||
if(TESTS_BXT)
|
||||
ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" 9 "BXT")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "AUB_TESTS" 9 "bxt/1/3/6")
|
||||
ADD_ITEM_FOR_GEN("CONFIGURATIONS" "UNIT_TESTS" 9 "bxt/1/3/6")
|
||||
endif()
|
||||
endif()
|
||||
endif(SUPPORT_GEN9)
|
||||
|
||||
# Get platform lists, flag definition and set default platforms
|
||||
GET_AVAILABLE_PLATFORMS("SUPPORTED" "SUPPORT" ALL_AVAILABLE_SUPPORTED_PLATFORMS)
|
||||
GET_AVAILABLE_PLATFORMS("TESTED" "TESTS" ALL_AVAILABLE_TESTED_PLATFORMS)
|
||||
|
||||
message(STATUS "All supported platforms: ${ALL_AVAILABLE_SUPPORTED_PLATFORMS}")
|
||||
message(STATUS "All tested platforms: ${ALL_AVAILABLE_TESTED_PLATFORMS}")
|
||||
|
||||
message(STATUS "Default supported platform: ${DEFAULT_SUPPORTED_PLATFORM}")
|
||||
|
||||
# Verify that the default supported platform is one of the collected platforms.
# The value is quoted so that an empty DEFAULT_SUPPORTED_PLATFORM (no platform
# enabled) reaches the FATAL_ERROR below instead of failing inside list(FIND)
# with an argument-count error.
list(FIND SUPPORTED_PLATFORM_LIST "${DEFAULT_SUPPORTED_PLATFORM}" VALID_DEFAULT_SUPPORTED_PLATFORM)
if(VALID_DEFAULT_SUPPORTED_PLATFORM LESS 0)
  message(FATAL_ERROR "Not a valid supported platform: ${DEFAULT_SUPPORTED_PLATFORM}")
endif()
|
||||
|
||||
message(STATUS "Default tested platform: ${DEFAULT_TESTED_PLATFORM}")
|
||||
|
||||
if(DEFAULT_TESTED_PLATFORM)
|
||||
list(FIND TESTED_PLATFORM_LIST ${DEFAULT_TESTED_PLATFORM} VALID_DEFAULT_TESTED_PLATFORM)
|
||||
if(VALID_DEFAULT_TESTED_PLATFORM LESS 0)
|
||||
message(FATAL_ERROR "Not a valid tested platform: ${DEFAULT_TESTED_PLATFORM}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT DEFAULT_TESTED_FAMILY_NAME)
|
||||
foreach(GEN_NUM RANGE ${MAX_GEN} 0 -1)
|
||||
list(GET ALL_TESTED_FAMILY_NAME ${GEN_NUM} GEN_FAMILY_NAME)
|
||||
if(NOT GEN_FAMILY_NAME STREQUAL " ")
|
||||
set(DEFAULT_TESTED_FAMILY_NAME ${GEN_FAMILY_NAME})
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
message(STATUS "Default tested family name: ${DEFAULT_TESTED_FAMILY_NAME}")
|
||||
44
public/cl_ext_private.h
Normal file
44
public/cl_ext_private.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
/***************************************
|
||||
* * Internal only queue properties *
|
||||
* ****************************************/
|
||||
// Under Intel evaluation for now. Remove it after approval for public release
|
||||
#define CL_DEVICE_DRIVER_VERSION_INTEL 0x10010
|
||||
|
||||
#define CL_DEVICE_DRIVER_VERSION_INTEL_NEO1 0x454E4831 // Driver version is ENH1
|
||||
|
||||
/***************************************
|
||||
* * cl_intel_debug_info extension *
|
||||
* ****************************************/
|
||||
#define cl_intel_debug_info 1
|
||||
|
||||
// New queries for clGetProgramInfo:
|
||||
#define CL_PROGRAM_DEBUG_INFO_INTEL 0x4100
|
||||
#define CL_PROGRAM_DEBUG_INFO_SIZES_INTEL 0x4101
|
||||
|
||||
// New queries for clGetKernelInfo:
|
||||
#define CL_KERNEL_BINARY_PROGRAM_INTEL 0x407D
|
||||
#define CL_KERNEL_BINARIES_INTEL 0x4102
|
||||
#define CL_KERNEL_BINARY_SIZES_INTEL 0x4103
|
||||
414
public/cl_vebox_intel.h
Normal file
414
public/cl_vebox_intel.h
Normal file
@@ -0,0 +1,414 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __CL_EXT_VEBOX_INTEL_H
|
||||
#define __CL_EXT_VEBOX_INTEL_H
|
||||
|
||||
// Include the OpenCL header before opening the extern "C" block so that its
// declarations are not wrapped in this header's linkage specification.
#include <CL/cl.h>

#ifdef __cplusplus
extern "C" {
#endif
|
||||
|
||||
/***************************************
|
||||
* cl_intel_video_enhancement extension *
|
||||
****************************************/
|
||||
#define CL_ACCELERATOR_TYPE_VE_INTEL 0x9
|
||||
#define CL_DEVICE_VE_VERSION_INTEL 0x4160
|
||||
#define CL_DEVICE_VE_ENGINE_COUNT_INTEL 0x4161
|
||||
#define CL_DEVICE_VE_COLOR_PIPE_VERSION_INTEL 0x416A
|
||||
#define CL_DEVICE_VE_CAMERA_PIPE_VERSION_INTEL 0x4177
|
||||
#define CL_VE_VERSION_VER_1_INTEL 0x1
|
||||
#define CL_VE_VERSION_VER_2_INTEL 0x2
|
||||
#define CL_VE_VERSION_VER_3_INTEL 0x3
|
||||
#define CL_QUEUE_VE_ENABLE_INTEL 0x4162
|
||||
// VE Attributes
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_DENOISE_INTEL 0x4163
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_DEINTERLACE_INTEL 0x4164
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_HPC_INTEL 0x4165
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_STD_STE_INTEL 0x416B
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_GAMUT_COMP_INTEL 0x416C
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_GECC_INTEL 0x416D
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_ACE_INTEL 0x416E
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_ACE_ADVANCED_INTEL 0x416F
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_TCC_INTEL 0x4170
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_PROC_AMP_INTEL 0x4171
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_BACK_END_CSC_INTEL 0x4172
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_AOI_ALPHA_INTEL 0x4173
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_CCM_INTEL 0x4174
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_FWD_GAMMA_CORRECTION_INTEL 0x4175
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_FRONT_END_CSC_INTEL 0x4176
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_BLC_INTEL 0x4178
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_DEMOSAIC_INTEL 0x4179
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_WBC_INTEL 0x417A
|
||||
#define CL_VE_ACCELERATOR_ATTRIB_VIGNETTE_INTEL 0x417B
|
||||
|
||||
// VE Statistics
|
||||
#define CL_VE_ACCELERATOR_HISTOGRAMS_INTEL 0x4166
|
||||
#define CL_VE_ACCELERATOR_STATISTICS_INTEL 0x4167
|
||||
#define CL_VE_ACCELERATOR_STMM_INPUT_INTEL 0x4168
|
||||
#define CL_VE_ACCELERATOR_STMM_OUTPUT_INTEL 0x4169
|
||||
|
||||
// Denoise Control
|
||||
#define CL_VE_DENOISE_FACTOR_MAX_INTEL 64
|
||||
#define CL_VE_DENOISE_FACTOR_MIN_INTEL 0
|
||||
#define CL_VE_DENOISE_FACTOR_DEFAULT_INTEL 32
|
||||
|
||||
// Hot Pixel Correction ranges
|
||||
#define CL_VE_HPC_THRESHOLD_MAX_INTEL 255
|
||||
#define CL_VE_HPC_THRESHOLD_MIN_INTEL 0
|
||||
#define CL_VE_HPC_THRESHOLD_DEFAULT_INTEL 0
|
||||
#define CL_VE_HPC_PIXEL_COUNT_MAX_INTEL 8
|
||||
#define CL_VE_HPC_PIXEL_COUNT_MIN_INTEL 0
|
||||
#define CL_VE_HPC_PIXEL_COUNT_DEFAULT_INTEL 0
|
||||
|
||||
// Skin tone detection/enhancement ranges
|
||||
#define CL_VE_STE_FACTOR_MIN_INTEL 0
|
||||
#define CL_VE_STE_FACTOR_MAX_INTEL 10
|
||||
#define CL_VE_STE_FACTOR_DEFAULT_INTEL 3
|
||||
|
||||
// Constants for gamut compression scaling factors
|
||||
#define CL_VE_GAMUT_SCALING_FACTOR_MAX_INTEL 4.0f
|
||||
#define CL_VE_GAMUT_SCALING_FACTOR_MIN_INTEL 0.0f
|
||||
#define CL_VE_GAMUT_SCALING_FACTOR_DEFAULT_INTEL 0.0f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTROLS_MAX_INTEL 1.0f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTROLS_MIN_INTEL 0.0f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_RX_DEFAULT_INTEL 0.576f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_GX_DEFAULT_INTEL 0.331f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_BX_DEFAULT_INTEL 0.143f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_RY_DEFAULT_INTEL 0.343f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_GY_DEFAULT_INTEL 0.555f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_BY_DEFAULT_INTEL 0.104f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_RX_SRGB_INTEL 0.640f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_GX_SRGB_INTEL 0.300f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_BX_SRGB_INTEL 0.150f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_RY_SRGB_INTEL 0.330f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_GY_SRGB_INTEL 0.600f
|
||||
#define CL_VE_GAMUT_CHROMATICITY_CONTRL_BY_SRGB_INTEL 0.060f
|
||||
|
||||
// Constants for gamut expansion / color correction.
// Negative values are parenthesized, matching the CL_VE_CSC_DEFAULT_* constants
// in this header, so each macro expands to a single primary expression.
#define CL_VE_GECC_PIECE_COUNT_INTEL 11
#define CL_VE_GECC_TX_COEFFICIENTS_MIN_INTEL (-4.0f)
#define CL_VE_GECC_TX_COEFFICIENTS_MAX_INTEL 4.0f
#define CL_VE_GECC_TX_COEFF_C0_DEFAULT_INTEL 0.681f
#define CL_VE_GECC_TX_COEFF_C1_DEFAULT_INTEL 0.278f
#define CL_VE_GECC_TX_COEFF_C2_DEFAULT_INTEL 0.008f
#define CL_VE_GECC_TX_COEFF_C3_DEFAULT_INTEL 0.017f
#define CL_VE_GECC_TX_COEFF_C4_DEFAULT_INTEL 0.894f
#define CL_VE_GECC_TX_COEFF_C5_DEFAULT_INTEL (-0.012f)
#define CL_VE_GECC_TX_COEFF_C6_DEFAULT_INTEL (-0.002f)
#define CL_VE_GECC_TX_COEFF_C7_DEFAULT_INTEL 0.041f
#define CL_VE_GECC_TX_COEFF_C8_DEFAULT_INTEL 0.838f
#define CL_VE_GECC_TX_OFFSET_IN_MIN_INTEL (-16384)
#define CL_VE_GECC_TX_OFFSET_IN_MAX_INTEL 16383
#define CL_VE_GECC_TX_OFFSET_OUT_MIN_INTEL (-4.0f)
#define CL_VE_GECC_TX_OFFSET_OUT_MAX_INTEL 4.0f
|
||||
|
||||
// AOI Parameter defaults
|
||||
#define CL_VE_AOI_RANGE_DEFAULT_INTEL 0
|
||||
#define CL_VE_AOI_ALPHA_DEFAULT_INTEL 0
|
||||
|
||||
// CCM Config Parameter Range
// The negative bound is parenthesized, matching the CL_VE_CSC_DEFAULT_*
// constants in this header.
#define CL_VE_CCM_COEFFICIENTS_MIN_INTEL (-16.0f)
#define CL_VE_CCM_COEFFICIENTS_MAX_INTEL 16.0f
#define CL_VE_CCM_COEFFICIENTS_DEFAULT_INTEL 0.0f
|
||||
|
||||
// CSC Config Parameter Range
// Negative bounds are parenthesized, matching the CL_VE_CSC_DEFAULT_*
// constants in this header.
#define CL_VE_CSC_OFFSET_MIN_INTEL (-256.0f)
#define CL_VE_CSC_OFFSET_MAX_INTEL 256.0f
#define CL_VE_CSC_COEFF_MIN_INTEL (-4.0f)
#define CL_VE_CSC_COEFF_MAX_INTEL 4.0f
|
||||
|
||||
// Constants for specific color spaces
|
||||
#define CL_VE_GAMUT_CS_BT601_INTEL 0x0
|
||||
#define CL_VE_GAMUT_CS_BT709_INTEL 0x1
|
||||
#define CL_VE_GAMUT_CS_XVYCC601_INTEL 0x2
|
||||
#define CL_VE_GAMUT_CS_XVYCC709_INTEL 0x3
|
||||
|
||||
// LACE/ACE Control
|
||||
#define CL_VE_ACE_PIECE_COUNT_INTEL 10
|
||||
#define CL_VE_ACE_LEVEL_MIN_INTEL 0
|
||||
#define CL_VE_ACE_LEVEL_MAX_INTEL 9
|
||||
#define CL_VE_ACE_LEVEL_DEFAULT_INTEL 5
|
||||
#define CL_VE_ACE_STRENGTH_MIN_INTEL 0
|
||||
#define CL_VE_ACE_STRENGTH_MAX_INTEL 6
|
||||
#define CL_VE_ACE_STRENGTH_DEFAULT_INTEL 1
|
||||
#define CL_VE_ACE_SKIN_THRESHOLD_MIN_INTEL 1
|
||||
#define CL_VE_ACE_SKIN_THRESHOLD_MAX_INTEL 31
|
||||
#define CL_VE_ACE_SKIN_THRESHOLD_DEFAULT_INTEL 26
|
||||
|
||||
// TCC Parameter Range
|
||||
#define CL_VE_TCC_MIN_INTEL 0
|
||||
#define CL_VE_TCC_MAX_INTEL 255
|
||||
#define CL_VE_TCC_DEFAULT_INTEL 220
|
||||
|
||||
// Proc-Amp Ranges
// Negative bounds are parenthesized, matching the CL_VE_CSC_DEFAULT_*
// constants in this header.
#define CL_VE_PROCAMP_BRIGHTNESS_MIN_INTEL (-100.0f)
#define CL_VE_PROCAMP_BRIGHTNESS_MAX_INTEL 100.0f
#define CL_VE_PROCAMP_BRIGHTNESS_DEFAULT_INTEL 0.0f

#define CL_VE_PROCAMP_CONTRAST_MIN_INTEL 0.0f
#define CL_VE_PROCAMP_CONTRAST_MAX_INTEL 15.0f
#define CL_VE_PROCAMP_CONTRAST_DEFAULT_INTEL 1.0f

#define CL_VE_PROCAMP_HUE_MIN_INTEL (-180.0f)
#define CL_VE_PROCAMP_HUE_MAX_INTEL 180.0f
#define CL_VE_PROCAMP_HUE_DEFAULT_INTEL 0.0f

#define CL_VE_PROCAMP_SATURATION_MIN_INTEL 0.0f
#define CL_VE_PROCAMP_SATURATION_MAX_INTEL 8.0f
#define CL_VE_PROCAMP_SATURATION_DEFAULT_INTEL 1.0f
|
||||
|
||||
// BLC Parameter Range
// The negative bound is parenthesized, matching the CL_VE_CSC_DEFAULT_*
// constants in this header.
#define CL_VE_BLC_MIN_INTEL (-65536)
#define CL_VE_BLC_MAX_INTEL 65535
#define CL_VE_BLC_DEFAULT_INTEL 0
|
||||
|
||||
// WBC Parameter Range
|
||||
#define CL_VE_WBC_MIN_INTEL 0.0f
|
||||
#define CL_VE_WBC_MAX_INTEL 16.0f
|
||||
#define CL_VE_WBC_DEFAULT_INTEL 0.0f
|
||||
|
||||
// FGC Parameter Range
|
||||
#define CL_VE_FGC_DEFAULT_INTEL 0
|
||||
|
||||
// Video enhancement kernel flags
|
||||
#define CL_VE_FIRST_FRAME_INTEL (1 << 0)
|
||||
#define CL_VE_RESET_DN_HISTORY_INTEL (1 << 1)
|
||||
#define CL_VE_RESET_DI_HISTORY_INTEL (1 << 2)
|
||||
#define CL_VE_RESET_ACE_HISTORY_INTEL (1 << 3)
|
||||
#define CL_VE_RESET_STE_HISTORY_INTEL (1 << 4)
|
||||
#define CL_VE_GENERATE_LACE_HISTOGRAM_128_BINS_INTEL (1 << 5)
|
||||
#define CL_VE_GENERATE_LACE_HISTOGRAM_256_BINS_INTEL (1 << 6)
|
||||
|
||||
// Bayer pattern controls
|
||||
#define CL_VE_BAYER_PATTERN_FORMAT_8BIT_INTEL 0x0
|
||||
#define CL_VE_BAYER_PATTERN_FORMAT_16BIT_INTEL 0x1
|
||||
#define CL_VE_BAYER_PATTERN_OFFSET_BG_INTEL 0x0
|
||||
#define CL_VE_BAYER_PATTERN_OFFSET_RG_INTEL 0x1
|
||||
#define CL_VE_BAYER_PATTERN_OFFSET_GR_INTEL 0x2
|
||||
#define CL_VE_BAYER_PATTERN_OFFSET_GB_INTEL 0x3
|
||||
|
||||
// Default color-space conversion coefficients
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_OFFSET_IN_0 (-16.0f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_OFFSET_IN_1 (-128.0f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_OFFSET_IN_2 (-128.0f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_0_0 (1.164f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_0_1 (0.0f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_0_2 (1.596f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_1_0 (1.164f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_1_1 (-0.392f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_1_2 (-0.813f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_2_0 (1.164f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_2_1 (2.017f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_TX_COEFF_2_2 (0.0f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_OFFSET_OUT_0 (0.0f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_OFFSET_OUT_1 (0.0f)
|
||||
#define CL_VE_CSC_DEFAULT_YUV_TO_RGB_OFFSET_OUT_2 (0.0f)
|
||||
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_OFFSET_IN_0 (0.0f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_OFFSET_IN_1 (0.0f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_OFFSET_IN_2 (0.0f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_0_0 (0.257f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_0_1 (0.504f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_0_2 (0.098f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_1_0 (-0.148f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_1_1 (-0.291f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_1_2 (0.439f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_2_0 (0.439f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_2_1 (-0.368f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_TX_COEFF_2_2 (-0.071f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_OFFSET_OUT_0 (16.0f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_OFFSET_OUT_1 (128.0f)
|
||||
#define CL_VE_CSC_DEFAULT_RGB_TO_YUV_OFFSET_OUT_2 (128.0f)
|
||||
|
||||
// Forward Gamma Correction controls
|
||||
#define CL_VE_FWD_GAMMA_SEGMENT_COUNT 64
|
||||
|
||||
typedef cl_uint cl_ve_accelerator_attrib_id;
|
||||
|
||||
typedef struct _cl_ve_dn_attrib_intel {
|
||||
cl_bool enable_luma;
|
||||
cl_bool enable_chroma;
|
||||
cl_bool auto_detect;
|
||||
cl_uint denoise_factor;
|
||||
} cl_ve_dn_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_di_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_bool motion_compensation_enabled;
|
||||
cl_bool top_first;
|
||||
} cl_ve_di_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_std_ste_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_uint ste_factor;
|
||||
cl_bool write_std_decisions_only;
|
||||
} cl_ve_std_ste_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_gamut_comp_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_bool advanced_mode_enable;
|
||||
cl_uint src_color_space;
|
||||
cl_float basic_mode_scaling_factor;
|
||||
cl_float display_rgb_x[3];
|
||||
cl_float display_rgb_y[3];
|
||||
} cl_ve_gamut_comp_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_gecc_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_bool use_advanced_mode;
|
||||
cl_float matrix[3][3];
|
||||
cl_int offset_in[3];
|
||||
cl_float offset_out[3];
|
||||
cl_uchar gamma_correction_in[CL_VE_GECC_PIECE_COUNT_INTEL];
|
||||
cl_uchar gamma_correction_out[CL_VE_GECC_PIECE_COUNT_INTEL];
|
||||
cl_uchar inv_gamma_correction_in[CL_VE_GECC_PIECE_COUNT_INTEL];
|
||||
cl_uchar inv_gamma_correction_out[CL_VE_GECC_PIECE_COUNT_INTEL];
|
||||
} cl_ve_gecc_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_ace_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_uchar skin_threshold;
|
||||
cl_uint level;
|
||||
cl_uint strength;
|
||||
} cl_ve_ace_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_ace_advanced_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_uchar luma_min;
|
||||
cl_uchar luma_max;
|
||||
cl_uchar luma_in[CL_VE_ACE_PIECE_COUNT_INTEL];
|
||||
cl_uchar luma_out[CL_VE_ACE_PIECE_COUNT_INTEL];
|
||||
} cl_ve_ace_advanced_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_tcc_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_uchar red_saturation;
|
||||
cl_uchar green_saturation;
|
||||
cl_uchar blue_saturation;
|
||||
cl_uchar cyan_saturation;
|
||||
cl_uchar magenta_saturation;
|
||||
cl_uchar yellow_saturation;
|
||||
} cl_ve_tcc_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_procamp_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_float brightness;
|
||||
cl_float contrast;
|
||||
cl_float hue;
|
||||
cl_float saturation;
|
||||
} cl_ve_procamp_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_becsc_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_bool yuv_channel_swap;
|
||||
cl_float offset_in[3];
|
||||
cl_float matrix[3][3];
|
||||
cl_float offset_out[3];
|
||||
} cl_ve_becsc_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_aoi_alpha_attrib_intel {
|
||||
cl_bool aoi_enabled;
|
||||
cl_uint x_min;
|
||||
cl_uint x_max;
|
||||
cl_uint y_min;
|
||||
cl_uint y_max;
|
||||
cl_bool alpha_enable;
|
||||
cl_ushort alpha_value;
|
||||
} cl_ve_aoi_alpha_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_hpc_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_uchar threshold;
|
||||
cl_uchar count;
|
||||
} cl_ve_hpc_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_blc_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_int black_point_offset_red;
|
||||
cl_int black_point_offset_green_top;
|
||||
cl_int black_point_offset_green_bottom;
|
||||
cl_int black_point_offset_blue;
|
||||
} cl_ve_blc_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_demosaic_attrib_intel {
|
||||
cl_uint input_width;
|
||||
cl_uint input_height;
|
||||
cl_uint input_stride;
|
||||
cl_uint bayer_pattern_offset;
|
||||
cl_uint bayer_pattern_format;
|
||||
} cl_ve_demosaic_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_wbc_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_float white_balance_red_correction;
|
||||
cl_float white_balance_green_top_correction;
|
||||
cl_float white_balance_green_bottom_correction;
|
||||
cl_float white_balance_blue_correction;
|
||||
} cl_ve_wbc_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_vignette_attrib_intel {
|
||||
cl_bool enabled;
|
||||
} cl_ve_vignette_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_ccm_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_float matrix[3][3];
|
||||
} cl_ve_ccm_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_fgc_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_ushort pixel_value[CL_VE_FWD_GAMMA_SEGMENT_COUNT];
|
||||
cl_ushort red_channel_corrected_value[CL_VE_FWD_GAMMA_SEGMENT_COUNT];
|
||||
cl_ushort green_channel_corrected_value[CL_VE_FWD_GAMMA_SEGMENT_COUNT];
|
||||
cl_ushort blue_channel_corrected_value[CL_VE_FWD_GAMMA_SEGMENT_COUNT];
|
||||
} cl_ve_fgc_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_fecsc_attrib_intel {
|
||||
cl_bool enabled;
|
||||
cl_float offset_in[3];
|
||||
cl_float matrix[3][3];
|
||||
cl_float offset_out[3];
|
||||
} cl_ve_fecsc_attrib_intel;
|
||||
|
||||
typedef struct _cl_ve_attrib_desc_intel {
|
||||
cl_ve_accelerator_attrib_id attrib_id;
|
||||
void *attrib_data;
|
||||
} cl_ve_attrib_desc_intel;
|
||||
|
||||
typedef struct _cl_ve_desc_intel {
|
||||
cl_uint attrib_count;
|
||||
cl_ve_attrib_desc_intel *attribs;
|
||||
} cl_ve_desc_intel;
|
||||
|
||||
typedef struct _cl_vignette_format_intel {
|
||||
cl_ushort Red;
|
||||
cl_ushort GreenTop;
|
||||
cl_ushort Blue;
|
||||
cl_ushort GreenBottom;
|
||||
} cl_vignette_format_intel;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __CL_EXT_VEBOX_INTEL_H */
|
||||
908
runtime/CMakeLists.txt
Normal file
908
runtime/CMakeLists.txt
Normal file
@@ -0,0 +1,908 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
cmake_minimum_required (VERSION 3.0)
|
||||
|
||||
if (POLICY CMP0042)
|
||||
cmake_policy (SET CMP0042 NEW)
|
||||
endif (POLICY CMP0042)
|
||||
|
||||
if (POLICY CMP0063)
|
||||
cmake_policy (SET CMP0063 NEW)
|
||||
endif (POLICY CMP0063)
|
||||
|
||||
project (neo)
|
||||
|
||||
#set (CMAKE_CXX_VISIBILITY_PRESET default)
|
||||
#set (CMAKE_VISIBILITY_INLINES_HIDDEN 1)
|
||||
|
||||
# Support for Windows Universal Drivers
|
||||
ENABLE_WUD()
|
||||
|
||||
if(WIN32)
|
||||
set(GEN_OS_SRC
|
||||
windows/command_stream_receiver.cpp
|
||||
windows/wddm_engine_mapper.cpp
|
||||
windows/wddm.cpp
|
||||
)
|
||||
else(WIN32)
|
||||
set(GEN_OS_SRC
|
||||
linux/command_stream_receiver.cpp
|
||||
linux/drm_engine_mapper.cpp
|
||||
)
|
||||
endif(WIN32)
|
||||
|
||||
set(RUNTIME_SRCS_GENX
|
||||
aub_command_stream_receiver.cpp
|
||||
aub_mapper.h
|
||||
aub_mem_dump.cpp
|
||||
command_queue.cpp
|
||||
device_enqueue.h
|
||||
device_queue.cpp
|
||||
command_stream_receiver_hw.cpp
|
||||
hw_cmds.h
|
||||
hw_cmds_generated.h
|
||||
hw_helper.cpp
|
||||
hw_info.cpp
|
||||
hw_info.h
|
||||
buffer.cpp
|
||||
image.cpp
|
||||
kernel_commands.cpp
|
||||
preamble.cpp
|
||||
preemption.cpp
|
||||
reg_configs.h
|
||||
sampler.cpp
|
||||
scheduler_definitions.h
|
||||
scheduler_igdrcl_built_in.inl
|
||||
state_base_address.cpp
|
||||
tbx_command_stream_receiver.cpp
|
||||
${GEN_OS_SRC}
|
||||
)
|
||||
if(NOT (TARGET ${BIKSIM_LIB_NAME}))
|
||||
add_subdirectory(builtin_kernels_simulation)
|
||||
endif(NOT (TARGET ${BIKSIM_LIB_NAME}))
|
||||
|
||||
if(NOT (TARGET ${SCHEDULER_BINARY_LIB_NAME}))
|
||||
add_subdirectory("scheduler")
|
||||
endif(NOT (TARGET ${SCHEDULER_BINARY_LIB_NAME}))
|
||||
|
||||
if(NOT (TARGET ${BUILTINS_BINARIES_LIB_NAME}))
|
||||
add_subdirectory("built_ins")
|
||||
endif(NOT (TARGET ${BUILTINS_BINARIES_LIB_NAME}))
|
||||
|
||||
add_subdirectory(api)
|
||||
add_subdirectory(accelerators)
|
||||
|
||||
set (RUNTIME_SRCS_AUB_MEM_DUMP
|
||||
aub_mem_dump/aub_mem_dump.cpp
|
||||
aub_mem_dump/aub_mem_dump.h
|
||||
aub_mem_dump/aub_mem_dump.inl
|
||||
aub_mem_dump/aub_header.h
|
||||
aub_mem_dump/aub_services.h
|
||||
)
|
||||
|
||||
|
||||
set (RUNTIME_SRCS_BUILT_INS
|
||||
built_ins/built_ins_storage.cpp
|
||||
built_ins/built_ins.cpp
|
||||
built_ins/built_ins.h
|
||||
built_ins/sip.cpp
|
||||
built_ins/sip.h
|
||||
built_ins/vme_dispatch_builder.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_BUILT_IN_KERNELS
|
||||
built_ins/kernels/copy_buffer_rect.igdrcl_built_in
|
||||
built_ins/kernels/copy_buffer_to_buffer.igdrcl_built_in
|
||||
built_ins/kernels/copy_buffer_to_image3d.igdrcl_built_in
|
||||
built_ins/kernels/copy_image3d_to_buffer.igdrcl_built_in
|
||||
built_ins/kernels/copy_image_to_image1d.igdrcl_built_in
|
||||
built_ins/kernels/copy_image_to_image2d.igdrcl_built_in
|
||||
built_ins/kernels/copy_image_to_image3d.igdrcl_built_in
|
||||
built_ins/kernels/fill_buffer.igdrcl_built_in
|
||||
built_ins/kernels/fill_image1d.igdrcl_built_in
|
||||
built_ins/kernels/fill_image2d.igdrcl_built_in
|
||||
built_ins/kernels/fill_image3d.igdrcl_built_in
|
||||
built_ins/kernels/vme_block_motion_estimate_intel.igdrcl_built_in
|
||||
built_ins/kernels/vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in
|
||||
built_ins/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.igdrcl_built_in
|
||||
built_ins/kernels/vebox_ve_enhance_intel.igdrcl_built_in
|
||||
built_ins/kernels/vebox_ve_dn_enhance_intel.igdrcl_built_in
|
||||
built_ins/kernels/vebox_ve_dn_di_enhance_intel.igdrcl_built_in
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_COMMANDS
|
||||
commands/bxml_generator_glue.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_COMMAND_QUEUE
|
||||
command_queue/cpu_data_transfer_handler.h
|
||||
command_queue/command_queue.cpp
|
||||
command_queue/command_queue.h
|
||||
command_queue/command_queue_hw.h
|
||||
command_queue/command_queue_hw.inl
|
||||
command_queue/dispatch_walker.h
|
||||
command_queue/dispatch_walker_helper.h
|
||||
command_queue/dispatch_walker_helper.inl
|
||||
command_queue/enqueue_barrier.h
|
||||
command_queue/enqueue_common.h
|
||||
command_queue/enqueue_copy_buffer.h
|
||||
command_queue/enqueue_copy_buffer_rect.h
|
||||
command_queue/enqueue_copy_buffer_to_image.h
|
||||
command_queue/enqueue_copy_image.h
|
||||
command_queue/enqueue_copy_image_to_buffer.h
|
||||
command_queue/enqueue_fill_buffer.h
|
||||
command_queue/enqueue_fill_image.h
|
||||
command_queue/enqueue_kernel.h
|
||||
command_queue/enqueue_map_buffer.h
|
||||
command_queue/enqueue_map_image.h
|
||||
command_queue/enqueue_svm.h
|
||||
command_queue/enqueue_marker.h
|
||||
command_queue/enqueue_migrate_mem_objects.h
|
||||
command_queue/enqueue_read_buffer.h
|
||||
command_queue/enqueue_read_buffer_rect.h
|
||||
command_queue/enqueue_read_image.h
|
||||
command_queue/enqueue_write_buffer.h
|
||||
command_queue/enqueue_write_buffer_rect.h
|
||||
command_queue/enqueue_write_image.h
|
||||
command_queue/finish.h
|
||||
command_queue/flush.h
|
||||
command_queue/local_id_gen.cpp
|
||||
command_queue/local_id_gen_avx2.cpp
|
||||
command_queue/local_id_gen_sse4.cpp
|
||||
command_queue/local_id_gen.h
|
||||
command_queue/local_id_gen.inl
|
||||
command_queue/local_work_size.cpp
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_COMMAND_STREAM
|
||||
command_stream/aub_command_stream_receiver.cpp
|
||||
command_stream/aub_command_stream_receiver.h
|
||||
command_stream/aub_command_stream_receiver_hw.h
|
||||
command_stream/aub_command_stream_receiver_hw.inl
|
||||
command_stream/command_stream_receiver.cpp
|
||||
command_stream/command_stream_receiver.h
|
||||
command_stream/command_stream_receiver_hw.h
|
||||
command_stream/command_stream_receiver_hw.inl
|
||||
command_stream/csr_definitions.h
|
||||
command_stream/device_command_stream.h
|
||||
command_stream/linear_stream.cpp
|
||||
command_stream/linear_stream.h
|
||||
command_stream/submissions_aggregator.cpp
|
||||
command_stream/submissions_aggregator.h
|
||||
command_stream/tbx_command_stream_receiver.cpp
|
||||
command_stream/tbx_command_stream_receiver.h
|
||||
command_stream/tbx_command_stream_receiver_hw.h
|
||||
command_stream/tbx_command_stream_receiver_hw.inl
|
||||
command_stream/tbx_stream.cpp
|
||||
command_stream/thread_arbitration_policy.h
|
||||
command_stream/preemption.h
|
||||
command_stream/preemption.cpp
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_COMPILER_INTERFACE
|
||||
compiler_interface/binary_cache.cpp
|
||||
compiler_interface/compiler_interface.cpp
|
||||
compiler_interface/compiler_interface.h
|
||||
compiler_interface/compiler_interface.inl
|
||||
compiler_interface/create_main.cpp
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_CONTEXT
|
||||
context/context.cpp
|
||||
context/context.h
|
||||
context/context.inl
|
||||
context/driver_diagnostics.cpp
|
||||
context/driver_diagnostics.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_DEVICE
|
||||
device/device.cpp
|
||||
device/device.h
|
||||
device/device_caps.cpp
|
||||
device/device_info.cpp
|
||||
device/device_info.h
|
||||
device/device_info_map.h
|
||||
device/device_vector.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_DEVICE_QUEUE
|
||||
device_queue/device_queue.cpp
|
||||
device_queue/device_queue.h
|
||||
device_queue/device_queue_hw.h
|
||||
device_queue/device_queue_hw.inl
|
||||
device_queue/device_queue_hw_profiling.inl
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_EVENT
|
||||
event/async_events_handler.h
|
||||
event/async_events_handler.cpp
|
||||
event/event.cpp
|
||||
event/event.h
|
||||
event/event_builder.cpp
|
||||
event/event_builder.h
|
||||
event/event_registry.cpp
|
||||
event/event_registry.h
|
||||
event/user_event.cpp
|
||||
event/user_event.h
|
||||
event/hw_timestamps.h
|
||||
event/perf_counter.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_EXECUTION_MODEL
|
||||
execution_model/device_enqueue.h
|
||||
)
|
||||
|
||||
if(GTPIN_HEADERS_DIR)
|
||||
set (RUNTIME_SRCS_GTPIN
|
||||
gtpin/gtpin_init.cpp
|
||||
gtpin/gtpin_init.h
|
||||
gtpin/gtpin_helpers.cpp
|
||||
gtpin/gtpin_helpers.h
|
||||
)
|
||||
endif(GTPIN_HEADERS_DIR)
|
||||
|
||||
set (RUNTIME_SRCS_HELPERS
|
||||
helpers/abort.h
|
||||
helpers/aligned_memory.h
|
||||
helpers/array_count.h
|
||||
helpers/base_object.cpp
|
||||
helpers/base_object.h
|
||||
helpers/base_object_allocator.cpp
|
||||
helpers/basic_math.h
|
||||
helpers/cache_policy.cpp
|
||||
helpers/cache_policy.h
|
||||
helpers/dirty_state_helpers.h
|
||||
helpers/dirty_state_helpers.cpp
|
||||
helpers/dispatch_info.h
|
||||
helpers/dispatch_info.cpp
|
||||
helpers/dispatch_info_builder.h
|
||||
helpers/completion_stamp.h
|
||||
helpers/debug_helpers.h
|
||||
helpers/engine_node.h
|
||||
helpers/error_mappers.h
|
||||
helpers/file_io.cpp
|
||||
helpers/file_io.h
|
||||
helpers/flush_stamp.h
|
||||
helpers/flush_stamp.cpp
|
||||
helpers/get_info.h
|
||||
helpers/hash.h
|
||||
helpers/hw_helper.cpp
|
||||
helpers/hw_helper.h
|
||||
helpers/hw_helper.inl
|
||||
helpers/hw_info.cpp
|
||||
helpers/hw_info.h
|
||||
helpers/kernel_commands.h
|
||||
helpers/kernel_commands.inl
|
||||
helpers/options.cpp
|
||||
helpers/options.h
|
||||
helpers/per_thread_data.cpp
|
||||
helpers/per_thread_data.h
|
||||
helpers/preamble.h
|
||||
helpers/preamble.inl
|
||||
helpers/ptr_math.h
|
||||
helpers/queue_helpers.h
|
||||
helpers/sampler_helpers.h
|
||||
helpers/selectors.h
|
||||
helpers/state_base_address.h
|
||||
helpers/state_base_address.inl
|
||||
helpers/stdio.h
|
||||
helpers/string.h
|
||||
helpers/string_helpers.h
|
||||
helpers/surface_formats.cpp
|
||||
helpers/surface_formats.h
|
||||
helpers/task_information.cpp
|
||||
helpers/task_information.h
|
||||
helpers/uint16_avx2.h
|
||||
helpers/uint16_sse4.h
|
||||
helpers/wddm_helper.h
|
||||
helpers/validators.cpp
|
||||
helpers/validators.h
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
list (APPEND RUNTIME_SRCS_HELPERS
|
||||
helpers/translationtable_callbacks.h
|
||||
)
|
||||
endif(WIN32)
|
||||
|
||||
set (RUNTIME_SRCS_INDIRECT_HEAP
|
||||
indirect_heap/indirect_heap.cpp
|
||||
indirect_heap/indirect_heap.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_INSTRUMENTATION
|
||||
instrumentation/instrumentation.cpp
|
||||
instrumentation/instrumentation.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_KERNEL
|
||||
kernel/dynamic_kernel_info.h
|
||||
kernel/kernel.cpp
|
||||
kernel/kernel.h
|
||||
kernel/kernel.inl
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_MEMORY_MANAGER
|
||||
memory_manager/deferrable_deletion.h
|
||||
memory_manager/deferred_deleter.cpp
|
||||
memory_manager/deferred_deleter.h
|
||||
memory_manager/graphics_allocation.h
|
||||
memory_manager/graphics_allocation.cpp
|
||||
memory_manager/host_ptr_defines.h
|
||||
memory_manager/host_ptr_manager.h
|
||||
memory_manager/host_ptr_manager.cpp
|
||||
memory_manager/memory_manager.cpp
|
||||
memory_manager/memory_manager.h
|
||||
memory_manager/svm_memory_manager.cpp
|
||||
memory_manager/svm_memory_manager.h
|
||||
memory_manager/os_agnostic_memory_manager.cpp
|
||||
memory_manager/os_agnostic_memory_manager.h
|
||||
memory_manager/page_table.cpp
|
||||
memory_manager/page_table.h
|
||||
memory_manager/address_mapper.cpp
|
||||
memory_manager/address_mapper.h
|
||||
memory_manager/surface.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_GMM_HELPER
|
||||
gmm_helper/gmm_helper.cpp
|
||||
gmm_helper/gmm_helper.h
|
||||
gmm_helper/gmm_lib.h
|
||||
gmm_helper/resource_info.h
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
list (APPEND RUNTIME_SRCS_GMM_HELPER
|
||||
gmm_helper/page_table_mngr.h
|
||||
gmm_helper/gmm_memory.h
|
||||
)
|
||||
endif(WIN32)
|
||||
|
||||
set (RUNTIME_SRCS_MEM_OBJ
|
||||
mem_obj/buffer.cpp
|
||||
mem_obj/buffer.h
|
||||
mem_obj/buffer.inl
|
||||
mem_obj/image.cpp
|
||||
mem_obj/image.h
|
||||
mem_obj/image.inl
|
||||
mem_obj/mem_obj.cpp
|
||||
mem_obj/mem_obj.h
|
||||
mem_obj/buffer_factory_init.inl
|
||||
mem_obj/image_factory_init.inl
|
||||
mem_obj/pipe.h
|
||||
mem_obj/pipe.cpp
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_OS_INTERFACE
|
||||
os_interface/32bit_memory.h
|
||||
os_interface/os_library.h
|
||||
os_interface/linux/linux_inc.h
|
||||
os_interface/windows/windows_inc.h
|
||||
os_interface/device_factory.h
|
||||
os_interface/os_inc.h
|
||||
os_interface/os_interface.h
|
||||
os_interface/os_time.h
|
||||
os_interface/os_time.cpp
|
||||
os_interface/debug_settings_manager.cpp
|
||||
os_interface/debug_settings_manager.h
|
||||
os_interface/performance_counters.cpp
|
||||
os_interface/performance_counters.h
|
||||
os_interface/print.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_PLATFORM
|
||||
platform/platform.cpp
|
||||
platform/platform.h
|
||||
platform/platform_info.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_PROGRAM
|
||||
program/block_kernel_manager.cpp
|
||||
program/block_kernel_manager.h
|
||||
program/build.cpp
|
||||
program/compile.cpp
|
||||
program/create.cpp
|
||||
program/get_info.cpp
|
||||
program/heap_info.h
|
||||
program/kernel_arg_info.h
|
||||
program/kernel_info.cpp
|
||||
program/kernel_info.h
|
||||
program/link.cpp
|
||||
program/patch_info.h
|
||||
program/process_elf_binary.cpp
|
||||
program/process_spir_binary.cpp
|
||||
program/process_gen_binary.cpp
|
||||
program/program.cpp
|
||||
program/program.h
|
||||
program/printf_handler.h
|
||||
program/printf_handler.cpp
|
||||
program/print_formatter.h
|
||||
program/print_formatter.cpp
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_SAMPLER
|
||||
sampler/sampler.cpp
|
||||
sampler/sampler.h
|
||||
sampler/sampler.inl
|
||||
sampler/sampler_factory_init.inl
|
||||
)
|
||||
|
||||
list (APPEND RUNTIME_SRCS_SCHEDULER
|
||||
scheduler/scheduler_kernel.cpp
|
||||
scheduler/scheduler_kernel.h
|
||||
scheduler/CMakeLists.txt
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_SHARINGS
|
||||
sharings/sharing.h
|
||||
sharings/sharing.cpp
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_TBX
|
||||
tbx/tbx_proto.h
|
||||
tbx/tbx_sockets.cpp
|
||||
tbx/tbx_sockets.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_UTILITIES
|
||||
utilities/api_intercept.h
|
||||
utilities/arrayref.h
|
||||
utilities/cpu_info.h
|
||||
utilities/debug_file_reader.cpp
|
||||
utilities/debug_file_reader.h
|
||||
utilities/debug_settings_reader.cpp
|
||||
utilities/debug_settings_reader.h
|
||||
utilities/directory.h
|
||||
utilities/heap_allocator.cpp
|
||||
utilities/heap_allocator.h
|
||||
utilities/iflist.h
|
||||
utilities/idlist.h
|
||||
utilities/stackvec.h
|
||||
utilities/perf_profiler.cpp
|
||||
utilities/perf_profiler.h
|
||||
utilities/reference_tracked_object.h
|
||||
utilities/tag_allocator.h
|
||||
utilities/timer_util.h
|
||||
utilities/vec.h
|
||||
)
|
||||
|
||||
set (RUNTIME_SRCS_GEN_COMMON
|
||||
gen_common/aub_mapper.h
|
||||
gen_common/aub_mapper_base.h
|
||||
gen_common/hw_cmds.h
|
||||
gen_common/reg_configs.h
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
list (APPEND RUNTIME_SRCS_UTILITIES
|
||||
utilities/windows/directory.cpp
|
||||
utilities/windows/timer_util.cpp
|
||||
utilities/windows/cpu_info.cpp
|
||||
)
|
||||
else(WIN32)
|
||||
list (APPEND RUNTIME_SRCS_UTILITIES
|
||||
utilities/linux/directory.cpp
|
||||
utilities/linux/timer_util.cpp
|
||||
utilities/linux/cpu_info.cpp
|
||||
)
|
||||
endif (WIN32)
|
||||
|
||||
if (WIN32)
|
||||
list (APPEND RUNTIME_SRCS_OS_INTERFACE
|
||||
os_interface/windows/api.cpp
|
||||
os_interface/windows/d3d_sharing_functions.h
|
||||
os_interface/windows/d3d9_sharing_functions.cpp
|
||||
os_interface/windows/d3d10_11_sharing_functions.cpp
|
||||
os_interface/windows/debug_registry_reader.cpp
|
||||
os_interface/windows/deferrable_deletion_win.cpp
|
||||
os_interface/windows/deferrable_deletion_win.h
|
||||
os_interface/windows/device_command_stream.inl
|
||||
os_interface/windows/device_factory.cpp
|
||||
os_interface/windows/gdi_interface.cpp
|
||||
os_interface/windows/gdi_interface.h
|
||||
os_interface/windows/options.cpp
|
||||
os_interface/windows/os_interface.cpp
|
||||
os_interface/windows/os_interface.h
|
||||
os_interface/windows/os_library.cpp
|
||||
os_interface/windows/os_library.h
|
||||
os_interface/windows/os_time.cpp
|
||||
os_interface/windows/os_time.h
|
||||
os_interface/windows/registry_reader.h
|
||||
os_interface/windows/thk_wrapper.h
|
||||
os_interface/windows/wddm.cpp
|
||||
os_interface/windows/wddm.h
|
||||
os_interface/windows/wddm.inl
|
||||
os_interface/windows/wddm_32bit_memory.cpp
|
||||
os_interface/windows/wddm_allocation.h
|
||||
os_interface/windows/wddm_device_command_stream.inl
|
||||
os_interface/windows/wddm_device_command_stream.h
|
||||
os_interface/windows/wddm_engine_mapper.h
|
||||
os_interface/windows/wddm_memory_manager.cpp
|
||||
os_interface/windows/wddm_memory_manager.h
|
||||
os_interface/windows/windows_inc.cpp
|
||||
os_interface/windows/windows_wrapper.h
|
||||
os_interface/windows/performance_counters_win.cpp
|
||||
os_interface/windows/performance_counters_win.h
|
||||
os_interface/windows/print.cpp
|
||||
os_interface/windows/driver_info.h
|
||||
os_interface/windows/driver_info.cpp
|
||||
)
|
||||
|
||||
if ("${IGDRCL_OPTION__BITS}" STREQUAL "32" )
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /SAFESEH:NO")
|
||||
endif ("${IGDRCL_OPTION__BITS}" STREQUAL "32")
|
||||
endif (WIN32)
|
||||
|
||||
if (UNIX)
|
||||
list (APPEND RUNTIME_SRCS_OS_INTERFACE
|
||||
os_interface/linux/api.cpp
|
||||
os_interface/linux/d3d_sharing_functions.h
|
||||
os_interface/linux/debug_env_reader.cpp
|
||||
os_interface/linux/device_command_stream.inl
|
||||
os_interface/linux/device_factory.cpp
|
||||
os_interface/linux/drm_32bit_memory.cpp
|
||||
os_interface/linux/drm_allocation.h
|
||||
os_interface/linux/drm_buffer_object.cpp
|
||||
os_interface/linux/drm_buffer_object.h
|
||||
os_interface/linux/drm_command_stream.inl
|
||||
os_interface/linux/drm_command_stream.h
|
||||
os_interface/linux/drm_engine_mapper.h
|
||||
os_interface/linux/drm_null_device.h
|
||||
os_interface/linux/drm_gem_close_worker.cpp
|
||||
os_interface/linux/drm_gem_close_worker.h
|
||||
os_interface/linux/drm_memory_manager.cpp
|
||||
os_interface/linux/drm_memory_manager.h
|
||||
os_interface/linux/drm_neo.cpp
|
||||
os_interface/linux/drm_neo.h
|
||||
os_interface/linux/drm_neo_create.cpp
|
||||
os_interface/linux/hw_info_config.cpp
|
||||
os_interface/linux/hw_info_config.h
|
||||
os_interface/linux/linux_inc.cpp
|
||||
os_interface/linux/options.cpp
|
||||
os_interface/linux/os_interface.cpp
|
||||
os_interface/linux/os_interface.h
|
||||
os_interface/linux/os_library.cpp
|
||||
os_interface/linux/os_library.h
|
||||
os_interface/linux/os_time.cpp
|
||||
os_interface/linux/os_time.h
|
||||
os_interface/linux/performance_counters_linux.cpp
|
||||
os_interface/linux/performance_counters_linux.h
|
||||
os_interface/linux/print.cpp
|
||||
os_interface/linux/driver_info.cpp
|
||||
)
|
||||
endif (UNIX)
|
||||
|
||||
add_subdirectory("sharings")
|
||||
|
||||
set (RUNTIME_SRCS
|
||||
${RUNTIME_SRCS_API}
|
||||
${RUNTIME_SRCS_ACCELERATORS}
|
||||
${RUNTIME_SRCS_AUB_MEM_DUMP}
|
||||
${RUNTIME_SRCS_BUILT_INS}
|
||||
${RUNTIME_SRCS_BUILT_IN_KERNELS}
|
||||
${RUNTIME_SRCS_COMMANDS}
|
||||
${RUNTIME_SRCS_COMMAND_QUEUE}
|
||||
${RUNTIME_SRCS_COMMAND_STREAM}
|
||||
${RUNTIME_SRCS_COMPILER_INTERFACE}
|
||||
${RUNTIME_SRCS_CONTEXT}
|
||||
${RUNTIME_SRCS_DEVICE}
|
||||
${RUNTIME_SRCS_DEVICE_QUEUE}
|
||||
${RUNTIME_SRCS_EVENT}
|
||||
${RUNTIME_SRCS_EXECUTION_MODEL}
|
||||
${RUNTIME_SRCS_GEN_COMMON}
|
||||
${RUNTIME_SRCS_GTPIN}
|
||||
${RUNTIME_SRCS_HELPERS}
|
||||
${RUNTIME_SRCS_INDIRECT_HEAP}
|
||||
${RUNTIME_SRCS_INSTRUMENTATION}
|
||||
${RUNTIME_SRCS_KERNEL}
|
||||
${RUNTIME_SRCS_MEMORY_MANAGER}
|
||||
${RUNTIME_SRCS_GMM_HELPER}
|
||||
${RUNTIME_SRCS_MEM_OBJ}
|
||||
${RUNTIME_SRCS_OS_INTERFACE}
|
||||
${RUNTIME_SRCS_PLATFORM}
|
||||
${RUNTIME_SRCS_PROGRAM}
|
||||
${RUNTIME_SRCS_SAMPLER}
|
||||
${RUNTIME_SRCS_SCHEDULER}
|
||||
${RUNTIME_SRCS_SHARINGS}
|
||||
${RUNTIME_SRCS_TBX}
|
||||
${RUNTIME_SRCS_UTILITIES}
|
||||
CMakeLists.txt
|
||||
)
|
||||
|
||||
# Enable SSE4/AVX2 options for files that need them
|
||||
if(MSVC)
|
||||
set_source_files_properties(command_queue/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS /arch:AVX2)
|
||||
else()
|
||||
set_source_files_properties(command_queue/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
|
||||
set_source_files_properties(command_queue/local_id_gen_sse4.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
|
||||
endif (MSVC)
|
||||
|
||||
# Put Driver version into define
|
||||
if(NEO_DRIVER_VERSION)
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/device/device_caps.cpp PROPERTIES COMPILE_DEFINITIONS NEO_DRIVER_VERSION="${NEO_DRIVER_VERSION}")
|
||||
endif(NEO_DRIVER_VERSION)
|
||||
|
||||
list (APPEND HW_SRC_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/gen_common)
|
||||
|
||||
# Include/enable each GEN
|
||||
# Reverse order so that GEN N+1 includes GEN N
|
||||
foreach(GEN_NUM RANGE ${MAX_GEN} 0 -1)
|
||||
GEN_CONTAINS_PLATFORMS("SUPPORTED" ${GEN_NUM} GENX_HAS_PLATFORMS)
|
||||
if(${GENX_HAS_PLATFORMS})
|
||||
# Add GEN-specific files
|
||||
set(RUNTIME_SRCS_GEN${GEN_NUM} ${RUNTIME_SRCS_GEN${GEN_NUM}_SPECIFIC})
|
||||
|
||||
# Add default GEN files
|
||||
foreach(SRC_IT ${RUNTIME_SRCS_GENX})
|
||||
list (APPEND RUNTIME_SRCS_GEN${GEN_NUM} gen${GEN_NUM}/${SRC_IT})
|
||||
endforeach(SRC_IT)
|
||||
|
||||
# Get all supported platforms for this GEN
|
||||
GET_PLATFORMS_FOR_GEN("SUPPORTED" ${GEN_NUM} SUPPORTED_GENX_PLATFORMS)
|
||||
|
||||
# Add platform-specific files
|
||||
foreach(PLATFORM_IT ${SUPPORTED_GENX_PLATFORMS})
|
||||
string(TOLOWER ${PLATFORM_IT} PLATFORM_IT_LOWER)
|
||||
list(APPEND RUNTIME_SRCS_GEN${GEN_NUM} gen${GEN_NUM}/hw_cmds_${PLATFORM_IT_LOWER}.h)
|
||||
list(APPEND RUNTIME_SRCS_GEN${GEN_NUM} gen${GEN_NUM}/hw_info_${PLATFORM_IT_LOWER}.cpp)
|
||||
list(APPEND RUNTIME_SRCS_GEN${GEN_NUM} ${RUNTIME_SRCS_GEN${GEN_NUM}_${PLATFORM_IT}})
|
||||
if(UNIX)
|
||||
list(APPEND RUNTIME_SRCS_GEN${GEN_NUM} gen${GEN_NUM}/linux/hw_info_config_${PLATFORM_IT_LOWER}.cpp)
|
||||
endif(UNIX)
|
||||
|
||||
# Enable platform
|
||||
list(APPEND GEN${GEN_NUM}_SRC_LINK gen${GEN_NUM}/enable_${PLATFORM_IT_LOWER}.cpp)
|
||||
if(UNIX)
|
||||
list(APPEND GEN${GEN_NUM}_SRC_LINK gen${GEN_NUM}/linux/enable_${PLATFORM_IT_LOWER}.cpp)
|
||||
endif(UNIX)
|
||||
endforeach(PLATFORM_IT)
|
||||
|
||||
list(APPEND GEN${GEN_NUM}_SRC_LINK gen${GEN_NUM}/enable_family_full.cpp)
|
||||
|
||||
# Append this GEN's sources to the list of all sources
|
||||
foreach(SRC_IT ${RUNTIME_SRCS_GEN${GEN_NUM}})
|
||||
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${SRC_IT}")
|
||||
list(APPEND RUNTIME_SRCS ${SRC_IT})
|
||||
endif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${SRC_IT}")
|
||||
endforeach(SRC_IT)
|
||||
|
||||
# Set-up gen include dir and sources for the dll
|
||||
list(APPEND HW_SRC_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/gen${GEN_NUM})
|
||||
list(APPEND HW_SRC_LINK ${GEN${GEN_NUM}_SRC_LINK})
|
||||
|
||||
source_group("gen${GEN_NUM}" FILES ${RUNTIME_SRCS_GEN${GEN_NUM}} ${GEN${GEN_NUM}_SRC_LINK} )
|
||||
endif(${GENX_HAS_PLATFORMS})
|
||||
endforeach(GEN_NUM)
|
||||
|
||||
add_library(${NEO_STATIC_LIB_NAME} STATIC $<TARGET_OBJECTS:${BIKSIM_LIB_NAME}>
|
||||
${RUNTIME_SRCS}
|
||||
)
|
||||
|
||||
target_link_libraries(${NEO_STATIC_LIB_NAME} elflib)
|
||||
|
||||
target_include_directories(${NEO_STATIC_LIB_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
${GMM_INCLUDE_PATHS}
|
||||
${UMKM_SHAREDDATA_INCLUDE_PATHS}
|
||||
)
|
||||
|
||||
target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${IGDRCL__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
||||
|
||||
if(GTPIN_HEADERS_DIR)
|
||||
target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC
|
||||
${GTPIN_HEADERS_DIR}
|
||||
)
|
||||
endif(GTPIN_HEADERS_DIR)
|
||||
|
||||
if (WIN32)
|
||||
target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC
|
||||
${WDK_INCLUDE_PATHS}
|
||||
os_interface/windows
|
||||
)
|
||||
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC OGL=1)
|
||||
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC INSTR_WIN_UMD=1)
|
||||
|
||||
endif (WIN32)
|
||||
|
||||
if (UNIX)
|
||||
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC OGL_GEM=1)
|
||||
target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC
|
||||
os_interface/linux
|
||||
"${LIBDRM_DIR}/include"
|
||||
)
|
||||
endif (UNIX)
|
||||
|
||||
#cl_khr_priority support
|
||||
if(NOT MSVC)
|
||||
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC -DSUPPORT_PRIORITY_HINTS)
|
||||
message(STATUS "Supporting priority hints")
|
||||
endif()
|
||||
|
||||
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM})
|
||||
|
||||
link_directories(${GMM_LIB_PATHS})
|
||||
|
||||
if(NOT GMMUMD_LIB_NAME)
|
||||
set(GMMUMD_LIB_NAME "gmm_umd" CACHE STRING "name of gmm static library")
|
||||
endif()
|
||||
target_link_libraries(${NEO_STATIC_LIB_NAME} ${GMMUMD_LIB_NAME})
|
||||
|
||||
if( "${IGDRCL_OPTION__BITS}" STREQUAL "32" )
|
||||
set( DEF_FILE "${CMAKE_CURRENT_SOURCE_DIR}/dll/windows/OpenCLExports32.def" )
|
||||
else( "${IGDRCL_OPTION__BITS}" STREQUAL "32" )
|
||||
set( DEF_FILE "${CMAKE_CURRENT_SOURCE_DIR}/dll/windows/OpenCLExports64.def" )
|
||||
endif( "${IGDRCL_OPTION__BITS}" STREQUAL "32" )
|
||||
|
||||
list(APPEND LIB_FLAGS_DEFINITIONS -DCIF_HEADERS_ONLY_BUILD ${SUPPORTED_GEN_FLAGS_DEFINITONS})
|
||||
|
||||
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC ${LIB_FLAGS_DEFINITIONS})
|
||||
if(IGC_OCL_ADAPTOR_DIR) # IGC/AdaptorOCL
|
||||
target_include_directories("${NEO_STATIC_LIB_NAME}" PUBLIC "${IGC_OCL_ADAPTOR_DIR}")
|
||||
endif(IGC_OCL_ADAPTOR_DIR)
|
||||
|
||||
if(CIF_BASE_DIR)
|
||||
target_include_directories("${NEO_STATIC_LIB_NAME}" PUBLIC "${CIF_BASE_DIR}")
|
||||
endif(CIF_BASE_DIR)
|
||||
|
||||
set(IGDRCL_LIB_FLAGS_DEFINITIONS ${LIB_FLAGS_DEFINITIONS} PARENT_SCOPE)
|
||||
set_target_properties(${NEO_STATIC_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
# Append the sanitizer flags to the static library's COMPILE_FLAGS.
# APPEND_STRING adds its value to the existing string with no separator, and
# several unquoted values are joined as a CMake list (';'). Passing one quoted,
# space-prefixed string keeps the flags separated from each other and from any
# flags already present; it degrades to harmless whitespace when both are empty.
set_property(TARGET ${NEO_STATIC_LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS " ${ASAN_FLAGS} ${TSAN_FLAGS}")
|
||||
|
||||
set_target_properties(${NEO_STATIC_LIB_NAME} PROPERTIES FOLDER "opencl runtime")
|
||||
|
||||
target_include_directories(${NEO_STATIC_LIB_NAME} BEFORE PRIVATE ${HW_SRC_INCLUDES})
|
||||
|
||||
if(${GENERATE_EXECUTABLE})
|
||||
set (RUNTIME_SRCS_DLL
|
||||
dll/options.cpp
|
||||
dll/create_command_stream.cpp
|
||||
dll/create_deferred_deleter.cpp
|
||||
helpers/abort.cpp
|
||||
helpers/debug_helpers.cpp
|
||||
gmm_helper/resource_info.cpp
|
||||
program/evaluate_unhandled_token.cpp
|
||||
"${DEF_FILE}"
|
||||
)
|
||||
|
||||
list (APPEND RUNTIME_SRCS_DLL ${HW_SRC_LINK})
|
||||
|
||||
if (UNIX)
|
||||
list (APPEND RUNTIME_SRCS_DLL dll/linux/drm_neo_create.cpp)
|
||||
endif (UNIX)
|
||||
|
||||
if (WIN32)
|
||||
list (APPEND RUNTIME_SRCS_DLL os_interface/windows/wddm_create.cpp)
|
||||
list (APPEND RUNTIME_SRCS_DLL gmm_helper/page_table_mngr.cpp)
|
||||
list (APPEND RUNTIME_SRCS_DLL gmm_helper/gmm_memory.cpp)
|
||||
endif (WIN32)
|
||||
|
||||
list (APPEND RUNTIME_SRCS_DLL api/api.cpp)
|
||||
|
||||
if(GTPIN_HEADERS_DIR)
|
||||
list (APPEND RUNTIME_SRCS_DLL gtpin/gtpin_init.cpp)
|
||||
endif(GTPIN_HEADERS_DIR)
|
||||
|
||||
add_library(${NEO_DYNAMIC_LIB_NAME} SHARED
|
||||
${RUNTIME_SRCS_DLL}
|
||||
$<TARGET_OBJECTS:${SHARINGS_ENABLE_LIB_NAME}>
|
||||
$<TARGET_OBJECTS:${BUILTINS_SOURCES_LIB_NAME}>
|
||||
$<TARGET_OBJECTS:${BUILTINS_BINARIES_LIB_NAME}>
|
||||
$<TARGET_OBJECTS:${SCHEDULER_BINARY_LIB_NAME}>
|
||||
)
|
||||
|
||||
target_include_directories(${NEO_DYNAMIC_LIB_NAME} BEFORE PRIVATE
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
${HW_SRC_INCLUDES}
|
||||
)
|
||||
|
||||
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${NEO_STATIC_LIB_NAME})
|
||||
|
||||
if (WIN32)
|
||||
target_include_directories(${NEO_DYNAMIC_LIB_NAME} PUBLIC
|
||||
${WDK_INCLUDE_PATHS}
|
||||
${GMM_INCLUDE_PATHS}
|
||||
${UMKM_SHAREDDATA_INCLUDE_PATHS}
|
||||
${INSTRUMENTATION_INCLUDE_PATH}
|
||||
)
|
||||
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${NEO_STATIC_LIB_NAME} dxgi Ws2_32.lib)
|
||||
else(WIN32)
|
||||
target_include_directories(${NEO_DYNAMIC_LIB_NAME} PUBLIC
|
||||
${GMM_INCLUDE_PATHS}
|
||||
${UMKM_SHAREDDATA_INCLUDE_PATHS}
|
||||
${INSTRUMENTATION_INCLUDE_PATH}
|
||||
)
|
||||
endif (WIN32)
|
||||
|
||||
if (UNIX)
|
||||
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} dl pthread)
|
||||
set_property(TARGET ${NEO_DYNAMIC_LIB_NAME}
|
||||
APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/dll/linux/ocl.exports"
|
||||
)
|
||||
endif (UNIX)
|
||||
|
||||
set_target_properties(${NEO_DYNAMIC_LIB_NAME} PROPERTIES
|
||||
DEBUG_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}"
|
||||
RELEASE_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}"
|
||||
RELEASE-INTERNAL_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}"
|
||||
OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}"
|
||||
)
|
||||
|
||||
# Append the ASAN flags to the shared library's COMPILE_FLAGS.
# APPEND_STRING adds no separator of its own, so the value is quoted and
# space-prefixed (same form as the LINK_FLAGS append for this target) to keep
# it apart from any flags already stored in the property.
set_property(TARGET ${NEO_DYNAMIC_LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS " ${ASAN_FLAGS}")
|
||||
set_target_properties(${NEO_DYNAMIC_LIB_NAME} PROPERTIES FOLDER "opencl runtime")
|
||||
endif(${GENERATE_EXECUTABLE})
|
||||
|
||||
if (WIN32)
|
||||
# Group the shared-library sources in the IDE. The list built in this file is
# RUNTIME_SRCS_DLL (set only when GENERATE_EXECUTABLE is on; otherwise this
# expands to an empty FILES list). The previously referenced IGDRCL_SRCS_DLL
# is not set anywhere in this file.
source_group("source files" FILES ${RUNTIME_SRCS_DLL})
|
||||
source_group("source files\\api" FILES ${RUNTIME_SRCS_API})
|
||||
source_group("source files\\accelerators" FILES ${RUNTIME_SRCS_ACCELERATORS})
|
||||
source_group("source files\\aub_mem_dump" FILES ${RUNTIME_SRCS_AUB_MEM_DUMP})
|
||||
source_group("source files\\built_ins" FILES ${RUNTIME_SRCS_BUILT_INS})
|
||||
source_group("source files\\built_ins\\kernels" FILES ${RUNTIME_SRCS_BUILT_IN_KERNELS})
|
||||
source_group("source files\\commands" FILES ${RUNTIME_SRCS_COMMANDS})
|
||||
source_group("source files\\command_queue" FILES ${RUNTIME_SRCS_COMMAND_QUEUE})
|
||||
source_group("source files\\command_stream" FILES ${RUNTIME_SRCS_COMMAND_STREAM})
|
||||
source_group("source files\\compiler_interface" FILES ${RUNTIME_SRCS_COMPILER_INTERFACE})
|
||||
source_group("source files\\context" FILES ${RUNTIME_SRCS_CONTEXT})
|
||||
source_group("source files\\device" FILES ${RUNTIME_SRCS_DEVICE})
|
||||
source_group("source files\\device_queue" FILES ${RUNTIME_SRCS_DEVICE_QUEUE})
|
||||
source_group("source files\\event" FILES ${RUNTIME_SRCS_EVENT})
|
||||
source_group("source files\\execution_model" FILES ${RUNTIME_SRCS_EXECUTION_MODEL})
|
||||
source_group("source files\\gen_common" FILES ${RUNTIME_SRCS_GEN_COMMON})
|
||||
source_group("source files\\helpers" FILES ${RUNTIME_SRCS_HELPERS})
|
||||
source_group("source files\\indirect_heap" FILES ${RUNTIME_SRCS_INDIRECT_HEAP})
|
||||
source_group("source files\\instrumentation" FILES ${RUNTIME_SRCS_INSTRUMENTATION})
|
||||
source_group("source files\\kernel" FILES ${RUNTIME_SRCS_KERNEL})
|
||||
source_group("source files\\memory_manager" FILES ${RUNTIME_SRCS_MEMORY_MANAGER})
|
||||
source_group("source files\\gmm_helper" FILES ${RUNTIME_SRCS_GMM_HELPER})
|
||||
if(GTPIN_HEADERS_DIR)
|
||||
source_group("source files\\gtpin" FILES ${RUNTIME_SRCS_GTPIN})
|
||||
endif(GTPIN_HEADERS_DIR)
|
||||
source_group("source files\\mem_obj" FILES ${RUNTIME_SRCS_MEM_OBJ})
|
||||
source_group("source files\\os_interface" FILES ${RUNTIME_SRCS_OS_INTERFACE})
|
||||
source_group("source files\\platform" FILES ${RUNTIME_SRCS_PLATFORM})
|
||||
source_group("source files\\program" FILES ${RUNTIME_SRCS_PROGRAM})
|
||||
source_group("source files\\sampler" FILES ${RUNTIME_SRCS_SAMPLER})
|
||||
source_group("source files\\scheduler" FILES ${RUNTIME_SRCS_SCHEDULER})
|
||||
source_group("source files\\sharings" FILES ${RUNTIME_SRCS_SHARINGS})
|
||||
source_group("source files\\tbx" FILES ${RUNTIME_SRCS_TBX})
|
||||
source_group("source files\\utilities" FILES ${RUNTIME_SRCS_UTILITIES})
|
||||
endif (WIN32)
|
||||
|
||||
# Optional "clang-tidy" custom target: defined on UNIX only, and only when no
# target with that name already exists.
if (UNIX)
|
||||
if(NOT (TARGET clang-tidy))
|
||||
add_custom_target(clang-tidy
|
||||
DEPENDS scheduler
|
||||
)
|
||||
|
||||
# Attached to the target as a POST_BUILD step: finds every .cpp file under
# this directory, runs clang-tidy on each via xargs (up to `nproc` processes
# at once) with ${IGDRCL_BINARY_DIR} as the build path, and tees the output
# into ${IGDRCL_BINARY_DIR}/clang-tidy.log.
# NOTE(review): the *.cpp pattern is not quoted, so the shell may expand it
# before find sees it if WORKING_DIRECTORY contains .cpp files -- confirm.
# NOTE(review): the pipes and backticks rely on the command being handed to a
# shell unescaped (no VERBATIM) -- verify before changing generators.
add_custom_command(
|
||||
TARGET clang-tidy
|
||||
POST_BUILD
|
||||
COMMAND echo clang-tidy...
|
||||
COMMAND find ${CMAKE_CURRENT_SOURCE_DIR} -name *.cpp -print0 | xargs -0 -I{} -P`nproc` clang-tidy -p ${IGDRCL_BINARY_DIR} {} | tee ${IGDRCL_BINARY_DIR}/clang-tidy.log
|
||||
WORKING_DIRECTORY ${IGDRCL_SOURCE_DIR}
|
||||
)
|
||||
endif(NOT (TARGET clang-tidy))
|
||||
endif(UNIX)
|
||||
|
||||
31
runtime/accelerators/CMakeLists.txt
Normal file
31
runtime/accelerators/CMakeLists.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# CMake 3.2.0 is the oldest supported version.
cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR)

# Sources and headers of the accelerators component, published to the parent
# directory scope through PARENT_SCOPE.
set(RUNTIME_SRCS_ACCELERATORS
  ${CMAKE_CURRENT_SOURCE_DIR}/intel_accelerator.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/intel_accelerator.h
  ${CMAKE_CURRENT_SOURCE_DIR}/intel_motion_estimation.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/intel_motion_estimation.h
  ${CMAKE_CURRENT_SOURCE_DIR}/vebox_accelerator.h
  PARENT_SCOPE
)
|
||||
81
runtime/accelerators/intel_accelerator.cpp
Normal file
81
runtime/accelerators/intel_accelerator.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/accelerators/intel_accelerator.h"
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/helpers/get_info.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Answers a cl_accelerator_info_intel query for this accelerator.
//
// paramName          - which property to report (descriptor, reference count,
//                      owning context or accelerator type).
// paramValueSize     - size in bytes of the buffer behind paramValue.
// paramValue         - destination buffer, forwarded to the global ::getInfo
//                      helper together with the source value.
// paramValueSizeRet  - optional; when non-null it receives the size of the
//                      queried value, or 0 for an unrecognized paramName.
//
// Returns the status produced by ::getInfo for recognized queries and
// CL_INVALID_VALUE for any other paramName.
cl_int IntelAccelerator::getInfo(cl_accelerator_info_intel paramName,
                                 size_t paramValueSize,
                                 void *paramValue,
                                 size_t *paramValueSizeRet) const {
    // Unknown queries fall through the switch with these defaults intact.
    size_t valueSize = 0;
    cl_int status = CL_INVALID_VALUE;

    switch (paramName) {
    case CL_ACCELERATOR_DESCRIPTOR_INTEL:
        valueSize = getDescriptorSize();
        status = ::getInfo(paramValue, paramValueSize, getDescriptor(), valueSize);
        break;

    case CL_ACCELERATOR_REFERENCE_COUNT_INTEL: {
        auto refCount = getReference();
        valueSize = sizeof(cl_uint);
        status = ::getInfo(paramValue, paramValueSize, &refCount, valueSize);
        break;
    }

    case CL_ACCELERATOR_CONTEXT_INTEL: {
        cl_context owningContext = static_cast<cl_context>(pContext);
        valueSize = sizeof(cl_context);
        status = ::getInfo(paramValue, paramValueSize, &owningContext, valueSize);
        break;
    }

    case CL_ACCELERATOR_TYPE_INTEL: {
        auto acceleratorType = getTypeId();
        valueSize = sizeof(cl_accelerator_type_intel);
        status = ::getInfo(paramValue, paramValueSize, &acceleratorType, valueSize);
        break;
    }

    default:
        break;
    }

    // The size is reported regardless of the status of the copy itself.
    if (paramValueSizeRet != nullptr) {
        *paramValueSizeRet = valueSize;
    }

    return status;
}
|
||||
}
|
||||
79
runtime/accelerators/intel_accelerator.h
Normal file
79
runtime/accelerators/intel_accelerator.h
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/api/cl_types.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// cl_intel_accelerator Class Stuff
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
class Context;
|
||||
|
||||
typedef struct TagAcceleratorObjParams {
|
||||
cl_uint AcceleratorType;
|
||||
cl_uint AcceleratorFlags;
|
||||
} OCLRT_ACCELERATOR_OBJECT_PARAMS, *POCLRT_ACCELERATOR_OBJECT_PARAMS;
|
||||
|
||||
template <>
|
||||
struct OpenCLObjectMapper<_cl_accelerator_intel> {
|
||||
typedef class IntelAccelerator DerivedType;
|
||||
};
|
||||
|
||||
// Common base of the accelerator objects exposed through the
// _cl_accelerator_intel handle. It records the owning context, the
// accelerator type id and a (size, pointer) pair describing the descriptor.
// The descriptor pointer is stored as given; no copy is made here.
// NOTE(review): presumably the caller must keep the descriptor alive for the
// lifetime of the accelerator - confirm against the API entry points.
class IntelAccelerator : public BaseObject<_cl_accelerator_intel> {
  public:
    static const cl_ulong objectMagic = 0xC6D72FA2E81EA569ULL;

    // Leaves every member at its in-class default.
    IntelAccelerator() {}

    // Stores the arguments verbatim in the corresponding members.
    IntelAccelerator(Context *context,
                     cl_accelerator_type_intel typeId,
                     size_t descriptorSize,
                     const void *descriptor)
        : pContext(context),
          typeId(typeId),
          descriptorSize(descriptorSize),
          pDescriptor(descriptor) {}

    // Accelerator type id passed at construction.
    cl_accelerator_type_intel getTypeId() const { return typeId; }

    // Size in bytes recorded for the descriptor.
    size_t getDescriptorSize() const { return descriptorSize; }

    // Descriptor pointer recorded at construction.
    const void *getDescriptor() const { return pDescriptor; }

    // Reports one accelerator property; defined in intel_accelerator.cpp.
    cl_int getInfo(cl_accelerator_info_intel paramName,
                   size_t paramValueSize,
                   void *paramValue,
                   size_t *paramValueSizeRet) const;

  protected:
    Context *pContext = nullptr;
    const cl_accelerator_type_intel typeId = -1;
    const size_t descriptorSize = 0;
    const void *pDescriptor = nullptr;
};
|
||||
} // namespace OCLRT
|
||||
79
runtime/accelerators/intel_motion_estimation.cpp
Normal file
79
runtime/accelerators/intel_motion_estimation.cpp
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/accelerators/intel_motion_estimation.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Checks the arguments used to create a motion-estimation accelerator.
//
// context / typeId are only verified through DEBUG_BREAK_IF. The descriptor
// must be non-null, exactly sizeof(cl_motion_estimation_desc_intel) bytes, and
// each of its mb_block_type, subpixel_mode, sad_adjust_mode and
// search_path_type fields must hold one of the values accepted below.
//
// Returns CL_SUCCESS when everything is acceptable, otherwise
// CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL.
cl_int VmeAccelerator::validateVmeArgs(Context *context,
                                       cl_accelerator_type_intel typeId,
                                       size_t descriptorSize,
                                       const void *descriptor) {
    DEBUG_BREAK_IF(!context);
    DEBUG_BREAK_IF(typeId != CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL);

    if (descriptor == nullptr ||
        descriptorSize != sizeof(cl_motion_estimation_desc_intel)) {
        return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL;
    }

    // Safe to interpret the blob now that pointer and size have been checked.
    const auto *desc = static_cast<const cl_motion_estimation_desc_intel *>(descriptor);

    const auto blockType = desc->mb_block_type;
    if (blockType != CL_ME_MB_TYPE_16x16_INTEL &&
        blockType != CL_ME_MB_TYPE_8x8_INTEL &&
        blockType != CL_ME_MB_TYPE_4x4_INTEL) {
        return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL;
    }

    const auto subpixelMode = desc->subpixel_mode;
    if (subpixelMode != CL_ME_SUBPIXEL_MODE_INTEGER_INTEL &&
        subpixelMode != CL_ME_SUBPIXEL_MODE_HPEL_INTEL &&
        subpixelMode != CL_ME_SUBPIXEL_MODE_QPEL_INTEL) {
        return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL;
    }

    const auto sadAdjustMode = desc->sad_adjust_mode;
    if (sadAdjustMode != CL_ME_SAD_ADJUST_MODE_NONE_INTEL &&
        sadAdjustMode != CL_ME_SAD_ADJUST_MODE_HAAR_INTEL) {
        return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL;
    }

    const auto searchPath = desc->search_path_type;
    if (searchPath != CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL &&
        searchPath != CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL &&
        searchPath != CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL) {
        return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL;
    }

    return CL_SUCCESS;
}
|
||||
}
|
||||
72
runtime/accelerators/intel_motion_estimation.h
Normal file
72
runtime/accelerators/intel_motion_estimation.h
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/accelerators/intel_accelerator.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VmeAccelerator Class Stuff
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
class Context;
|
||||
|
||||
class VmeAccelerator : public IntelAccelerator {
|
||||
public:
|
||||
static VmeAccelerator *create(Context *context,
|
||||
cl_accelerator_type_intel typeId,
|
||||
size_t descriptorSize,
|
||||
const void *descriptor,
|
||||
cl_int &result) {
|
||||
|
||||
result = validateVmeArgs(context, typeId, descriptorSize, descriptor);
|
||||
VmeAccelerator *acc = nullptr;
|
||||
|
||||
if (result == CL_SUCCESS) {
|
||||
acc = new VmeAccelerator(
|
||||
context,
|
||||
typeId,
|
||||
descriptorSize,
|
||||
descriptor);
|
||||
}
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
protected:
|
||||
private:
|
||||
VmeAccelerator(Context *context,
|
||||
cl_accelerator_type_intel typeId,
|
||||
size_t descriptorSize,
|
||||
const void *descriptor) : IntelAccelerator(context,
|
||||
typeId,
|
||||
descriptorSize,
|
||||
descriptor) {
|
||||
}
|
||||
static cl_int validateVmeArgs(Context *context,
|
||||
cl_accelerator_type_intel typeId,
|
||||
size_t descriptorSize,
|
||||
const void *descriptor);
|
||||
};
|
||||
}
|
||||
66
runtime/accelerators/vebox_accelerator.h
Normal file
66
runtime/accelerators/vebox_accelerator.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/accelerators/intel_accelerator.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VeboxAccelerator Class Stuff
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
class Context;
|
||||
|
||||
class VeboxAccelerator : public IntelAccelerator {
|
||||
public:
|
||||
static VeboxAccelerator *create(Context *context,
|
||||
cl_accelerator_type_intel typeId,
|
||||
size_t descriptorSize,
|
||||
const void *descriptor,
|
||||
cl_int &result) {
|
||||
DEBUG_BREAK_IF(!context);
|
||||
DEBUG_BREAK_IF(!descriptor);
|
||||
VeboxAccelerator *acc = new VeboxAccelerator(
|
||||
context,
|
||||
typeId,
|
||||
descriptorSize,
|
||||
descriptor);
|
||||
|
||||
result = CL_SUCCESS;
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
protected:
|
||||
private:
|
||||
VeboxAccelerator(Context *context,
|
||||
cl_accelerator_type_intel typeId,
|
||||
size_t descriptorSize,
|
||||
const void *descriptor) : IntelAccelerator(context,
|
||||
typeId,
|
||||
descriptorSize,
|
||||
descriptor) {
|
||||
}
|
||||
};
|
||||
}
|
||||
31
runtime/api/CMakeLists.txt
Normal file
31
runtime/api/CMakeLists.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# CMake 3.2.0 is the oldest supported version.
cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR)

# Sources and headers of the API component, published to the parent directory
# scope through PARENT_SCOPE.
set(RUNTIME_SRCS_API
  ${CMAKE_CURRENT_SOURCE_DIR}/api.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/api.h
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_types.h
  ${CMAKE_CURRENT_SOURCE_DIR}/dispatch.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/dispatch.h
  PARENT_SCOPE
)
|
||||
3760
runtime/api/api.cpp
Normal file
3760
runtime/api/api.cpp
Normal file
File diff suppressed because it is too large
Load Diff
887
runtime/api/api.h
Normal file
887
runtime/api/api.h
Normal file
@@ -0,0 +1,887 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Include guard: the other runtime headers use `#pragma once`; this header
// had none, so repeated inclusion re-parsed every prototype below.
#pragma once

#include "config.h"

#include "CL/cl.h"
#include "CL/cl_gl.h"
#include "runtime/api/dispatch.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
cl_int CL_API_CALL clGetPlatformIDs(
|
||||
cl_uint numEntries,
|
||||
cl_platform_id *platforms,
|
||||
cl_uint *numPlatforms);
|
||||
|
||||
cl_int CL_API_CALL clGetPlatformInfo(
|
||||
cl_platform_id platform,
|
||||
cl_platform_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetDeviceIDs(
|
||||
cl_platform_id platform,
|
||||
cl_device_type deviceType,
|
||||
cl_uint numEntries,
|
||||
cl_device_id *devices,
|
||||
cl_uint *numDevices);
|
||||
|
||||
cl_int CL_API_CALL clGetDeviceInfo(
|
||||
cl_device_id device,
|
||||
cl_device_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clCreateSubDevices(
|
||||
cl_device_id inDevice,
|
||||
const cl_device_partition_property *properties,
|
||||
cl_uint numDevices,
|
||||
cl_device_id *outDevices,
|
||||
cl_uint *numDevicesRet);
|
||||
|
||||
cl_int CL_API_CALL clRetainDevice(
|
||||
cl_device_id device);
|
||||
|
||||
cl_int CL_API_CALL clReleaseDevice(
|
||||
cl_device_id device);
|
||||
|
||||
cl_context CL_API_CALL clCreateContext(
|
||||
const cl_context_properties *properties,
|
||||
cl_uint numDevices,
|
||||
const cl_device_id *devices,
|
||||
void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
|
||||
void *userData,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_context CL_API_CALL clCreateContextFromType(
|
||||
const cl_context_properties *properties,
|
||||
cl_device_type deviceType,
|
||||
void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
|
||||
void *userData,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clRetainContext(
|
||||
cl_context context);
|
||||
|
||||
cl_int CL_API_CALL clReleaseContext(
|
||||
cl_context context);
|
||||
|
||||
cl_int CL_API_CALL clGetContextInfo(
|
||||
cl_context context,
|
||||
cl_context_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetGLContextInfoKHR(
|
||||
const cl_context_properties *properties,
|
||||
cl_gl_context_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_command_queue CL_API_CALL clCreateCommandQueue(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
cl_command_queue_properties properties,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clRetainCommandQueue(
|
||||
cl_command_queue commandQueue);
|
||||
|
||||
cl_int CL_API_CALL clReleaseCommandQueue(
|
||||
cl_command_queue commandQueue);
|
||||
|
||||
cl_int CL_API_CALL clGetCommandQueueInfo(
|
||||
cl_command_queue commandQueue,
|
||||
cl_command_queue_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
// deprecated OpenCL 1.0
|
||||
cl_int CL_API_CALL clSetCommandQueueProperty(
|
||||
cl_command_queue commandQueue,
|
||||
cl_command_queue_properties properties,
|
||||
cl_bool enable,
|
||||
cl_command_queue_properties *oldProperties);
|
||||
|
||||
cl_mem CL_API_CALL clCreateBuffer(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
size_t size,
|
||||
void *hostPtr,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_mem CL_API_CALL clCreateSubBuffer(
|
||||
cl_mem buffer,
|
||||
cl_mem_flags flags,
|
||||
cl_buffer_create_type bufferCreateType,
|
||||
const void *bufferCreateInfo,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_mem CL_API_CALL clCreateImage(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *imageFormat,
|
||||
const cl_image_desc *imageDesc,
|
||||
void *hostPtr,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
// deprecated OpenCL 1.1
|
||||
cl_mem CL_API_CALL clCreateImage2D(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *imageFormat,
|
||||
size_t imageWidth,
|
||||
size_t imageHeight,
|
||||
size_t imageRowPitch,
|
||||
void *hostPtr,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
// deprecated OpenCL 1.1
|
||||
cl_mem CL_API_CALL clCreateImage3D(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *imageFormat,
|
||||
size_t imageWidth,
|
||||
size_t imageHeight,
|
||||
size_t imageDepth,
|
||||
size_t imageRowPitch,
|
||||
size_t imageSlicePitch,
|
||||
void *hostPtr,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clRetainMemObject(
|
||||
cl_mem memobj);
|
||||
|
||||
cl_int CL_API_CALL clReleaseMemObject(
|
||||
cl_mem memobj);
|
||||
|
||||
cl_int CL_API_CALL clGetSupportedImageFormats(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type imageType,
|
||||
cl_uint numEntries,
|
||||
cl_image_format *imageFormats,
|
||||
cl_uint *numImageFormats);
|
||||
|
||||
cl_int CL_API_CALL clGetMemObjectInfo(
|
||||
cl_mem memobj,
|
||||
cl_mem_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetImageInfo(
|
||||
cl_mem image,
|
||||
cl_image_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetImageParamsINTEL(
|
||||
cl_context context,
|
||||
const cl_image_format *imageFormat,
|
||||
const cl_image_desc *imageDesc,
|
||||
size_t *imageRowPitch,
|
||||
size_t *imageSlicePitch);
|
||||
|
||||
cl_int CL_API_CALL clSetMemObjectDestructorCallback(
|
||||
cl_mem memobj,
|
||||
void(CL_CALLBACK *funcNotify)(cl_mem, void *),
|
||||
void *userData);
|
||||
|
||||
cl_sampler CL_API_CALL clCreateSampler(
|
||||
cl_context context,
|
||||
cl_bool normalizedCoords,
|
||||
cl_addressing_mode addressingMode,
|
||||
cl_filter_mode filterMode,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clRetainSampler(
|
||||
cl_sampler sampler);
|
||||
|
||||
cl_int CL_API_CALL clReleaseSampler(
|
||||
cl_sampler sampler);
|
||||
|
||||
cl_int CL_API_CALL clGetSamplerInfo(
|
||||
cl_sampler sampler,
|
||||
cl_sampler_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_program CL_API_CALL clCreateProgramWithSource(
|
||||
cl_context context,
|
||||
cl_uint count,
|
||||
const char **strings,
|
||||
const size_t *lengths,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_program CL_API_CALL clCreateProgramWithBinary(
|
||||
cl_context context,
|
||||
cl_uint numDevices,
|
||||
const cl_device_id *deviceList,
|
||||
const size_t *lengths,
|
||||
const unsigned char **binaries,
|
||||
cl_int *binaryStatus,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(
|
||||
cl_context context,
|
||||
cl_uint numDevices,
|
||||
const cl_device_id *deviceList,
|
||||
const char *kernelNames,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clRetainProgram(
|
||||
cl_program program);
|
||||
|
||||
cl_int CL_API_CALL clReleaseProgram(
|
||||
cl_program program);
|
||||
|
||||
cl_int CL_API_CALL clBuildProgram(
|
||||
cl_program program,
|
||||
cl_uint numDevices,
|
||||
const cl_device_id *deviceList,
|
||||
const char *options,
|
||||
void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
|
||||
void *userData);
|
||||
|
||||
cl_int CL_API_CALL clCompileProgram(
|
||||
cl_program program,
|
||||
cl_uint numDevices,
|
||||
const cl_device_id *deviceList,
|
||||
const char *options,
|
||||
cl_uint numInputHeaders,
|
||||
const cl_program *inputHeaders,
|
||||
const char **headerIncludeNames,
|
||||
void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
|
||||
void *userData);
|
||||
|
||||
cl_program CL_API_CALL clLinkProgram(
|
||||
cl_context context,
|
||||
cl_uint numDevices,
|
||||
const cl_device_id *deviceList,
|
||||
const char *options,
|
||||
cl_uint numInputPrograms,
|
||||
const cl_program *inputPrograms,
|
||||
void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
|
||||
void *userData,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clUnloadPlatformCompiler(
|
||||
cl_platform_id platform);
|
||||
|
||||
// deprecated OpenCL 1.1
|
||||
cl_int CL_API_CALL clUnloadCompiler(void);
|
||||
|
||||
cl_int CL_API_CALL clGetProgramInfo(
|
||||
cl_program program,
|
||||
cl_program_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetProgramBuildInfo(
|
||||
cl_program program,
|
||||
cl_device_id device,
|
||||
cl_program_build_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_kernel CL_API_CALL clCreateKernel(
|
||||
cl_program program,
|
||||
const char *kernelName,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clCreateKernelsInProgram(
|
||||
cl_program program,
|
||||
cl_uint numKernels,
|
||||
cl_kernel *kernels,
|
||||
cl_uint *numKernelsRet);
|
||||
|
||||
cl_int CL_API_CALL clRetainKernel(
|
||||
cl_kernel kernel);
|
||||
|
||||
cl_int CL_API_CALL clReleaseKernel(
|
||||
cl_kernel kernel);
|
||||
|
||||
cl_int CL_API_CALL clSetKernelArg(
|
||||
cl_kernel kernel,
|
||||
cl_uint argIndex,
|
||||
size_t argSize,
|
||||
const void *argValue);
|
||||
|
||||
cl_int CL_API_CALL clGetKernelInfo(
|
||||
cl_kernel kernel,
|
||||
cl_kernel_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetKernelArgInfo(
|
||||
cl_kernel kernel,
|
||||
cl_uint argIndx,
|
||||
cl_kernel_arg_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetKernelWorkGroupInfo(
|
||||
cl_kernel kernel,
|
||||
cl_device_id device,
|
||||
cl_kernel_work_group_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clWaitForEvents(
|
||||
cl_uint numEvents,
|
||||
const cl_event *eventList);
|
||||
|
||||
cl_int CL_API_CALL clGetEventInfo(
|
||||
cl_event event,
|
||||
cl_event_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_event CL_API_CALL clCreateUserEvent(
|
||||
cl_context context,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clRetainEvent(
|
||||
cl_event event);
|
||||
|
||||
cl_int CL_API_CALL clReleaseEvent(
|
||||
cl_event event);
|
||||
|
||||
cl_int CL_API_CALL clSetUserEventStatus(
|
||||
cl_event event,
|
||||
cl_int executionStatus);
|
||||
|
||||
cl_int CL_API_CALL clSetEventCallback(
|
||||
cl_event event,
|
||||
cl_int commandExecCallbackType,
|
||||
void(CL_CALLBACK *funcNotify)(cl_event, cl_int, void *),
|
||||
void *userData);
|
||||
|
||||
cl_int CL_API_CALL clGetEventProfilingInfo(
|
||||
cl_event event,
|
||||
cl_profiling_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clFlush(
|
||||
cl_command_queue commandQueue);
|
||||
|
||||
cl_int CL_API_CALL clFinish(
|
||||
cl_command_queue commandQueue);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueReadBuffer(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem buffer,
|
||||
cl_bool blockingRead,
|
||||
size_t offset,
|
||||
size_t cb,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueReadBufferRect(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem buffer,
|
||||
cl_bool blockingRead,
|
||||
const size_t *bufferOrigin,
|
||||
const size_t *hostOrigin,
|
||||
const size_t *region,
|
||||
size_t bufferRowPitch,
|
||||
size_t bufferSlicePitch,
|
||||
size_t hostRowPitch,
|
||||
size_t hostSlicePitch,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueWriteBuffer(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem buffer,
|
||||
cl_bool blockingWrite,
|
||||
size_t offset,
|
||||
size_t cb,
|
||||
const void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueWriteBufferRect(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem buffer,
|
||||
cl_bool blockingWrite,
|
||||
const size_t *bufferOrigin,
|
||||
const size_t *hostOrigin,
|
||||
const size_t *region,
|
||||
size_t bufferRowPitch,
|
||||
size_t bufferSlicePitch,
|
||||
size_t hostRowPitch,
|
||||
size_t hostSlicePitch,
|
||||
const void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueFillBuffer(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem buffer,
|
||||
const void *pattern,
|
||||
size_t patternSize,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueCopyBuffer(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem srcBuffer,
|
||||
cl_mem dstBuffer,
|
||||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
size_t cb,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueCopyBufferRect(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem srcBuffer,
|
||||
cl_mem dstBuffer,
|
||||
const size_t *srcOrigin,
|
||||
const size_t *dstOrigin,
|
||||
const size_t *region,
|
||||
size_t srcRowPitch,
|
||||
size_t srcSlicePitch,
|
||||
size_t dstRowPitch,
|
||||
size_t dstSlicePitch,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueReadImage(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem image,
|
||||
cl_bool blockingRead,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t rowPitch,
|
||||
size_t slicePitch,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueWriteImage(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem image,
|
||||
cl_bool blockingWrite,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t inputRowPitch,
|
||||
size_t inputSlicePitch,
|
||||
const void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueFillImage(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem image,
|
||||
const void *fillColor,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueCopyImage(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem srcImage,
|
||||
cl_mem dstImage,
|
||||
const size_t *srcOrigin,
|
||||
const size_t *dstOrigin,
|
||||
const size_t *region,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueCopyImageToBuffer(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem srcImage,
|
||||
cl_mem dstBuffer,
|
||||
const size_t *srcOrigin,
|
||||
const size_t *region,
|
||||
size_t dstOffset,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueCopyBufferToImage(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem srcBuffer,
|
||||
cl_mem dstImage,
|
||||
size_t srcOffset,
|
||||
const size_t *dstOrigin,
|
||||
const size_t *region,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
void *CL_API_CALL clEnqueueMapBuffer(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem buffer,
|
||||
cl_bool blockingMap,
|
||||
cl_map_flags mapFlags,
|
||||
size_t offset,
|
||||
size_t cb,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
void *CL_API_CALL clEnqueueMapImage(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem image,
|
||||
cl_bool blockingMap,
|
||||
cl_map_flags mapFlags,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t *imageRowPitch,
|
||||
size_t *imageSlicePitch,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueUnmapMemObject(
|
||||
cl_command_queue commandQueue,
|
||||
cl_mem memobj,
|
||||
void *mappedPtr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueMigrateMemObjects(
|
||||
cl_command_queue commandQueue,
|
||||
cl_uint numMemObjects,
|
||||
const cl_mem *memObjects,
|
||||
cl_mem_migration_flags flags,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueNDRangeKernel(
|
||||
cl_command_queue commandQueue,
|
||||
cl_kernel kernel,
|
||||
cl_uint workDim,
|
||||
const size_t *globalWorkOffset,
|
||||
const size_t *globalWorkSize,
|
||||
const size_t *localWorkSize,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueTask(
|
||||
cl_command_queue commandQueue,
|
||||
cl_kernel kernel,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueNativeKernel(
|
||||
cl_command_queue commandQueue,
|
||||
void(CL_CALLBACK *userFunc)(void *),
|
||||
void *args,
|
||||
size_t cbArgs,
|
||||
cl_uint numMemObjects,
|
||||
const cl_mem *memList,
|
||||
const void **argsMemLoc,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
// OpenCL 1.1 API, deprecated as of OpenCL 1.2
|
||||
cl_int CL_API_CALL clEnqueueMarker(
|
||||
cl_command_queue commandQueue,
|
||||
cl_event *event);
|
||||
|
||||
// OpenCL 1.1 API, deprecated as of OpenCL 1.2
|
||||
cl_int CL_API_CALL clEnqueueWaitForEvents(
|
||||
cl_command_queue commandQueue,
|
||||
cl_uint numEvents,
|
||||
const cl_event *eventList);
|
||||
|
||||
// OpenCL 1.1 API, deprecated as of OpenCL 1.2
|
||||
cl_int CL_API_CALL clEnqueueBarrier(
|
||||
cl_command_queue commandQueue);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueMarkerWithWaitList(
|
||||
cl_command_queue commandQueue,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueBarrierWithWaitList(
|
||||
cl_command_queue commandQueue,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
// OpenCL 1.1 API, deprecated as of OpenCL 1.2
|
||||
void *CL_API_CALL clGetExtensionFunctionAddress(
|
||||
const char *funcName);
|
||||
|
||||
void *CL_API_CALL clGetExtensionFunctionAddressForPlatform(
|
||||
cl_platform_id platform,
|
||||
const char *funcName);
|
||||
|
||||
// CL-GL Sharing
|
||||
|
||||
// Declaration of the clCreateFromGLBuffer entry point (CL-GL sharing).
// Per the OpenCL GL-sharing specification this creates a cl_mem from an
// existing OpenGL buffer object; the definition is not part of this header.
//   context    - OpenCL context the returned memory object is created in
//   flags      - cl_mem_flags usage bits for the new object
//   bufobj     - name of the OpenGL buffer object being shared
//   errcodeRet - receives the CL error code (the specification allows null).
//                Typed as cl_int* so it matches every other errcodeRet
//                parameter declared in this header.
cl_mem CL_API_CALL clCreateFromGLBuffer(
    cl_context context,
    cl_mem_flags flags,
    cl_GLuint bufobj,
    cl_int *errcodeRet);
|
||||
|
||||
// OpenCL 1.2
|
||||
cl_mem CL_API_CALL clCreateFromGLTexture(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
// OpenCL 1.1 API, deprecated as of OpenCL 1.2
|
||||
cl_mem CL_API_CALL clCreateFromGLTexture2D(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
// OpenCL 1.1 API, deprecated as of OpenCL 1.2
|
||||
cl_mem CL_API_CALL clCreateFromGLTexture3D(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_mem CL_API_CALL clCreateFromGLRenderbuffer(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint renderbuffer,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetGLObjectInfo(
|
||||
cl_mem memobj,
|
||||
cl_gl_object_type *glObjectType,
|
||||
cl_GLuint *glObjectName);
|
||||
|
||||
cl_int CL_API_CALL clGetGLTextureInfo(
|
||||
cl_mem memobj,
|
||||
cl_gl_texture_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueAcquireGLObjects(
|
||||
cl_command_queue commandQueue,
|
||||
cl_uint numObjects,
|
||||
const cl_mem *memObjects,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueReleaseGLObjects(
|
||||
cl_command_queue commandQueue,
|
||||
cl_uint numObjects,
|
||||
const cl_mem *memObjects,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
// OpenCL 2.0
|
||||
|
||||
void *CL_API_CALL clSVMAlloc(
|
||||
cl_context context,
|
||||
cl_svm_mem_flags flags,
|
||||
size_t size,
|
||||
cl_uint alignment);
|
||||
|
||||
void CL_API_CALL clSVMFree(
|
||||
cl_context context,
|
||||
void *svmPointer);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueSVMFree(
|
||||
cl_command_queue commandQueue,
|
||||
cl_uint numSvmPointers,
|
||||
void *svmPointers[],
|
||||
void(CL_CALLBACK *pfnFreeFunc)(
|
||||
cl_command_queue queue,
|
||||
cl_uint numSvmPointers,
|
||||
void *svmPointers[],
|
||||
void *userData),
|
||||
void *userData,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueSVMMemcpy(
|
||||
cl_command_queue commandQueue,
|
||||
cl_bool blockingCopy,
|
||||
void *dstPtr,
|
||||
const void *srcPtr,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueSVMMemFill(
|
||||
cl_command_queue commandQueue,
|
||||
void *svmPtr,
|
||||
const void *pattern,
|
||||
size_t patternSize,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueSVMMap(
|
||||
cl_command_queue commandQueue,
|
||||
cl_bool blockingMap,
|
||||
cl_map_flags mapFlags,
|
||||
void *svmPtr,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clEnqueueSVMUnmap(
|
||||
cl_command_queue commandQueue,
|
||||
void *svmPtr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
cl_int CL_API_CALL clSetKernelArgSVMPointer(
|
||||
cl_kernel kernel,
|
||||
cl_uint argIndex,
|
||||
const void *argValue);
|
||||
|
||||
cl_int CL_API_CALL clSetKernelExecInfo(
|
||||
cl_kernel kernel,
|
||||
cl_kernel_exec_info paramName,
|
||||
size_t paramValueSize,
|
||||
const void *paramValue);
|
||||
|
||||
cl_mem CL_API_CALL clCreatePipe(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_uint pipePacketSize,
|
||||
cl_uint pipeMaxPackets,
|
||||
const cl_pipe_properties *properties,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_int CL_API_CALL clGetPipeInfo(
|
||||
cl_mem pipe,
|
||||
cl_pipe_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_queue_properties *properties,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesINTEL(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_queue_properties_intel *properties,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
cl_sampler CL_API_CALL clCreateSamplerWithProperties(
|
||||
cl_context context,
|
||||
const cl_sampler_properties *samplerProperties,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
// OpenCL 2.1
|
||||
|
||||
cl_int CL_API_CALL clGetDeviceAndHostTimer(cl_device_id device,
|
||||
cl_ulong *deviceTimestamp,
|
||||
cl_ulong *hostTimestamp);
|
||||
|
||||
cl_int CL_API_CALL clGetHostTimer(cl_device_id device,
|
||||
cl_ulong *hostTimestamp);
|
||||
|
||||
extern CL_API_ENTRY cl_command_queue CL_API_CALL
|
||||
clCreatePerfCountersCommandQueueINTEL(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
cl_command_queue_properties properties,
|
||||
cl_uint configuration,
|
||||
cl_int *errcodeRet);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetPerformanceConfigurationINTEL(
|
||||
cl_device_id device,
|
||||
cl_uint count,
|
||||
cl_uint *offsets,
|
||||
cl_uint *values);
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(
|
||||
cl_context context,
|
||||
cl_GLsync sync,
|
||||
cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithILKHR(
|
||||
cl_context context,
|
||||
const void *il,
|
||||
size_t length,
|
||||
cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2;
|
||||
}
|
||||
75
runtime/api/cl_types.h
Normal file
75
runtime/api/cl_types.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "config.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "runtime/api/dispatch.h"
|
||||
#include <cstdint>
|
||||
|
||||
struct ClDispatch {
|
||||
SEntryPointsTable dispatch;
|
||||
ClDispatch() : dispatch(globalDispatchTable) {
|
||||
}
|
||||
};
|
||||
|
||||
// Accelerator object base: inherits the dispatch table from ClDispatch and
// adds no members of its own.
struct _cl_accelerator_intel : ClDispatch {};
|
||||
|
||||
// Command-queue object base: inherits the dispatch table from ClDispatch and
// adds no members of its own.
struct _cl_command_queue : ClDispatch {};
|
||||
|
||||
// device_queue is a type used internally
|
||||
struct _device_queue : public _cl_command_queue {
|
||||
};
|
||||
typedef _device_queue *device_queue;
|
||||
|
||||
// Context object base: the ClDispatch table plus one flag.
struct _cl_context : ClDispatch {
    // Defaults to false; presumably set for contexts created with sharing
    // enabled - confirm against the code that writes it.
    bool isSharedContext = false;
};
|
||||
|
||||
// Device object base: inherits the dispatch table from ClDispatch and adds
// no members of its own.
struct _cl_device_id : ClDispatch {};
|
||||
|
||||
// Event object base: inherits the dispatch table from ClDispatch and adds
// no members of its own.
struct _cl_event : ClDispatch {};
|
||||
|
||||
// Kernel object base: inherits the dispatch table from ClDispatch and adds
// no members of its own.
struct _cl_kernel : ClDispatch {};
|
||||
|
||||
// Memory object base: inherits the dispatch table from ClDispatch and adds
// no members of its own.
struct _cl_mem : ClDispatch {};
|
||||
|
||||
// Platform object base: inherits the dispatch table from ClDispatch and adds
// no members of its own.
struct _cl_platform_id : ClDispatch {};
|
||||
|
||||
// Program object base: inherits the dispatch table from ClDispatch and adds
// no members of its own.
struct _cl_program : ClDispatch {};
|
||||
|
||||
// Sampler object base: inherits the dispatch table from ClDispatch and adds
// no members of its own.
struct _cl_sampler : ClDispatch {};
|
||||
|
||||
template <typename Type>
|
||||
inline bool isValidObject(Type object) {
|
||||
return object && object->dispatch.icdDispatch == &icdGlobalDispatchTable;
|
||||
}
|
||||
226
runtime/api/dispatch.cpp
Normal file
226
runtime/api/dispatch.cpp
Normal file
@@ -0,0 +1,226 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "dispatch.h"
|
||||
#include "api.h"
|
||||
|
||||
// Table of API entry points, filled positionally: each initializer lands in
// the SDispatchTable member at the same position, so the order below must not
// be changed. Slots initialized with nullptr have no entry point assigned in
// this initializer.
// NOTE(review): the grouping follows what looks like the Khronos ICD dispatch
// layout - confirm against SDispatchTable in dispatch.h.
SDispatchTable icdGlobalDispatchTable = {
    /* OpenCL 1.0 */
    clGetPlatformIDs,
    clGetPlatformInfo,
    clGetDeviceIDs,
    clGetDeviceInfo,
    clCreateContext,
    clCreateContextFromType,
    clRetainContext,
    clReleaseContext,
    clGetContextInfo,
    clCreateCommandQueue,
    clRetainCommandQueue,
    clReleaseCommandQueue,
    clGetCommandQueueInfo,
    clSetCommandQueueProperty,
    clCreateBuffer,
    clCreateImage2D,
    clCreateImage3D,
    clRetainMemObject,
    clReleaseMemObject,
    clGetSupportedImageFormats,
    clGetMemObjectInfo,
    clGetImageInfo,
    clCreateSampler,
    clRetainSampler,
    clReleaseSampler,
    clGetSamplerInfo,
    clCreateProgramWithSource,
    clCreateProgramWithBinary,
    clRetainProgram,
    clReleaseProgram,
    clBuildProgram,
    clUnloadCompiler,
    clGetProgramInfo,
    clGetProgramBuildInfo,
    clCreateKernel,
    clCreateKernelsInProgram,
    clRetainKernel,
    clReleaseKernel,
    clSetKernelArg,
    clGetKernelInfo,
    clGetKernelWorkGroupInfo,
    clWaitForEvents,
    clGetEventInfo,
    clRetainEvent,
    clReleaseEvent,
    clGetEventProfilingInfo,
    clFlush,
    clFinish,
    clEnqueueReadBuffer,
    clEnqueueWriteBuffer,
    clEnqueueCopyBuffer,
    clEnqueueReadImage,
    clEnqueueWriteImage,
    clEnqueueCopyImage,
    clEnqueueCopyImageToBuffer,
    clEnqueueCopyBufferToImage,
    clEnqueueMapBuffer,
    clEnqueueMapImage,
    clEnqueueUnmapMemObject,
    clEnqueueNDRangeKernel,
    clEnqueueTask,
    clEnqueueNativeKernel,
    clEnqueueMarker,
    clEnqueueWaitForEvents,
    clEnqueueBarrier,
    clGetExtensionFunctionAddress,

    /* cl_khr_gl_sharing (nine slots, all left null here) */
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,

    /* cl_khr_d3d10_sharing */
    nullptr, // clGetDeviceIDsFromD3D10KHR,
    nullptr, // clCreateFromD3D10BufferKHR,
    nullptr, // clCreateFromD3D10Texture2DKHR,
    nullptr, // clCreateFromD3D10Texture3DKHR,
    nullptr, // clEnqueueAcquireD3D10ObjectsKHR,
    nullptr, // clEnqueueReleaseD3D10ObjectsKHR,

    /* OpenCL 1.1 */
    clSetEventCallback,
    clCreateSubBuffer,
    clSetMemObjectDestructorCallback,
    clCreateUserEvent,
    clSetUserEventStatus,
    clEnqueueReadBufferRect,
    clEnqueueWriteBufferRect,
    clEnqueueCopyBufferRect,

    /* cl_ext_device_fission */
    nullptr, //clCreateSubDevicesEXT,
    nullptr, //clRetainDeviceEXT,
    nullptr, //clReleaseDeviceEXT,

    /* cl_khr_gl_event (one slot, left null here) */
    nullptr,

    /* OpenCL 1.2 */
    clCreateSubDevices,
    clRetainDevice,
    clReleaseDevice,
    clCreateImage,
    clCreateProgramWithBuiltInKernels,
    clCompileProgram,
    clLinkProgram,
    clUnloadPlatformCompiler,
    clGetKernelArgInfo,
    clEnqueueFillBuffer,
    clEnqueueFillImage,
    clEnqueueMigrateMemObjects,
    clEnqueueMarkerWithWaitList,
    clEnqueueBarrierWithWaitList,
    clGetExtensionFunctionAddressForPlatform,
    nullptr, // last OpenCL 1.2 slot, left null here

    /* cl_khr_d3d11_sharing */
    nullptr, // clGetDeviceIDsFromD3D11KHR,
    nullptr, // clCreateFromD3D11BufferKHR,
    nullptr, // clCreateFromD3D11Texture2DKHR,
    nullptr, // clCreateFromD3D11Texture3DKHR,
    nullptr, // clCreateFromDX9MediaSurfaceKHR,
    nullptr, // clEnqueueAcquireD3D11ObjectsKHR,
    nullptr, // clEnqueueReleaseD3D11ObjectsKHR,

    /* cl_khr_dx9_media_sharing */
    nullptr, // clGetDeviceIDsFromDX9MediaAdapterKHR,
    nullptr, // clEnqueueAcquireDX9MediaSurfacesKHR,
    nullptr, // clEnqueueReleaseDX9MediaSurfacesKHR,

    /* cl_khr_egl_image */
    nullptr, //clCreateFromEGLImageKHR,
    nullptr, //clEnqueueAcquireEGLObjectsKHR,
    nullptr, //clEnqueueReleaseEGLObjectsKHR,

    /* cl_khr_egl_event */
    nullptr, //clCreateEventFromEGLSyncKHR,

    /* OpenCL 2.0 */
    clCreateCommandQueueWithProperties,
    clCreatePipe,
    clGetPipeInfo,
    clSVMAlloc,
    clSVMFree,
    clEnqueueSVMFree,
    clEnqueueSVMMemcpy,
    clEnqueueSVMMemFill,
    clEnqueueSVMMap,
    clEnqueueSVMUnmap,
    clCreateSamplerWithProperties,
    clSetKernelArgSVMPointer,
    clSetKernelExecInfo,
    clGetKernelSubGroupInfoKHR,

    /* OpenCL 2.1 */
    clCloneKernel,
    clCreateProgramWithIL,
    clEnqueueSVMMigrateMem,
    clGetDeviceAndHostTimer,
    clGetHostTimer,
    clGetKernelSubGroupInfo,
    clSetDefaultDeviceCommandQueue,
};
|
||||
// Second entry-point table, also filled positionally: the order must match
// the member order of SCRTDispatchTable (declared outside this file).
// nullptr marks slots with no entry point assigned in this initializer.
SCRTDispatchTable crtGlobalDispatchTable = {
    clGetKernelArgInfo,

    nullptr, // clGetDeviceIDsFromDX9INTEL,
    nullptr, // clCreateFromDX9MediaSurfaceINTEL,
    nullptr, // clEnqueueAcquireDX9ObjectsINTEL,
    nullptr, // clEnqueueReleaseDX9ObjectsINTEL,
    clGetImageParamsINTEL,
    clCreatePerfCountersCommandQueueINTEL,

    clCreateAcceleratorINTEL,
    clGetAcceleratorInfoINTEL,
    clRetainAcceleratorINTEL,
    clReleaseAcceleratorINTEL,

    // two unassigned slots
    nullptr,
    nullptr,

    // four unassigned slots
    nullptr,
    nullptr,
    nullptr,
    nullptr,

    // four unassigned slots
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    clSetPerformanceConfigurationINTEL};
|
||||
|
||||
// Pairs the two tables defined above; ClDispatch copies this object into
// every CL object it constructs.
SEntryPointsTable globalDispatchTable{&icdGlobalDispatchTable, &crtGlobalDispatchTable};
|
||||
1304
runtime/api/dispatch.h
Normal file
1304
runtime/api/dispatch.h
Normal file
File diff suppressed because it is too large
Load Diff
92
runtime/aub_mem_dump/aub_header.h
Normal file
92
runtime/aub_mem_dump/aub_header.h
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
#ifndef WIN32
|
||||
#pragma pack(4)
|
||||
#else
|
||||
#pragma pack(push, 4)
|
||||
#endif
|
||||
|
||||
// One-dword command header made of four bit-fields that together fill a
// single uint32_t (16 + 7 + 6 + 3 = 32 bits).
struct AubCmdHdr {
    uint32_t DwordLength : 16;
    uint32_t SubOp : 7;
    uint32_t Opcode : 6;
    uint32_t Type : 3;
};
|
||||
static_assert(4 == sizeof(AubCmdHdr), "Invalid size for AubCmdHdr");
|
||||
|
||||
struct AubCmdDumpBmpHd {
|
||||
AubCmdHdr Header;
|
||||
uint32_t Xmin;
|
||||
uint32_t Ymin;
|
||||
uint32_t BufferPitch;
|
||||
uint32_t BitsPerPixel : 8,
|
||||
Format : 8,
|
||||
Reserved_0 : 16;
|
||||
uint32_t Xsize;
|
||||
uint32_t Ysize;
|
||||
uint64_t BaseAddr;
|
||||
uint32_t Secure : 1,
|
||||
UseFence : 1,
|
||||
TileOn : 1,
|
||||
WalkY : 1,
|
||||
UsePPGTT : 1,
|
||||
Use32BitDump : 1,
|
||||
UseFullFormat : 1,
|
||||
Reserved_1 : 25;
|
||||
uint32_t DirectoryHandle;
|
||||
};
|
||||
static_assert(44 == sizeof(AubCmdDumpBmpHd), "Invalid size for AubCmdDumpBmpHd");
|
||||
|
||||
struct AubPpgttContextCreate {
|
||||
AubCmdHdr Header;
|
||||
uint32_t Handle;
|
||||
uint32_t AdvancedContext : 1,
|
||||
SixtyFourBit : 1,
|
||||
Reserved_31_2 : 30;
|
||||
uint64_t PageDirPointer[4];
|
||||
};
|
||||
static_assert(44 == sizeof(AubPpgttContextCreate), "Invalid size for AubPpgttContextCreate");
|
||||
|
||||
struct AubBinaryDump {
|
||||
AubCmdHdr Header;
|
||||
char OutputFile[40];
|
||||
uint32_t Height;
|
||||
uint32_t Width;
|
||||
uint64_t BaseAddr;
|
||||
uint32_t SurfaceType : 4,
|
||||
Pitch : 28;
|
||||
uint32_t GttType : 2,
|
||||
Reserved_31_2 : 30;
|
||||
uint32_t DirectoryHandle;
|
||||
};
|
||||
static_assert(72 == sizeof(AubBinaryDump), "Invalid size for AubBinaryDump");
|
||||
|
||||
#ifndef WIN32
|
||||
#pragma pack()
|
||||
#else
|
||||
#pragma pack(pop)
|
||||
#endif
|
||||
190
runtime/aub_mem_dump/aub_mem_dump.cpp
Normal file
190
runtime/aub_mem_dump/aub_mem_dump.cpp
Normal file
@@ -0,0 +1,190 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "aub_mem_dump.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
|
||||
namespace AubMemDump {
|
||||
|
||||
// All bits set except the low 12: AND-ing a value with it clears the offset within a 4096-byte page.
const uint64_t g_pageMask = ~0xFFFull;
|
||||
|
||||
// Upper bound on a dword count: 65536 (2^16).
const size_t g_dwordCountMax = 1u << 16;
|
||||
|
||||
// Some page table constants used in virtualizing the page tables.
|
||||
// clang-format off
|
||||
// 32 bit page table traits
|
||||
// Out-of-class definitions of the PageTableTraits<32> static constants.
// Names in each initializer are looked up in the class scope, so the members
// are referenced without repeating the PageTableTraits<32>:: prefix.
const uint64_t PageTableTraits<32>::physicalMemory = 0; // 1ull <<addressingBits;

// Page-table level: entry count, byte size (8 bytes per entry), base address.
const uint64_t PageTableTraits<32>::numPTEntries  = BIT(addressingBits - NUM_OFFSET_BITS);
const uint64_t PageTableTraits<32>::sizePT        = BIT(addressingBits - NUM_OFFSET_BITS) * sizeof(uint64_t);
const uint64_t PageTableTraits<32>::ptBaseAddress = BIT(38);

// Page-directory level.
const uint64_t PageTableTraits<32>::numPDEntries  = BIT(addressingBits - NUM_OFFSET_BITS - NUM_PTE_BITS);
const uint64_t PageTableTraits<32>::sizePD        = BIT(addressingBits - NUM_OFFSET_BITS - NUM_PTE_BITS) * sizeof(uint64_t);
const uint64_t PageTableTraits<32>::pdBaseAddress = BIT(37);

// Page-directory-pointer level.
const uint64_t PageTableTraits<32>::numPDPEntries  = BIT(addressingBits - NUM_OFFSET_BITS - NUM_PTE_BITS - NUM_PDE_BITS);
const uint64_t PageTableTraits<32>::sizePDP        = BIT(addressingBits - NUM_OFFSET_BITS - NUM_PTE_BITS - NUM_PDE_BITS) * sizeof(uint64_t);
const uint64_t PageTableTraits<32>::pdpBaseAddress = BIT(36);
|
||||
|
||||
// 48 bit page table traits
|
||||
// Out-of-class definitions of the PageTableTraits<48> static constants.
// Names in each initializer are looked up in the class scope, so the members
// are referenced without repeating the PageTableTraits<48>:: prefix.
const uint64_t PageTableTraits<48>::physicalMemory = 0; // 1ull <<addressingBits;

// Page-table level: entry count, byte size (8 bytes per entry), base address.
const uint64_t PageTableTraits<48>::numPTEntries  = BIT(addressingBits - NUM_OFFSET_BITS);
const uint64_t PageTableTraits<48>::sizePT        = BIT(addressingBits - NUM_OFFSET_BITS) * sizeof(uint64_t);
const uint64_t PageTableTraits<48>::ptBaseAddress = BIT(38);

// Page-directory level.
const uint64_t PageTableTraits<48>::numPDEntries  = BIT(addressingBits - NUM_OFFSET_BITS - NUM_PTE_BITS);
const uint64_t PageTableTraits<48>::sizePD        = BIT(addressingBits - NUM_OFFSET_BITS - NUM_PTE_BITS) * sizeof(uint64_t);
const uint64_t PageTableTraits<48>::pdBaseAddress = BIT(37);

// Page-directory-pointer level.
const uint64_t PageTableTraits<48>::numPDPEntries  = BIT(addressingBits - NUM_OFFSET_BITS - NUM_PTE_BITS - NUM_PDE_BITS);
const uint64_t PageTableTraits<48>::sizePDP        = BIT(addressingBits - NUM_OFFSET_BITS - NUM_PTE_BITS - NUM_PDE_BITS) * sizeof(uint64_t);
const uint64_t PageTableTraits<48>::pdpBaseAddress = BIT(36);

// PML4 level (only present in the 48-bit traits).
const uint64_t PageTableTraits<48>::numPML4Entries  = BIT(NUM_PML4_BITS);
const uint64_t PageTableTraits<48>::sizePML4        = BIT(NUM_PML4_BITS) * sizeof(uint64_t);
const uint64_t PageTableTraits<48>::pml4BaseAddress = BIT(35);
|
||||
// clang-format on
|
||||
|
||||
void LrcaHelper::setRingTail(void *pLRCIn, uint32_t ringTail) const {
|
||||
auto pLRCA = ptrOffset(reinterpret_cast<uint32_t *>(pLRCIn),
|
||||
offsetContext + offsetRingRegisters + offsetRingTail);
|
||||
*pLRCA++ = mmioBase + 0x2030;
|
||||
*pLRCA++ = ringTail;
|
||||
}
|
||||
|
||||
void LrcaHelper::setRingHead(void *pLRCIn, uint32_t ringHead) const {
|
||||
auto pLRCA = ptrOffset(reinterpret_cast<uint32_t *>(pLRCIn),
|
||||
offsetContext + offsetRingRegisters + offsetRingHead);
|
||||
*pLRCA++ = mmioBase + 0x2034;
|
||||
*pLRCA++ = ringHead;
|
||||
}
|
||||
|
||||
void LrcaHelper::setRingBase(void *pLRCIn, uint32_t ringBase) const {
|
||||
auto pLRCA = ptrOffset(reinterpret_cast<uint32_t *>(pLRCIn),
|
||||
offsetContext + offsetRingRegisters + offsetRingBase);
|
||||
*pLRCA++ = mmioBase + 0x2038;
|
||||
*pLRCA++ = ringBase;
|
||||
}
|
||||
|
||||
void LrcaHelper::setRingCtrl(void *pLRCIn, uint32_t ringCtrl) const {
|
||||
auto pLRCA = ptrOffset(reinterpret_cast<uint32_t *>(pLRCIn),
|
||||
offsetContext + offsetRingRegisters + offsetRingCtrl);
|
||||
*pLRCA++ = mmioBase + 0x203c;
|
||||
*pLRCA++ = ringCtrl;
|
||||
}
|
||||
|
||||
void LrcaHelper::setPDP0(void *pLRCIn, uint64_t address) const {
|
||||
auto pLRCA = ptrOffset(reinterpret_cast<uint32_t *>(pLRCIn),
|
||||
offsetContext + offsetPageTableRegisters + offsetPDP0);
|
||||
|
||||
*pLRCA++ = mmioBase + 0x2274;
|
||||
*pLRCA++ = address >> 32;
|
||||
*pLRCA++ = mmioBase + 0x2270;
|
||||
*pLRCA++ = address & 0xffffffff;
|
||||
}
|
||||
|
||||
void LrcaHelper::setPDP1(void *pLRCIn, uint64_t address) const {
|
||||
auto pLRCA = ptrOffset(reinterpret_cast<uint32_t *>(pLRCIn),
|
||||
offsetContext + offsetPageTableRegisters + offsetPDP1);
|
||||
|
||||
*pLRCA++ = mmioBase + 0x227c;
|
||||
*pLRCA++ = address >> 32;
|
||||
*pLRCA++ = mmioBase + 0x2278;
|
||||
*pLRCA++ = address & 0xffffffff;
|
||||
}
|
||||
|
||||
void LrcaHelper::setPDP2(void *pLRCIn, uint64_t address) const {
|
||||
auto pLRCA = ptrOffset(reinterpret_cast<uint32_t *>(pLRCIn),
|
||||
offsetContext + offsetPageTableRegisters + offsetPDP2);
|
||||
|
||||
*pLRCA++ = mmioBase + 0x2284;
|
||||
*pLRCA++ = address >> 32;
|
||||
*pLRCA++ = mmioBase + 0x2280;
|
||||
*pLRCA++ = address & 0xffffffff;
|
||||
}
|
||||
|
||||
void LrcaHelper::setPDP3(void *pLRCIn, uint64_t address) const {
|
||||
auto pLRCA = ptrOffset(reinterpret_cast<uint32_t *>(pLRCIn),
|
||||
offsetContext + offsetPageTableRegisters + offsetPDP3);
|
||||
|
||||
*pLRCA++ = mmioBase + 0x228c;
|
||||
*pLRCA++ = address >> 32;
|
||||
*pLRCA++ = mmioBase + 0x2288;
|
||||
*pLRCA++ = address & 0xffffffff;
|
||||
}
|
||||
|
||||
void LrcaHelper::setPML4(void *pLRCIn, uint64_t address) const {
|
||||
setPDP0(pLRCIn, address);
|
||||
}
|
||||
|
||||
void LrcaHelper::initialize(void *pLRCIn) const {
|
||||
auto pLRCABase = reinterpret_cast<uint32_t *>(pLRCIn);
|
||||
|
||||
// Initialize to known but benign garbage
|
||||
for (size_t i = 0; i < sizeLRCA / sizeof(uint32_t); i++) {
|
||||
pLRCABase[i] = 0x1;
|
||||
}
|
||||
|
||||
auto pLRCA = ptrOffset(pLRCABase, offsetContext);
|
||||
|
||||
// Initialize the ring context of the LRCA
|
||||
auto pLRI = ptrOffset(pLRCA, offsetLRI0);
|
||||
auto numRegs = numRegsLRI0;
|
||||
*pLRI++ = 0x11001000 | (2 * numRegs - 1);
|
||||
while (numRegs-- > 0) {
|
||||
*pLRI++ = mmioBase + 0x2244; // CTXT_SR_CTL
|
||||
*pLRI++ = 0x00010001; // Inhibit context-restore
|
||||
}
|
||||
|
||||
// Initialize the other LRI
|
||||
DEBUG_BREAK_IF(offsetLRI1 != 0x21 * sizeof(uint32_t));
|
||||
pLRI = ptrOffset(pLRCA, offsetLRI1);
|
||||
numRegs = numRegsLRI1;
|
||||
*pLRI++ = 0x11001000 | (2 * numRegs - 1);
|
||||
while (numRegs-- > 0) {
|
||||
*pLRI++ = mmioBase + 0x2094; // NOP ID
|
||||
*pLRI++ = 0x00000000;
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(offsetLRI2 != 0x41 * sizeof(uint32_t));
|
||||
pLRI = ptrOffset(pLRCA, offsetLRI2);
|
||||
numRegs = numRegsLRI2;
|
||||
*pLRI++ = 0x11000000 | (2 * numRegs - 1);
|
||||
while (numRegs-- > 0) {
|
||||
*pLRI++ = mmioBase + 0x2094; // NOP ID
|
||||
*pLRI++ = 0x00000000;
|
||||
}
|
||||
|
||||
setRingHead(pLRCIn, 0);
|
||||
setRingTail(pLRCIn, 0);
|
||||
setRingBase(pLRCIn, 0);
|
||||
setRingCtrl(pLRCIn, 0);
|
||||
|
||||
setPDP0(pLRCIn, 0);
|
||||
setPDP1(pLRCIn, 0);
|
||||
setPDP2(pLRCIn, 0);
|
||||
setPDP3(pLRCIn, 0);
|
||||
}
|
||||
}
|
||||
382
runtime/aub_mem_dump/aub_mem_dump.h
Normal file
382
runtime/aub_mem_dump/aub_mem_dump.h
Normal file
@@ -0,0 +1,382 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdio>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
|
||||
// BIT(x): 64-bit value with only bit x set. Defined only if no BIT macro exists yet.
#ifndef BIT
#define BIT(x) (static_cast<uint64_t>(1) << (x))
#endif
|
||||
|
||||
namespace AubMemDump {
|
||||
#include "aub_services.h"
|
||||
|
||||
// Generic case: assigns the 64-bit address straight to the command's
// `address` member. Commands that split the address across several fields
// provide their own specialization.
template <class Cmd>
inline void setAddress(Cmd &cmd, uint64_t address) {
    cmd.address = address;
}
|
||||
|
||||
template <>
|
||||
inline void setAddress(CmdServicesMemTraceMemoryCompare &cmd, uint64_t address) {
|
||||
cmd.address = static_cast<uint32_t>(address);
|
||||
cmd.addressHigh = static_cast<uint32_t>(address >> 32);
|
||||
}
|
||||
|
||||
// Compile-time type switch: TypeSelector<A, B, cond>::type is A when the
// boolean is true (primary template) and B when it is false (specialization).
template <typename TypeTrue, typename TypeFalse, bool is32Bits>
struct TypeSelector {
    using type = TypeTrue;
};

template <typename TypeTrue, typename TypeFalse>
struct TypeSelector<TypeTrue, TypeFalse, false> {
    using type = TypeFalse;
};
|
||||
|
||||
// One 64-bit page table entry, viewable as named bit-fields (pageConfig),
// as two dwords (dwordData) or as a single qword (uiData).
union IAPageTableEntry {
    struct
    {
        uint64_t Present : 1;          //[0]
        uint64_t Writable : 1;         //[1]
        uint64_t UserSupervisor : 1;   //[2]
        uint64_t PWT : 1;              //[3]
        uint64_t PCD : 1;              //[4]
        uint64_t Accessed : 1;         //[5]
        uint64_t Dirty : 1;            //[6]
        uint64_t PAT : 1;              //[7]
        uint64_t Global : 1;           //[8]
        uint64_t Reserved_11_9 : 3;    //[11:9]
        uint64_t PhysicalAddress : 27; //[38:12]
        uint64_t Reserved_51_39 : 13;  //[51:39]
        uint64_t Ignored : 11;         //[62:52]
        uint64_t ExecuteDisable : 1;   //[63]
    } pageConfig;
    uint32_t dwordData[2];
    uint64_t uiData;
};

// GTT entries share the page table entry layout.
using MiGttEntry = IAPageTableEntry;

// Resets `entry` and fills it for the 4 KiB page containing `address`:
// the page frame number goes into PhysicalAddress (values wider than the
// 27-bit field are truncated), and Present, Writable and UserSupervisor are set.
static inline void setGttEntry(IAPageTableEntry &entry, uint64_t address) {
    entry.uiData = 0;

    auto &fields = entry.pageConfig;
    fields.PhysicalAddress = address >> 12; // same as address / 4096
    fields.Present = 1;
    fields.Writable = 1;
    fields.UserSupervisor = 1;
}
|
||||
|
||||
// Use the latest DeviceValues enumerations available
|
||||
typedef CmdServicesMemTraceVersion::DeviceValues DeviceValues;
|
||||
typedef CmdServicesMemTraceVersion::SteppingValues SteppingValues;
|
||||
typedef CmdServicesMemTraceMemoryWrite::AddressSpaceValues AddressSpaceValues;
|
||||
typedef CmdServicesMemTraceMemoryWrite::DataTypeHintValues DataTypeHintValues;
|
||||
typedef CmdServicesMemTraceMemoryDump::TilingValues TilingValues;
|
||||
typedef CmdServicesMemTraceMemoryWrite::RepeatMemoryValues RepeatMemoryValues;
|
||||
typedef CmdServicesMemTraceRegisterWrite::MessageSourceIdValues MessageSourceIdValues;
|
||||
typedef CmdServicesMemTraceRegisterWrite::RegisterSizeValues RegisterSizeValues;
|
||||
typedef CmdServicesMemTraceRegisterWrite::RegisterSpaceValues RegisterSpaceValues;
|
||||
typedef CmdServicesMemTraceMemoryPoll::DataSizeValues DataSizeValues;
|
||||
|
||||
// Compile-time description of a dump target: the device id, the number of
// address bits, and the stream type used for output.
template <int deviceIn, int addressingBitsIn>
struct Traits {
    // `struct AubStream` also serves as the forward declaration of the stream type.
    typedef struct AubStream Stream;

    enum {
        device = deviceIn,
        addressingBits = addressingBitsIn
    };
};
|
||||
|
||||
struct AubStream {
|
||||
virtual void open(const char *filePath) = 0;
|
||||
virtual void close() = 0;
|
||||
virtual bool init(uint32_t stepping, uint32_t device) = 0;
|
||||
virtual void createContext(const AubPpgttContextCreate &cmd) {}
|
||||
virtual void writeMemory(uint64_t physAddress, const void *memory, size_t sizeToDumpThisIteration, uint32_t addressSpace, uint32_t hint) = 0;
|
||||
virtual void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) = 0;
|
||||
virtual void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace) {
|
||||
return writeMemoryWriteHeader(physAddress, size, addressSpace, CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceNotype);
|
||||
}
|
||||
virtual void writePTE(uint64_t physAddress, uint64_t entry) = 0;
|
||||
virtual void writeGTT(uint32_t offset, uint64_t entry) = 0;
|
||||
virtual void writeMMIO(uint32_t offset, uint32_t value) = 0;
|
||||
virtual void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) = 0;
|
||||
};
|
||||
|
||||
struct AubFileStream : public AubStream {
|
||||
void open(const char *filePath) override;
|
||||
void close() override;
|
||||
bool init(uint32_t stepping, uint32_t device) override;
|
||||
void createContext(const AubPpgttContextCreate &cmd) override;
|
||||
void writeMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t hint) override;
|
||||
void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) override;
|
||||
void writePTE(uint64_t physAddress, uint64_t entry) override;
|
||||
void writeGTT(uint32_t offset, uint64_t entry) override;
|
||||
void writeMMIO(uint32_t offset, uint32_t value) override;
|
||||
void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override;
|
||||
void expectMemory(uint64_t physAddress, const void *memory, size_t size);
|
||||
void addComment(const char *message);
|
||||
|
||||
std::ofstream fileHandle;
|
||||
};
|
||||
|
||||
// Per-addressing-mode page table parameters. The primary template is empty;
// only the 32-bit and 48-bit specializations carry data.
template <int addressingBits>
struct PageTableTraits {
};

// 32-bit addressing: 12 offset bits, 9 PTE bits, 9 PDE bits and the remaining
// 2 bits for the PDP index. The static constants are defined out of line.
template <>
struct PageTableTraits<32> {
    // clang-format off
    enum {
        addressingBits  = 32,
        NUM_OFFSET_BITS = 12,
        NUM_PTE_BITS    = 9,
        NUM_PDE_BITS    = 9,
        NUM_PDP_BITS    = addressingBits - NUM_PDE_BITS - NUM_PTE_BITS - NUM_OFFSET_BITS,
    };

    static const uint64_t physicalMemory;

    static const uint64_t numPTEntries;
    static const uint64_t sizePT;
    static const uint64_t ptBaseAddress;

    static const uint64_t numPDEntries;
    static const uint64_t sizePD;
    static const uint64_t pdBaseAddress;

    static const uint64_t numPDPEntries;
    static const uint64_t sizePDP;
    static const uint64_t pdpBaseAddress;
    // clang-format on
};

// 48-bit addressing: reuses the 32-bit field widths and adds a PML4 level
// that takes whatever bits are left.
// NOTE(review): because NUM_PDP_BITS is inherited from the 32-bit traits (2),
// NUM_PML4_BITS evaluates to 48 - 2 - 9 - 9 - 12 = 16 - confirm this is intended.
template <>
struct PageTableTraits<48> {
    // clang-format off
    enum {
        addressingBits  = 48,
        NUM_OFFSET_BITS = PageTableTraits<32>::NUM_OFFSET_BITS,
        NUM_PTE_BITS    = PageTableTraits<32>::NUM_PTE_BITS,
        NUM_PDE_BITS    = PageTableTraits<32>::NUM_PDE_BITS,
        NUM_PDP_BITS    = PageTableTraits<32>::NUM_PDP_BITS,
        NUM_PML4_BITS   = addressingBits - NUM_PDP_BITS - NUM_PDE_BITS - NUM_PTE_BITS - NUM_OFFSET_BITS
    };

    static const uint64_t physicalMemory;

    static const uint64_t numPTEntries;
    static const uint64_t sizePT;
    static const uint64_t ptBaseAddress;

    static const uint64_t numPDEntries;
    static const uint64_t sizePD;
    static const uint64_t pdBaseAddress;

    static const uint64_t numPDPEntries;
    static const uint64_t sizePDP;
    static const uint64_t pdpBaseAddress;

    static const uint64_t numPML4Entries;
    static const uint64_t sizePML4;
    static const uint64_t pml4BaseAddress;
    // clang-format on
};
|
||||
|
||||
template <typename Traits>
|
||||
struct AubPageTableHelper {
|
||||
typedef AubMemDump::PageTableTraits<Traits::addressingBits> PageTableTraits;
|
||||
|
||||
enum {
|
||||
addressingBits = Traits::addressingBits
|
||||
};
|
||||
|
||||
static inline uint32_t ptrToGGTT(const void *memory) {
|
||||
return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(memory));
|
||||
}
|
||||
|
||||
static inline uintptr_t ptrToPPGTT(const void *memory) {
|
||||
return reinterpret_cast<uintptr_t>(memory);
|
||||
}
|
||||
|
||||
static inline uint64_t getPTEAddress(uint64_t ptIndex) {
|
||||
return PageTableTraits::ptBaseAddress + ptIndex * sizeof(uint64_t);
|
||||
}
|
||||
|
||||
static inline uint64_t getPDEAddress(uint64_t pdIndex) {
|
||||
return PageTableTraits::pdBaseAddress + pdIndex * sizeof(uint64_t);
|
||||
}
|
||||
|
||||
static inline uint64_t getPDPAddress(uint64_t pdpIndex) {
|
||||
return PageTableTraits::pdpBaseAddress + pdpIndex * sizeof(uint64_t);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Traits>
|
||||
struct AubPageTableHelper32 : public AubPageTableHelper<Traits>, PageTableTraits<32> {
|
||||
typedef AubPageTableHelper<Traits> BaseClass;
|
||||
|
||||
static void createContext(typename Traits::Stream &stream, uint32_t context);
|
||||
static uint64_t reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress);
|
||||
|
||||
static void fixupLRC(uint8_t *pLrc);
|
||||
};
|
||||
|
||||
template <typename Traits>
|
||||
struct AubPageTableHelper64 : public AubPageTableHelper<Traits>, PageTableTraits<48> {
|
||||
typedef AubPageTableHelper<Traits> BaseClass;
|
||||
|
||||
static inline uint64_t getPML4Address(uint64_t pml4Index) {
|
||||
return pml4BaseAddress + pml4Index * sizeof(uint64_t);
|
||||
}
|
||||
|
||||
static void createContext(typename Traits::Stream &stream, uint32_t context);
|
||||
static uint64_t reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress);
|
||||
|
||||
static void fixupLRC(uint8_t *pLrc);
|
||||
};
|
||||
|
||||
template <typename TraitsIn>
|
||||
struct AubDump : public TypeSelector<AubPageTableHelper32<TraitsIn>, AubPageTableHelper64<TraitsIn>, TraitsIn::addressingBits == 32>::type {
|
||||
typedef TraitsIn Traits;
|
||||
typedef typename TypeSelector<uint32_t, uint64_t, Traits::addressingBits == 32>::type AddressType;
|
||||
typedef typename TypeSelector<AubPageTableHelper32<Traits>, AubPageTableHelper64<Traits>, Traits::addressingBits == 32>::type BaseHelper;
|
||||
typedef typename Traits::Stream Stream;
|
||||
|
||||
typedef union _MiContextDescriptorReg_ {
|
||||
struct {
|
||||
uint64_t Valid : 1; //[0]
|
||||
uint64_t ForcePageDirRestore : 1; //[1]
|
||||
uint64_t ForceRestore : 1; //[2]
|
||||
uint64_t Legacy : 1; //[3]
|
||||
uint64_t ADor64bitSupport : 1; //[4] Selects 64-bit PPGTT in Legacy mode
|
||||
uint64_t LlcCoherencySupport : 1; //[5]
|
||||
uint64_t FaultSupport : 2; //[7:6]
|
||||
uint64_t PrivilegeAccessOrPPGTT : 1; //[8] Selects PPGTT in Legacy mode
|
||||
uint64_t FunctionType : 3; //[11:9]
|
||||
uint64_t LogicalRingCtxAddress : 20; //[31:12]
|
||||
uint64_t ContextID : 32; //[63:32]
|
||||
} sData;
|
||||
uint32_t ulData[2];
|
||||
uint64_t qwordData[2 / 2];
|
||||
} MiContextDescriptorReg, *pMiContextDescriptorReg;
|
||||
|
||||
// Write a block of memory to a given address space using an optional hint
|
||||
static void addMemoryWrite(Stream &stream, uint64_t addr, const void *memory, size_t blockSize, int addressSpace, int hint = DataTypeHintValues::TraceNotype);
|
||||
static uint64_t reserveAddressGGTT(Stream &stream, uint32_t addr, size_t size, uint64_t physStart);
|
||||
static uint64_t reserveAddressGGTT(Stream &stream, const void *memory, size_t size, uint64_t physStart);
|
||||
|
||||
private:
|
||||
static uint64_t reserveAddress(Stream &stream, uint32_t addr, size_t size, unsigned int addressSpace /* = AddressSpaceValues::TraceGttEntry*/, uint64_t physStart);
|
||||
};
|
||||
|
||||
struct LrcaHelper {
|
||||
LrcaHelper(uint32_t base) : mmioBase(base) {
|
||||
}
|
||||
|
||||
int aubHintLRCA = DataTypeHintValues::TraceNotype;
|
||||
int aubHintCommandBuffer = DataTypeHintValues::TraceCommandBuffer;
|
||||
int aubHintBatchBuffer = DataTypeHintValues::TraceBatchBuffer;
|
||||
|
||||
const char *name = "XCS";
|
||||
uint32_t mmioBase = 0;
|
||||
|
||||
size_t sizeLRCA = 0x2000;
|
||||
uint32_t alignLRCA = 0x1000;
|
||||
uint32_t offsetContext = 0x1000;
|
||||
|
||||
uint32_t offsetLRI0 = 0x01 * sizeof(uint32_t);
|
||||
uint32_t numRegsLRI0 = 14;
|
||||
|
||||
uint32_t numNoops0 = 3;
|
||||
|
||||
uint32_t offsetLRI1 = offsetLRI0 + (1 + numRegsLRI0 * 2 + numNoops0) * sizeof(uint32_t); //offsetLRI == 0x21 * sizeof(uint32_t);
|
||||
uint32_t numRegsLRI1 = 9;
|
||||
|
||||
uint32_t numNoops1 = 13;
|
||||
|
||||
uint32_t offsetLRI2 = offsetLRI1 + (1 + numRegsLRI1 * 2 + numNoops1) * sizeof(uint32_t); //offsetLR2 == 0x41 * sizeof(uint32_t);
|
||||
uint32_t numRegsLRI2 = 1;
|
||||
|
||||
uint32_t offsetRingRegisters = offsetLRI0 + (3 * sizeof(uint32_t));
|
||||
uint32_t offsetRingHead = 0x0 * sizeof(uint32_t);
|
||||
uint32_t offsetRingTail = 0x2 * sizeof(uint32_t);
|
||||
uint32_t offsetRingBase = 0x4 * sizeof(uint32_t);
|
||||
uint32_t offsetRingCtrl = 0x6 * sizeof(uint32_t);
|
||||
|
||||
uint32_t offsetPageTableRegisters = offsetLRI1 + (3 * sizeof(uint32_t));
|
||||
uint32_t offsetPDP0 = 0xc * sizeof(uint32_t);
|
||||
uint32_t offsetPDP1 = 0x8 * sizeof(uint32_t);
|
||||
uint32_t offsetPDP2 = 0x4 * sizeof(uint32_t);
|
||||
uint32_t offsetPDP3 = 0x0 * sizeof(uint32_t);
|
||||
|
||||
void initialize(void *pLRCIn) const;
|
||||
void setRingHead(void *pLRCIn, uint32_t ringHead) const;
|
||||
void setRingTail(void *pLRCIn, uint32_t ringTail) const;
|
||||
void setRingBase(void *pLRCIn, uint32_t ringBase) const;
|
||||
void setRingCtrl(void *pLRCIn, uint32_t ringCtrl) const;
|
||||
|
||||
void setPDP0(void *pLRCIn, uint64_t address) const;
|
||||
void setPDP1(void *pLRCIn, uint64_t address) const;
|
||||
void setPDP2(void *pLRCIn, uint64_t address) const;
|
||||
void setPDP3(void *pLRCIn, uint64_t address) const;
|
||||
|
||||
void setPML4(void *pLRCIn, uint64_t address) const;
|
||||
};
|
||||
|
||||
struct LrcaHelperRcs : public LrcaHelper {
|
||||
LrcaHelperRcs(uint32_t base) : LrcaHelper(base) {
|
||||
aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextRcs;
|
||||
aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferPrimary;
|
||||
aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferPrimary;
|
||||
sizeLRCA = 0x11000;
|
||||
name = "RCS";
|
||||
}
|
||||
};
|
||||
|
||||
struct LrcaHelperBcs : public LrcaHelper {
|
||||
LrcaHelperBcs(uint32_t base) : LrcaHelper(base) {
|
||||
aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextBcs;
|
||||
aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferBlt;
|
||||
aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferBlt;
|
||||
name = "BCS";
|
||||
}
|
||||
};
|
||||
|
||||
struct LrcaHelperVcs : public LrcaHelper {
|
||||
LrcaHelperVcs(uint32_t base) : LrcaHelper(base) {
|
||||
aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextVcs;
|
||||
aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferMfx;
|
||||
aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferMfx;
|
||||
name = "VCS";
|
||||
}
|
||||
};
|
||||
|
||||
struct LrcaHelperVecs : public LrcaHelper {
|
||||
LrcaHelperVecs(uint32_t base) : LrcaHelper(base) {
|
||||
aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextVecs;
|
||||
name = "VECS";
|
||||
}
|
||||
};
|
||||
|
||||
extern const uint64_t g_pageMask;
|
||||
extern const size_t g_dwordCountMax;
|
||||
}
|
||||
328
runtime/aub_mem_dump/aub_mem_dump.inl
Normal file
328
runtime/aub_mem_dump/aub_mem_dump.inl
Normal file
@@ -0,0 +1,328 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "aub_mem_dump.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
namespace AubMemDump {
|
||||
|
||||
template <typename Traits>
|
||||
void AubPageTableHelper32<Traits>::fixupLRC(uint8_t *pLRC) {
|
||||
uint32_t pdAddress;
|
||||
pdAddress = BaseClass::getPDEAddress(0x600) >> 32;
|
||||
*(uint32_t *)(pLRC + 0x1094) = pdAddress;
|
||||
pdAddress = BaseClass::getPDEAddress(0x600) & 0xffffffff;
|
||||
*(uint32_t *)(pLRC + 0x109c) = pdAddress;
|
||||
pdAddress = BaseClass::getPDEAddress(0x400) >> 32;
|
||||
*(uint32_t *)(pLRC + 0x10a4) = pdAddress;
|
||||
pdAddress = BaseClass::getPDEAddress(0x400) & 0xffffffff;
|
||||
*(uint32_t *)(pLRC + 0x10ac) = pdAddress;
|
||||
pdAddress = BaseClass::getPDEAddress(0x200) >> 32;
|
||||
*(uint32_t *)(pLRC + 0x10b4) = pdAddress;
|
||||
pdAddress = BaseClass::getPDEAddress(0x200) & 0xffffffff;
|
||||
*(uint32_t *)(pLRC + 0x10bc) = pdAddress;
|
||||
pdAddress = BaseClass::getPDEAddress(0) >> 32;
|
||||
*(uint32_t *)(pLRC + 0x10c4) = pdAddress;
|
||||
pdAddress = BaseClass::getPDEAddress(0) & 0xffffffff;
|
||||
*(uint32_t *)(pLRC + 0x10cc) = pdAddress;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void AubPageTableHelper64<Traits>::fixupLRC(uint8_t *pLRC) {
|
||||
uint32_t pml4Address = getPML4Address(0) >> 32;
|
||||
*(uint32_t *)(pLRC + 0x10c4) = pml4Address;
|
||||
pml4Address = getPML4Address(0) & 0xffffffff;
|
||||
*(uint32_t *)(pLRC + 0x10cc) = pml4Address;
|
||||
}
|
||||
|
||||
// Write a block of memory to a given address space using an optional hint
|
||||
template <typename Traits>
|
||||
void AubDump<Traits>::addMemoryWrite(typename Traits::Stream &stream, uint64_t addr, const void *memory, size_t sizeRemaining, int addressSpace, int hint) {
|
||||
// We can only dump a relatively small amount per CmdServicesMemTraceMemoryWrite
|
||||
auto sizeMemoryWriteHeader = sizeof(CmdServicesMemTraceMemoryWrite) - sizeof(CmdServicesMemTraceMemoryWrite::data);
|
||||
auto blockSizeMax = g_dwordCountMax * sizeof(uint32_t) - sizeMemoryWriteHeader;
|
||||
|
||||
if (hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextRcs ||
|
||||
hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextBcs ||
|
||||
hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextVcs ||
|
||||
hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextVecs) {
|
||||
DEBUG_BREAK_IF(sizeRemaining <= 0x10cc);
|
||||
uint8_t *pLRC = reinterpret_cast<uint8_t *>(const_cast<void *>(memory));
|
||||
BaseHelper::fixupLRC(pLRC);
|
||||
}
|
||||
|
||||
// loop to dump all of the blocks
|
||||
while (sizeRemaining > 0) {
|
||||
auto sizeThisIteration = std::min(blockSizeMax, sizeRemaining);
|
||||
stream.writeMemory(addr, memory, sizeThisIteration, addressSpace, hint);
|
||||
|
||||
sizeRemaining -= sizeThisIteration;
|
||||
memory = (uint8_t *)memory + sizeThisIteration;
|
||||
addr += sizeThisIteration;
|
||||
}
|
||||
}
|
||||
|
||||
// Reserve memory in the GGTT.
|
||||
template <typename Traits>
|
||||
uint64_t AubDump<Traits>::reserveAddress(typename Traits::Stream &stream, uint32_t addr, size_t size, unsigned int addressSpace, uint64_t physStart) {
|
||||
auto startPage = addr & g_pageMask;
|
||||
auto endPage = (addr + size - 1) & g_pageMask;
|
||||
auto numPages = (uint32_t)(((endPage - startPage) / 4096) + 1);
|
||||
|
||||
// Can only handle 16 bits of dwordCount.
|
||||
DEBUG_BREAK_IF(!(numPages > 0 && (numPages + 4) < 65536));
|
||||
auto gttTableOffset = static_cast<uint32_t>((((uint32_t)startPage) / 4096) * sizeof(MiGttEntry));
|
||||
|
||||
// Write header
|
||||
{
|
||||
typedef AubMemDump::CmdServicesMemTraceMemoryWrite CmdServicesMemTraceMemoryWrite;
|
||||
stream.writeMemoryWriteHeader(gttTableOffset, numPages * sizeof(AubMemDump::MiGttEntry), addressSpace, CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceNotype);
|
||||
}
|
||||
|
||||
uint64_t physAddress = physStart;
|
||||
while (startPage <= endPage) {
|
||||
MiGttEntry entry;
|
||||
setGttEntry(entry, physAddress);
|
||||
|
||||
stream.writeGTT(gttTableOffset, entry.uiData);
|
||||
gttTableOffset += sizeof(entry);
|
||||
|
||||
physAddress += 4096;
|
||||
startPage += 4096;
|
||||
}
|
||||
|
||||
return physStart;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
uint64_t AubDump<Traits>::reserveAddressGGTT(typename Traits::Stream &stream, uint32_t addr, size_t size, uint64_t physStart) {
|
||||
return AubDump<Traits>::reserveAddress(stream, addr, size, AddressSpaceValues::TraceGttEntry, physStart);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
uint64_t AubDump<Traits>::reserveAddressGGTT(typename Traits::Stream &stream, const void *memory, size_t size, uint64_t physStart) {
|
||||
auto gfxAddress = BaseHelper::ptrToGGTT(memory);
|
||||
return AubDump<Traits>::reserveAddress(stream, gfxAddress, size, AddressSpaceValues::TraceGttEntry, physStart);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
uint64_t AubPageTableHelper32<Traits>::reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress) {
|
||||
auto startAddress = gfxAddress;
|
||||
auto endAddress = gfxAddress + blockSize - 1;
|
||||
|
||||
auto startPTE = startAddress >> 12;
|
||||
auto endPTE = endAddress >> 12;
|
||||
auto numPTEs = endPTE - startPTE + 1;
|
||||
|
||||
auto startPDE = startPTE >> 9;
|
||||
auto endPDE = endPTE >> 9;
|
||||
auto numPDEs = endPDE - startPDE + 1;
|
||||
|
||||
// Process the PD entries
|
||||
bool writePDE = true;
|
||||
if (writePDE) {
|
||||
auto start_address = BaseClass::getPDEAddress(startPDE);
|
||||
|
||||
stream.writeMemoryWriteHeader(start_address, numPDEs * sizeof(uint64_t), AddressSpaceValues::TracePpgttPdEntry);
|
||||
|
||||
auto currPDE = startPDE;
|
||||
auto physPage = BaseClass::getPTEAddress(startPTE) & g_pageMask;
|
||||
while (currPDE <= endPDE) {
|
||||
auto pde = physPage | 7;
|
||||
|
||||
stream.writePTE(start_address, pde);
|
||||
start_address += sizeof(pde);
|
||||
|
||||
physPage += 4096;
|
||||
currPDE++;
|
||||
}
|
||||
}
|
||||
|
||||
// Process the PT entries
|
||||
bool writePTE = true;
|
||||
if (writePTE) {
|
||||
auto start_address = BaseClass::getPTEAddress(startPTE);
|
||||
|
||||
stream.writeMemoryWriteHeader(start_address, numPTEs * sizeof(uint64_t), AddressSpaceValues::TracePpgttEntry);
|
||||
|
||||
auto currPTE = startPTE;
|
||||
auto physPage = physAddress & g_pageMask;
|
||||
while (currPTE <= endPTE) {
|
||||
auto pte = physPage | 7;
|
||||
|
||||
stream.writePTE(start_address, pte);
|
||||
start_address += sizeof(pte);
|
||||
|
||||
physPage += 4096;
|
||||
currPTE++;
|
||||
}
|
||||
}
|
||||
|
||||
return physAddress;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
uint64_t AubPageTableHelper64<Traits>::reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress) {
|
||||
auto startAddress = gfxAddress;
|
||||
auto endAddress = gfxAddress + blockSize - 1;
|
||||
|
||||
auto startPTE = startAddress >> 12;
|
||||
auto endPTE = endAddress >> 12;
|
||||
auto numPTEs = endPTE - startPTE + 1;
|
||||
|
||||
auto startPDE = startPTE >> 9;
|
||||
auto endPDE = endPTE >> 9;
|
||||
auto numPDEs = endPDE - startPDE + 1;
|
||||
|
||||
auto startPDP = startPDE >> 9;
|
||||
auto endPDP = endPDE >> 9;
|
||||
auto numPDPs = endPDP - startPDP + 1;
|
||||
|
||||
auto startPML4 = startPDP >> 9;
|
||||
auto endPML4 = endPDP >> 9;
|
||||
auto numPML4s = endPML4 - startPML4 + 1;
|
||||
|
||||
// Process the PML4 entries
|
||||
bool writePML4 = true;
|
||||
if (writePML4) {
|
||||
auto start_address = getPML4Address(startPML4);
|
||||
|
||||
stream.writeMemoryWriteHeader(start_address, numPML4s * sizeof(uint64_t), AddressSpaceValues::TracePml4Entry);
|
||||
|
||||
auto currPML4 = startPML4;
|
||||
auto physPage = BaseClass::getPDPAddress(startPDP) & g_pageMask;
|
||||
while (currPML4 <= endPML4) {
|
||||
auto pml4 = physPage | 7;
|
||||
|
||||
stream.writePTE(start_address, pml4);
|
||||
start_address += sizeof(pml4);
|
||||
|
||||
physPage += 4096;
|
||||
currPML4++;
|
||||
}
|
||||
}
|
||||
|
||||
// Process the PDP entries
|
||||
bool writePDPE = true;
|
||||
if (writePDPE) {
|
||||
auto start_address = BaseClass::getPDPAddress(startPDP);
|
||||
|
||||
stream.writeMemoryWriteHeader(start_address, numPDPs * sizeof(uint64_t), AddressSpaceValues::TracePhysicalPdpEntry);
|
||||
|
||||
auto currPDP = startPDP;
|
||||
auto physPage = BaseClass::getPDEAddress(startPDE) & g_pageMask;
|
||||
while (currPDP <= endPDP) {
|
||||
auto pdp = physPage | 7;
|
||||
|
||||
stream.writePTE(start_address, pdp);
|
||||
start_address += sizeof(pdp);
|
||||
|
||||
physPage += 4096;
|
||||
currPDP++;
|
||||
}
|
||||
}
|
||||
|
||||
// Process the PD entries
|
||||
bool writePDE = true;
|
||||
if (writePDE) {
|
||||
auto start_address = BaseClass::getPDEAddress(startPDE);
|
||||
|
||||
stream.writeMemoryWriteHeader(start_address, numPDEs * sizeof(uint64_t), AddressSpaceValues::TracePpgttPdEntry);
|
||||
|
||||
auto currPDE = startPDE;
|
||||
auto physPage = BaseClass::getPTEAddress(startPTE) & g_pageMask;
|
||||
while (currPDE <= endPDE) {
|
||||
auto pde = physPage | 7;
|
||||
|
||||
stream.writePTE(start_address, pde);
|
||||
start_address += sizeof(pde);
|
||||
|
||||
physPage += 4096;
|
||||
currPDE++;
|
||||
}
|
||||
}
|
||||
|
||||
// Process the PT entries
|
||||
bool writePTE = true;
|
||||
if (writePTE) {
|
||||
auto start_address = BaseClass::getPTEAddress(startPTE);
|
||||
|
||||
stream.writeMemoryWriteHeader(start_address, numPTEs * sizeof(uint64_t), AddressSpaceValues::TracePpgttEntry);
|
||||
|
||||
auto currPTE = startPTE;
|
||||
auto physPage = physAddress & g_pageMask;
|
||||
while (currPTE <= endPTE) {
|
||||
auto pte = physPage | 7;
|
||||
|
||||
stream.writePTE(start_address, pte);
|
||||
start_address += sizeof(pte);
|
||||
|
||||
physPage += 4096;
|
||||
currPTE++;
|
||||
}
|
||||
}
|
||||
|
||||
return physAddress;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void AubPageTableHelper32<Traits>::createContext(typename Traits::Stream &stream, uint32_t context) {
|
||||
AubPpgttContextCreate cmd;
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
cmd.Header.Type = 0x7;
|
||||
cmd.Header.Opcode = 0x1;
|
||||
cmd.Header.SubOp = 0x14;
|
||||
cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1;
|
||||
cmd.Handle = context;
|
||||
cmd.AdvancedContext = false;
|
||||
|
||||
cmd.SixtyFourBit = 0;
|
||||
cmd.PageDirPointer[0] = BaseClass::getPDEAddress(0x000);
|
||||
cmd.PageDirPointer[1] = BaseClass::getPDEAddress(0x200);
|
||||
cmd.PageDirPointer[2] = BaseClass::getPDEAddress(0x400);
|
||||
cmd.PageDirPointer[3] = BaseClass::getPDEAddress(0x600);
|
||||
|
||||
stream.createContext(cmd);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void AubPageTableHelper64<Traits>::createContext(typename Traits::Stream &stream, uint32_t context) {
|
||||
AubPpgttContextCreate cmd;
|
||||
memset(&cmd, 0, sizeof(cmd));
|
||||
cmd.Header.Type = 0x7;
|
||||
cmd.Header.Opcode = 0x1;
|
||||
cmd.Header.SubOp = 0x14;
|
||||
cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1;
|
||||
cmd.Handle = context;
|
||||
cmd.AdvancedContext = false;
|
||||
|
||||
cmd.SixtyFourBit = 1;
|
||||
cmd.PageDirPointer[0] = getPML4Address(0);
|
||||
|
||||
stream.createContext(cmd);
|
||||
}
|
||||
|
||||
}
|
||||
1433
runtime/aub_mem_dump/aub_services.h
Normal file
1433
runtime/aub_mem_dump/aub_services.h
Normal file
File diff suppressed because it is too large
Load Diff
79
runtime/built_ins/CMakeLists.txt
Normal file
79
runtime/built_ins/CMakeLists.txt
Normal file
@@ -0,0 +1,79 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# We require cmake 3.2.0 or later
cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR)

# Object library for the built-ins; this CMakeLists.txt is its only initial source,
# generated sources are attached further down when COMPILE_BUILT_INS is set.
add_library(${BUILTINS_BINARIES_LIB_NAME} OBJECT CMakeLists.txt)

# Add builtins sources
add_subdirectory(registry)

# Names of the built-ins whose generated cpp files are collected below
set(GENERATED_BUILTINS
  "COPY_BUFFER_TO_BUFFER"
  "COPY_BUFFER_RECT"
  "FILL_BUFFER"
  "COPY_BUFFER_TO_IMAGE3D"
  "COPY_IMAGE3D_TO_BUFFER"
  "COPY_IMAGE_TO_IMAGE1D"
  "COPY_IMAGE_TO_IMAGE2D"
  "COPY_IMAGE_TO_IMAGE3D"
  "FILL_IMAGE1D"
  "FILL_IMAGE2D"
  "FILL_IMAGE3D"
)

# Generate builtins cpps
if(COMPILE_BUILT_INS)
  add_subdirectory(kernels)
endif()

# Reverse order so that GEN N+1 includes GEN N
foreach(GEN_NUM RANGE ${MAX_GEN} 0 -1)
  GEN_CONTAINS_PLATFORMS("SUPPORTED" ${GEN_NUM} GENX_HAS_PLATFORMS)
  if(${GENX_HAS_PLATFORMS})
    # Get all supported platforms for this GEN
    GET_PLATFORMS_FOR_GEN("SUPPORTED" ${GEN_NUM} SUPPORTED_GENX_PLATFORMS)

    # Add platform-specific files: one entry per (platform, built-in) pair
    foreach(PLATFORM_IT ${SUPPORTED_GENX_PLATFORMS})
      foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
        list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN}_GEN${GEN_NUM}_${PLATFORM_IT}})
      endforeach()
    endforeach()

    source_group("generated files\\gen${GEN_NUM}" FILES ${GENERATED_BUILTINS_CPPS})
  endif()
endforeach()

# Attach the collected files to the library and mark them as produced at build time
if(COMPILE_BUILT_INS)
  target_sources(${BUILTINS_BINARIES_LIB_NAME} PUBLIC ${GENERATED_BUILTINS_CPPS})
  set_source_files_properties(${GENERATED_BUILTINS_CPPS} PROPERTIES GENERATED TRUE)
endif()

set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES
  LINKER_LANGUAGE CXX
  POSITION_INDEPENDENT_CODE ON
)

target_include_directories(${BUILTINS_BINARIES_LIB_NAME} PRIVATE
  ${KHRONOS_HEADERS_DIR}
  ${UMKM_SHAREDDATA_INCLUDE_PATHS}
  ${IGDRCL__IGC_INCLUDE_DIR}
  ${THIRD_PARTY_DIR}
)
|
||||
890
runtime/built_ins/built_ins.cpp
Normal file
890
runtime/built_ins/built_ins.cpp
Normal file
@@ -0,0 +1,890 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include "built_ins.h"
|
||||
#include "runtime/built_ins/vme_dispatch_builder.h"
|
||||
#include "runtime/built_ins/sip.h"
|
||||
#include "runtime/compiler_interface/compiler_interface.h"
|
||||
#include "runtime/program/program.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/helpers/basic_math.h"
|
||||
#include "runtime/helpers/convert_color.h"
|
||||
#include "runtime/helpers/dispatch_info_builder.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include <sstream>
|
||||
|
||||
namespace OCLRT {
|
||||
// Process-wide BuiltIns instance: allocated on first getInstance() call, deleted and reset by shutDown().
BuiltIns *BuiltIns::pInstance = nullptr;
|
||||
|
||||
const char *mediaKernelsBuildOptions = {
|
||||
"-D cl_intel_device_side_advanced_vme_enable "
|
||||
"-D cl_intel_device_side_avc_vme_enable "
|
||||
"-D cl_intel_device_side_vme_enable "
|
||||
"-D cl_intel_media_block_io "
|
||||
"-cl-fast-relaxed-math "};
|
||||
|
||||
// Creates the BuiltinsLib that this object hands out built-in code from.
BuiltIns::BuiltIns() {
    auto *lib = new BuiltinsLib();
    builtinsLib.reset(lib);
}
|
||||
|
||||
// Destroys the scheduler kernel and its program (if they were ever created)
// and clears the stored pointers.
BuiltIns::~BuiltIns() {
    auto &scheduler = schedulerBuiltIn;

    // pKernel is cast back to SchedulerKernel before deletion, mirroring the
    // cast used when getSchedulerKernel() hands it out
    delete static_cast<SchedulerKernel *>(scheduler.pKernel);
    delete scheduler.pProgram;

    scheduler.pKernel = nullptr;
    scheduler.pProgram = nullptr;
}
|
||||
|
||||
// Returns the shared BuiltIns object, allocating it on the first call.
// Every call takes a function-local mutex, so concurrent first calls create
// only one instance.
BuiltIns &BuiltIns::getInstance() {
    static std::mutex initMutex;
    std::lock_guard<std::mutex> guard(initMutex);

    if (!pInstance) {
        pInstance = new BuiltIns();
    }

    return *pInstance;
}
|
||||
|
||||
void BuiltIns::shutDown() {
|
||||
if (pInstance) {
|
||||
auto inst = pInstance;
|
||||
pInstance = nullptr;
|
||||
delete inst;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the scheduler kernel, building its program and kernel on first use.
//
// The first caller creates a Program from the Scheduler built-in binary, processes it,
// looks up the kernel info named SchedulerKernel::schedulerName and creates the kernel;
// std::call_once on schedulerBuiltIn.programIsInitialized keeps that to a single run.
// Both objects stay owned by this BuiltIns object and are freed in its destructor.
//
// context - used to create the program; device 0 of the context selects the built-in code.
//
// Null results that would otherwise be dereferenced (program, kernel info, kernel) are
// treated as unrecoverable in every build type, matching getSipKernel(); non-success
// return codes alone still only trigger a debug break.
SchedulerKernel &BuiltIns::getSchedulerKernel(Context &context) {
    // Fast path: kernel already created by an earlier call
    if (schedulerBuiltIn.pKernel) {
        return *static_cast<SchedulerKernel *>(schedulerBuiltIn.pKernel);
    }

    auto initializeSchedulerProgramAndKernel = [&] {
        cl_int retVal = CL_SUCCESS;

        auto src = getInstance().builtinsLib->getBuiltinCode(EBuiltInOps::Scheduler, BuiltinCode::ECodeType::Any, *context.getDevice(0));

        auto program = Program::createFromGenBinary(&context,
                                                    src.resource.data(),
                                                    src.resource.size(),
                                                    true,
                                                    &retVal);
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
        // program is dereferenced right below, so a failed creation must stop here
        UNRECOVERABLE_IF(program == nullptr);

        retVal = program->processGenBinary();
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);

        schedulerBuiltIn.pProgram = program;

        auto kernelInfo = schedulerBuiltIn.pProgram->getKernelInfo(SchedulerKernel::schedulerName);
        // kernelInfo is dereferenced when creating the kernel
        UNRECOVERABLE_IF(kernelInfo == nullptr);

        schedulerBuiltIn.pKernel = Kernel::create<SchedulerKernel>(
            schedulerBuiltIn.pProgram,
            *kernelInfo,
            &retVal);

        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
    };
    std::call_once(schedulerBuiltIn.programIsInitialized, initializeSchedulerProgramAndKernel);

    // Never hand out a reference formed from a null pointer
    UNRECOVERABLE_IF(schedulerBuiltIn.pKernel == nullptr);
    return *static_cast<SchedulerKernel *>(schedulerBuiltIn.pKernel);
}
|
||||
|
||||
// Returns the SIP kernel of the requested type, creating it on first use.
//
// type    - which SIP kernel to return; must be below SipKernelType::COUNT.
// context - used to create the temporary program; device 0 selects the binary.
//
// Each kernel type has its own slot (kernel pointer + once-flag) in sipKernels, so the
// binary is fetched from the compiler interface and parsed at most once per type.
// Any failure that would leave the slot unusable is unrecoverable.
SipKernel &BuiltIns::getSipKernel(SipKernelType type, Context &context) {
    const uint32_t kernelId = static_cast<uint32_t>(type);
    UNRECOVERABLE_IF(kernelId >= static_cast<uint32_t>(SipKernelType::COUNT));
    auto &sipSlot = this->sipKernels[kernelId];

    auto buildSipKernel = [&] {
        cl_int retVal = CL_SUCCESS;

        // Ask the compiler interface for the SIP binary
        auto compilerInterface = CompilerInterface::getInstance();
        UNRECOVERABLE_IF(compilerInterface == nullptr);

        std::vector<char> binary;
        auto status = compilerInterface->getSipKernelBinary(type, *context.getDevice(0), binary);
        UNRECOVERABLE_IF(status != CL_SUCCESS);
        UNRECOVERABLE_IF(binary.empty());

        // Wrap the binary in a program so the kernel info can be extracted
        auto program = Program::createFromGenBinary(&context,
                                                    binary.data(),
                                                    binary.size(),
                                                    true,
                                                    &retVal);
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
        UNRECOVERABLE_IF(program == nullptr);

        retVal = program->processGenBinary();
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);

        auto kernelInfo = program->getKernelInfo(size_t{0});
        UNRECOVERABLE_IF(kernelInfo == nullptr);

        // The SIP code starts systemKernelOffset bytes into the kernel heap and runs to its end
        const uint32_t sipOffset = kernelInfo->systemKernelOffset;
        const auto kernelHeapSize = kernelInfo->heapInfo.pKernelHeader->KernelHeapSize;
        UNRECOVERABLE_IF(sipOffset >= kernelHeapSize);

        const auto sipStart = ptrOffset(kernelInfo->heapInfo.pKernelHeap, sipOffset);
        sipSlot.first.reset(new SipKernel(type, sipStart, kernelHeapSize - sipOffset));

        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
        // The program is only needed to build the SipKernel; drop this function's reference
        program->release();
    };
    std::call_once(sipSlot.second, buildSipKernel);

    UNRECOVERABLE_IF(sipSlot.first == nullptr);
    return *sipSlot.first;
}
|
||||
|
||||
// Source text of the media built-in kernels. Each string is the content of the
// corresponding kernels/*.igdrcl_built_in file pulled in by the preprocessor.

// VME:
static const char *blockMotionEstimateIntelSrc = {
#include "kernels/vme_block_motion_estimate_intel_frontend.igdrcl_built_in"
};

static const char *blockAdvancedMotionEstimateCheckIntelSrc = {
#include "kernels/vme_block_advanced_motion_estimate_check_intel_frontend.igdrcl_built_in"
};

static const char *blockAdvancedMotionEstimateBidirectionalCheckIntelSrc = {
#include "kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel_frontend.igdrcl_built_in"
};

// VEBOX:
static const char *veEnhanceIntelSrc = {
#include "kernels/vebox_ve_enhance_intel.igdrcl_built_in"
};

static const char *veDnEnhanceIntelSrc = {
#include "kernels/vebox_ve_dn_enhance_intel.igdrcl_built_in"
};

static const char *veDnDiEnhanceIntelSrc = {
#include "kernels/vebox_ve_dn_di_enhance_intel.igdrcl_built_in"
};

// Lookup table used by BuiltIns::createBuiltInProgram():
// element 0 is the kernel name accepted from the caller, element 1 its source text.
static const std::tuple<const char *, const char *> mediaBuiltIns[] = {
    std::tuple<const char *, const char *>("block_motion_estimate_intel", blockMotionEstimateIntelSrc),
    std::tuple<const char *, const char *>("block_advanced_motion_estimate_check_intel", blockAdvancedMotionEstimateCheckIntelSrc),
    std::tuple<const char *, const char *>("block_advanced_motion_estimate_bidirectional_check_intel", blockAdvancedMotionEstimateBidirectionalCheckIntelSrc),
    std::tuple<const char *, const char *>("ve_enhance_intel", veEnhanceIntelSrc),
    std::tuple<const char *, const char *>("ve_dn_enhance_intel", veDnEnhanceIntelSrc),
    std::tuple<const char *, const char *>("ve_dn_di_enhance_intel", veDnDiEnhanceIntelSrc),
};
|
||||
|
||||
// Unlike other built-ins media kernels are not stored in BuiltIns object.
|
||||
// Pointer to program with built in kernels is returned to the user through API
|
||||
// call and user is responsible for releasing it by calling clReleaseProgram.
|
||||
Program *BuiltIns::createBuiltInProgram(
|
||||
Context &context,
|
||||
Device &device,
|
||||
const char *kernelNames,
|
||||
int &errcodeRet) {
|
||||
std::string programSourceStr = "";
|
||||
std::istringstream ss(kernelNames);
|
||||
std::string currentKernelName;
|
||||
|
||||
while (std::getline(ss, currentKernelName, ';')) {
|
||||
bool found = false;
|
||||
for (auto &builtInTuple : mediaBuiltIns) {
|
||||
if (currentKernelName == std::get<0>(builtInTuple)) {
|
||||
programSourceStr += std::get<1>(builtInTuple);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
errcodeRet = CL_INVALID_VALUE;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
if (programSourceStr.empty() == true) {
|
||||
errcodeRet = CL_INVALID_VALUE;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Program *pBuiltInProgram = nullptr;
|
||||
|
||||
pBuiltInProgram = Program::create(programSourceStr.c_str(), &context, device, true, nullptr);
|
||||
|
||||
if (pBuiltInProgram) {
|
||||
std::unordered_map<std::string, BuiltinDispatchInfoBuilder *> builtinsBuilders;
|
||||
builtinsBuilders["block_motion_estimate_intel"] =
|
||||
&BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, context, device);
|
||||
builtinsBuilders["block_advanced_motion_estimate_check_intel"] =
|
||||
&BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, context, device);
|
||||
builtinsBuilders["block_advanced_motion_estimate_bidirectional_check_intel"] =
|
||||
&BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, context, device);
|
||||
const cl_device_id clDevice = &device;
|
||||
errcodeRet = pBuiltInProgram->build(
|
||||
clDevice,
|
||||
mediaKernelsBuildOptions,
|
||||
enableCacheing,
|
||||
builtinsBuilders);
|
||||
} else {
|
||||
errcodeRet = CL_INVALID_VALUE;
|
||||
}
|
||||
return pBuiltInProgram;
|
||||
}
|
||||
|
||||
// For every kernel in usedKernels: takes ownership of the kernel, then points it at the given context.
void BuiltinDispatchInfoBuilder::takeOwnership(Context *context) {
    for (auto &kernel : usedKernels) {
        kernel->takeOwnership(true);
        kernel->setContext(context);
    }
}
|
||||
|
||||
void BuiltinDispatchInfoBuilder::releaseOwnership() {
|
||||
for (auto &k : usedKernels) {
|
||||
k->setContext(nullptr);
|
||||
k->releaseOwnership();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... KernelsDescArgsT>
|
||||
void BuiltinDispatchInfoBuilder::populate(Context &context, Device &device, EBuiltInOps op, const char *options, KernelsDescArgsT &&... desc) {
|
||||
auto src = kernelsLib.getBuiltinsLib().getBuiltinCode(op, BuiltinCode::ECodeType::Any, device);
|
||||
prog.reset(BuiltinsLib::createProgramFromCode(src, context, device).release());
|
||||
prog->build(0, nullptr, options, nullptr, nullptr, kernelsLib.isCacheingEnabled());
|
||||
grabKernels(std::forward<KernelsDescArgsT>(desc)...);
|
||||
}
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernLeftLeftover(nullptr), kernMiddle(nullptr), kernRightLeftover(nullptr) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyBufferToBuffer,
|
||||
"",
|
||||
"CopyBufferToBufferLeftLeftover", kernLeftLeftover,
|
||||
"CopyBufferToBufferMiddle", kernMiddle,
|
||||
"CopyBufferToBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::KernelSplit> kernelSplit1DBuilder;
|
||||
|
||||
uintptr_t start = reinterpret_cast<uintptr_t>(operationParams.dstPtr) + operationParams.dstOffset.x;
|
||||
|
||||
size_t middleAlignment = MemoryConstants::cacheLineSize;
|
||||
size_t middleElSize = sizeof(uint32_t) * 4;
|
||||
|
||||
uintptr_t leftSize = start % middleAlignment;
|
||||
leftSize = (leftSize > 0) ? (middleAlignment - leftSize) : 0; // calc left leftover size
|
||||
leftSize = std::min(leftSize, operationParams.size.x); // clamp left leftover size to requested size
|
||||
|
||||
uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size
|
||||
rightSize = std::min(rightSize, operationParams.size.x - leftSize); // clamp
|
||||
|
||||
uintptr_t middleSizeBytes = operationParams.size.x - leftSize - rightSize; // calc middle size
|
||||
|
||||
if (!isAligned<4>(reinterpret_cast<uintptr_t>(operationParams.srcPtr) + operationParams.srcOffset.x + leftSize)) {
|
||||
//corner case - src relative to dst does not have DWORD alignment
|
||||
leftSize += middleSizeBytes;
|
||||
middleSizeBytes = 0;
|
||||
}
|
||||
|
||||
auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker
|
||||
|
||||
// Set-up ISA
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover);
|
||||
|
||||
// Set-up common kernel args
|
||||
if (operationParams.srcSvmAlloc) {
|
||||
kernelSplit1DBuilder.setArgSvmAlloc(0, operationParams.srcPtr, operationParams.srcSvmAlloc);
|
||||
} else if (operationParams.srcMemObj) {
|
||||
kernelSplit1DBuilder.setArg(0, operationParams.srcMemObj);
|
||||
} else {
|
||||
kernelSplit1DBuilder.setArgSvm(0, operationParams.size.x, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY);
|
||||
}
|
||||
if (operationParams.dstSvmAlloc) {
|
||||
kernelSplit1DBuilder.setArgSvmAlloc(1, operationParams.dstPtr, operationParams.dstSvmAlloc);
|
||||
} else if (operationParams.dstMemObj) {
|
||||
kernelSplit1DBuilder.setArg(1, operationParams.dstMemObj);
|
||||
} else {
|
||||
kernelSplit1DBuilder.setArgSvm(1, operationParams.size.x, operationParams.dstPtr);
|
||||
}
|
||||
|
||||
// Set-up srcOffset
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 2, static_cast<uint32_t>(operationParams.srcOffset.x));
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 2, static_cast<uint32_t>(operationParams.srcOffset.x + leftSize));
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 2, static_cast<uint32_t>(operationParams.srcOffset.x + leftSize + middleSizeBytes));
|
||||
|
||||
// Set-up dstOffset
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast<uint32_t>(operationParams.dstOffset.x));
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 3, static_cast<uint32_t>(operationParams.dstOffset.x + leftSize));
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 3, static_cast<uint32_t>(operationParams.dstOffset.x + leftSize + middleSizeBytes));
|
||||
|
||||
// Set-up work sizes
|
||||
// Note for split walker, it would be just builder.SetDipatchGeometry(GWS, ELWS, OFFSET)
|
||||
kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, Vec3<size_t>{leftSize, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Middle, Vec3<size_t>{middleSizeEls, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Right, Vec3<size_t>{rightSize, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelSplit1DBuilder.bake(multiDispatchInfo);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernLeftLeftover;
|
||||
Kernel *kernMiddle;
|
||||
Kernel *kernRightLeftover;
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::CopyBufferRect> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyBufferRect,
|
||||
"",
|
||||
"CopyBufferRectBytes2d", kernelBytes[0],
|
||||
"CopyBufferRectBytes2d", kernelBytes[1],
|
||||
"CopyBufferRectBytes3d", kernelBytes[2]);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::NoSplit> kernelNoSplit3DBuilder;
|
||||
|
||||
size_t hostPtrSize = 0;
|
||||
bool is3D = false;
|
||||
|
||||
if (operationParams.srcMemObj && operationParams.dstMemObj) {
|
||||
DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && (operationParams.dstPtr == nullptr)));
|
||||
is3D = (operationParams.size.z > 1) || (operationParams.srcOffset.z > 0) || (operationParams.dstOffset.z > 0);
|
||||
} else {
|
||||
if (operationParams.srcPtr) {
|
||||
size_t origin[] = {operationParams.srcOffset.x, operationParams.srcOffset.y, operationParams.srcOffset.z};
|
||||
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
|
||||
hostPtrSize = Buffer::calculateHostPtrSize(origin, region, operationParams.srcRowPitch, operationParams.srcSlicePitch);
|
||||
is3D = (operationParams.size.z > 1) || (operationParams.dstOffset.z > 0);
|
||||
} else if (operationParams.dstPtr) {
|
||||
size_t origin[] = {operationParams.dstOffset.x, operationParams.dstOffset.y, operationParams.dstOffset.z};
|
||||
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
|
||||
hostPtrSize = Buffer::calculateHostPtrSize(origin, region, operationParams.dstRowPitch, operationParams.dstSlicePitch);
|
||||
is3D = (operationParams.size.z > 1) || (operationParams.srcOffset.z > 0);
|
||||
} else {
|
||||
DEBUG_BREAK_IF(!false);
|
||||
}
|
||||
}
|
||||
|
||||
// Set-up ISA
|
||||
int dimensions = is3D ? 3 : 2;
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]);
|
||||
|
||||
// arg0 = src
|
||||
if (operationParams.srcMemObj) {
|
||||
kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj);
|
||||
} else {
|
||||
kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, is3D ? operationParams.srcPtr : ptrOffset(operationParams.srcPtr, operationParams.srcOffset.z * operationParams.srcSlicePitch));
|
||||
}
|
||||
|
||||
// arg1 = dst
|
||||
if (operationParams.dstMemObj) {
|
||||
kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj);
|
||||
} else {
|
||||
kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, is3D ? operationParams.dstPtr : ptrOffset(operationParams.dstPtr, operationParams.dstOffset.z * operationParams.dstSlicePitch));
|
||||
}
|
||||
|
||||
// arg2 = srcOrigin
|
||||
uint32_t kSrcOrigin[4] = {(uint32_t)operationParams.srcOffset.x, (uint32_t)operationParams.srcOffset.y, (uint32_t)operationParams.srcOffset.z, 0};
|
||||
kernelNoSplit3DBuilder.setArg(2, sizeof(uint32_t) * 4, kSrcOrigin);
|
||||
|
||||
// arg3 = dstOrigin
|
||||
uint32_t kDstOrigin[4] = {(uint32_t)operationParams.dstOffset.x, (uint32_t)operationParams.dstOffset.y, (uint32_t)operationParams.dstOffset.z, 0};
|
||||
kernelNoSplit3DBuilder.setArg(3, sizeof(uint32_t) * 4, kDstOrigin);
|
||||
|
||||
// arg4 = srcPitch
|
||||
uint32_t kSrcPitch[2] = {(uint32_t)operationParams.srcRowPitch, (uint32_t)operationParams.srcSlicePitch};
|
||||
kernelNoSplit3DBuilder.setArg(4, sizeof(uint32_t) * 2, kSrcPitch);
|
||||
|
||||
// arg5 = dstPitch
|
||||
uint32_t kDstPitch[2] = {(uint32_t)operationParams.dstRowPitch, (uint32_t)operationParams.dstSlicePitch};
|
||||
kernelNoSplit3DBuilder.setArg(5, sizeof(uint32_t) * 2, kDstPitch);
|
||||
|
||||
// Set-up work sizes
|
||||
kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelNoSplit3DBuilder.bake(multiDispatchInfo);
|
||||
|
||||
// Store source and destination surfaces for residency purposes
|
||||
if (operationParams.srcMemObj) {
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(operationParams.srcMemObj)));
|
||||
} else {
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new HostPtrSurface(operationParams.srcPtr, hostPtrSize)));
|
||||
}
|
||||
if (operationParams.dstMemObj) {
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(operationParams.dstMemObj)));
|
||||
} else {
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new HostPtrSurface(operationParams.dstPtr, hostPtrSize)));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[3];
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernLeftLeftover(nullptr), kernMiddle(nullptr), kernRightLeftover(nullptr) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::FillBuffer,
|
||||
"",
|
||||
"FillBufferLeftLeftover", kernLeftLeftover,
|
||||
"FillBufferMiddle", kernMiddle,
|
||||
"FillBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::KernelSplit> kernelSplit1DBuilder;
|
||||
|
||||
uintptr_t start = reinterpret_cast<uintptr_t>(operationParams.dstPtr) + operationParams.dstOffset.x;
|
||||
|
||||
size_t middleAlignment = MemoryConstants::cacheLineSize;
|
||||
size_t middleElSize = sizeof(uint32_t);
|
||||
|
||||
uintptr_t leftSize = start % middleAlignment;
|
||||
leftSize = (leftSize > 0) ? (middleAlignment - leftSize) : 0; // calc left leftover size
|
||||
leftSize = std::min(leftSize, operationParams.size.x); // clamp left leftover size to requested size
|
||||
|
||||
uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size
|
||||
rightSize = std::min(rightSize, operationParams.size.x - leftSize); // clamp
|
||||
|
||||
uintptr_t middleSizeBytes = operationParams.size.x - leftSize - rightSize; // calc middle size
|
||||
|
||||
auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker
|
||||
|
||||
// Set-up ISA
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover);
|
||||
|
||||
DEBUG_BREAK_IF((operationParams.srcMemObj == nullptr) || (operationParams.srcOffset != 0));
|
||||
DEBUG_BREAK_IF((operationParams.dstMemObj == nullptr) && (operationParams.dstSvmAlloc == nullptr));
|
||||
|
||||
// Set-up dstMemObj with buffer
|
||||
if (operationParams.dstSvmAlloc) {
|
||||
kernelSplit1DBuilder.setArgSvmAlloc(0, operationParams.dstPtr, operationParams.dstSvmAlloc);
|
||||
} else {
|
||||
kernelSplit1DBuilder.setArg(0, operationParams.dstMemObj);
|
||||
}
|
||||
|
||||
// Set-up dstOffset
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 1, static_cast<uint32_t>(operationParams.dstOffset.x));
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 1, static_cast<uint32_t>(operationParams.dstOffset.x + leftSize));
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 1, static_cast<uint32_t>(operationParams.dstOffset.x + leftSize + middleSizeBytes));
|
||||
|
||||
// Set-up srcMemObj with pattern
|
||||
kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), operationParams.srcMemObj->getGraphicsAllocation()->getUnderlyingBuffer(), operationParams.srcMemObj->getGraphicsAllocation());
|
||||
|
||||
// Set-up patternSizeInEls
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast<uint32_t>(operationParams.srcMemObj->getSize()));
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 3, static_cast<uint32_t>(operationParams.srcMemObj->getSize() / middleElSize));
|
||||
kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 3, static_cast<uint32_t>(operationParams.srcMemObj->getSize()));
|
||||
|
||||
// Set-up work sizes
|
||||
// Note for split walker, it would be just builder.SetDipatchGeomtry(GWS, ELWS, OFFSET)
|
||||
kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, Vec3<size_t>{leftSize, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Middle, Vec3<size_t>{middleSizeEls, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Right, Vec3<size_t>{rightSize, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelSplit1DBuilder.bake(multiDispatchInfo);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernLeftLeftover;
|
||||
Kernel *kernMiddle;
|
||||
Kernel *kernRightLeftover;
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyBufferToImage3d,
|
||||
"",
|
||||
"CopyBufferToImage3dBytes", kernelBytes[0],
|
||||
"CopyBufferToImage3d2Bytes", kernelBytes[1],
|
||||
"CopyBufferToImage3d4Bytes", kernelBytes[2],
|
||||
"CopyBufferToImage3d8Bytes", kernelBytes[3],
|
||||
"CopyBufferToImage3d16Bytes", kernelBytes[4]);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::NoSplit> kernelNoSplit3DBuilder;
|
||||
|
||||
DEBUG_BREAK_IF(!(((operationParams.srcPtr != nullptr) || (operationParams.srcMemObj != nullptr)) && (operationParams.dstPtr == nullptr)));
|
||||
|
||||
auto dstImage = castToObjectOrAbort<Image>(operationParams.dstMemObj);
|
||||
|
||||
// Redescribe image to be byte-copy
|
||||
auto dstImageRedescribed = dstImage->redescribe();
|
||||
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(dstImageRedescribed)); // life range same as mdi's
|
||||
|
||||
// Calculate srcRowPitch and srcSlicePitch
|
||||
auto bytesPerPixel = dstImage->getSurfaceFormatInfo().ImageElementSizeInBytes;
|
||||
|
||||
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
|
||||
|
||||
auto srcRowPitch = static_cast<uint32_t>(operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel);
|
||||
|
||||
auto srcSlicePitch = static_cast<uint32_t>(
|
||||
operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch));
|
||||
|
||||
// Determine size of host ptr surface for residency purposes
|
||||
size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0;
|
||||
|
||||
// Set-up kernel
|
||||
auto bytesExponent = Math::log2(bytesPerPixel);
|
||||
DEBUG_BREAK_IF(bytesExponent >= 5);
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]);
|
||||
|
||||
// Set-up source host ptr / buffer
|
||||
if (operationParams.srcPtr) {
|
||||
kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, operationParams.srcPtr);
|
||||
} else {
|
||||
kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj);
|
||||
}
|
||||
|
||||
// Set-up destination image
|
||||
kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed);
|
||||
|
||||
// Set-up srcOffset
|
||||
kernelNoSplit3DBuilder.setArg(2, static_cast<uint32_t>(operationParams.srcOffset.x));
|
||||
|
||||
// Set-up dstOrigin
|
||||
{
|
||||
uint32_t origin[] = {
|
||||
static_cast<uint32_t>(operationParams.dstOffset.x),
|
||||
static_cast<uint32_t>(operationParams.dstOffset.y),
|
||||
static_cast<uint32_t>(operationParams.dstOffset.z),
|
||||
0};
|
||||
kernelNoSplit3DBuilder.setArg(3, sizeof(origin), origin);
|
||||
}
|
||||
|
||||
// Set-up srcRowPitch
|
||||
{
|
||||
uint32_t pitch[] = {
|
||||
static_cast<uint32_t>(srcRowPitch),
|
||||
static_cast<uint32_t>(srcSlicePitch)};
|
||||
kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch);
|
||||
}
|
||||
|
||||
// Set-up work sizes
|
||||
kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelNoSplit3DBuilder.bake(multiDispatchInfo);
|
||||
|
||||
// Store source and destination surfaces for residency purposes
|
||||
if (operationParams.srcMemObj) {
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(operationParams.srcMemObj)));
|
||||
} else {
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<HostPtrSurface>(new HostPtrSurface(operationParams.srcPtr, hostPtrSize)));
|
||||
}
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(operationParams.dstMemObj)));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[5];
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyImage3dToBuffer,
|
||||
"",
|
||||
"CopyImage3dToBufferBytes", kernelBytes[0],
|
||||
"CopyImage3dToBuffer2Bytes", kernelBytes[1],
|
||||
"CopyImage3dToBuffer4Bytes", kernelBytes[2],
|
||||
"CopyImage3dToBuffer8Bytes", kernelBytes[3],
|
||||
"CopyImage3dToBuffer16Bytes", kernelBytes[4]);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::NoSplit> kernelNoSplit3DBuilder;
|
||||
|
||||
DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && ((operationParams.dstPtr != nullptr) || (operationParams.dstMemObj != nullptr))));
|
||||
|
||||
auto srcImage = castToObjectOrAbort<Image>(operationParams.srcMemObj);
|
||||
|
||||
// Redescribe image to be byte-copy
|
||||
auto srcImageRedescribed = srcImage->redescribe();
|
||||
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(srcImageRedescribed)); // life range same as mdi's
|
||||
|
||||
// Calculate dstRowPitch and dstSlicePitch
|
||||
auto bytesPerPixel = srcImage->getSurfaceFormatInfo().ImageElementSizeInBytes;
|
||||
|
||||
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
|
||||
|
||||
auto dstRowPitch = static_cast<uint32_t>(operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel);
|
||||
|
||||
auto dstSlicePitch = static_cast<uint32_t>(
|
||||
operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch));
|
||||
|
||||
// Determine size of host ptr surface for residency purposes
|
||||
size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0;
|
||||
|
||||
// Set-up ISA
|
||||
auto bytesExponent = Math::log2(bytesPerPixel);
|
||||
DEBUG_BREAK_IF(bytesExponent >= 5);
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]);
|
||||
|
||||
// Set-up source image
|
||||
kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed);
|
||||
|
||||
// Set-up destination host ptr / buffer
|
||||
if (operationParams.dstPtr) {
|
||||
kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, operationParams.dstPtr);
|
||||
} else {
|
||||
kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj);
|
||||
}
|
||||
|
||||
// Set-up srcOrigin
|
||||
{
|
||||
uint32_t origin[] = {
|
||||
static_cast<uint32_t>(operationParams.srcOffset.x),
|
||||
static_cast<uint32_t>(operationParams.srcOffset.y),
|
||||
static_cast<uint32_t>(operationParams.srcOffset.z),
|
||||
0};
|
||||
kernelNoSplit3DBuilder.setArg(2, sizeof(origin), origin);
|
||||
}
|
||||
|
||||
// Set-up dstOffset
|
||||
kernelNoSplit3DBuilder.setArg(3, static_cast<uint32_t>(operationParams.dstOffset.x));
|
||||
|
||||
// Set-up dstRowPitch
|
||||
{
|
||||
uint32_t pitch[] = {
|
||||
static_cast<uint32_t>(dstRowPitch),
|
||||
static_cast<uint32_t>(dstSlicePitch)};
|
||||
kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch);
|
||||
}
|
||||
|
||||
// Set-up work sizes
|
||||
kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelNoSplit3DBuilder.bake(multiDispatchInfo);
|
||||
|
||||
// Store source and destination surfaces for residency purposes
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(operationParams.srcMemObj)));
|
||||
if (operationParams.dstMemObj) {
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(operationParams.dstMemObj)));
|
||||
} else {
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new HostPtrSurface(operationParams.dstPtr, hostPtrSize)));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[5];
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::CopyImageToImage3d> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernel(nullptr) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyImageToImage3d,
|
||||
"",
|
||||
"CopyImageToImage3d", kernel);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::NoSplit> kernelNoSplit3DBuilder;
|
||||
|
||||
DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && (operationParams.dstPtr == nullptr)));
|
||||
|
||||
auto srcImage = castToObjectOrAbort<Image>(operationParams.srcMemObj);
|
||||
auto dstImage = castToObjectOrAbort<Image>(operationParams.dstMemObj);
|
||||
|
||||
// Redescribe images to be byte-copies
|
||||
auto srcImageRedescribed = srcImage->redescribe();
|
||||
auto dstImageRedescribed = dstImage->redescribe();
|
||||
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(srcImageRedescribed)); // life range same as mdi's
|
||||
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(dstImageRedescribed)); // life range same as mdi's
|
||||
|
||||
// Set-up kernel
|
||||
kernelNoSplit3DBuilder.setKernel(kernel);
|
||||
|
||||
// Set-up source image
|
||||
kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed);
|
||||
|
||||
// Set-up destination image
|
||||
kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed);
|
||||
|
||||
// Set-up srcOrigin
|
||||
{
|
||||
uint32_t origin[] = {
|
||||
static_cast<uint32_t>(operationParams.srcOffset.x),
|
||||
static_cast<uint32_t>(operationParams.srcOffset.y),
|
||||
static_cast<uint32_t>(operationParams.srcOffset.z),
|
||||
0};
|
||||
kernelNoSplit3DBuilder.setArg(2, sizeof(origin), origin);
|
||||
}
|
||||
|
||||
// Set-up dstOrigin
|
||||
{
|
||||
uint32_t origin[] = {
|
||||
static_cast<uint32_t>(operationParams.dstOffset.x),
|
||||
static_cast<uint32_t>(operationParams.dstOffset.y),
|
||||
static_cast<uint32_t>(operationParams.dstOffset.z),
|
||||
0};
|
||||
kernelNoSplit3DBuilder.setArg(3, sizeof(origin), origin);
|
||||
}
|
||||
|
||||
// Set-up work sizes
|
||||
kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelNoSplit3DBuilder.bake(multiDispatchInfo);
|
||||
|
||||
// Store source and destination surfaces for residency purposes
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(srcImage)));
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(dstImage)));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernel;
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::FillImage3d> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernel(nullptr) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::FillImage3d,
|
||||
"",
|
||||
"FillImage3d", kernel);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::NoSplit> kernelNoSplit3DBuilder;
|
||||
|
||||
DEBUG_BREAK_IF(!((operationParams.srcMemObj == nullptr) && (operationParams.srcPtr != nullptr) && (operationParams.dstPtr == nullptr)));
|
||||
|
||||
auto image = castToObjectOrAbort<Image>(operationParams.dstMemObj);
|
||||
|
||||
// Redescribe image to be byte-copy
|
||||
auto imageRedescribed = image->redescribeFillImage();
|
||||
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(imageRedescribed));
|
||||
|
||||
// Set-up kernel
|
||||
kernelNoSplit3DBuilder.setKernel(kernel);
|
||||
|
||||
// Set-up destination image
|
||||
kernelNoSplit3DBuilder.setArg(0, imageRedescribed);
|
||||
|
||||
// Set-up fill color
|
||||
int iFillColor[4] = {0};
|
||||
const void *fillColor = operationParams.srcPtr;
|
||||
convertFillColor(fillColor,
|
||||
iFillColor,
|
||||
image->getSurfaceFormatInfo().OCLImageFormat,
|
||||
imageRedescribed->getSurfaceFormatInfo().OCLImageFormat);
|
||||
kernelNoSplit3DBuilder.setArg(1, 4 * sizeof(int32_t), iFillColor);
|
||||
|
||||
// Set-up dstOffset
|
||||
{
|
||||
uint32_t offset[] = {
|
||||
static_cast<uint32_t>(operationParams.dstOffset.x),
|
||||
static_cast<uint32_t>(operationParams.dstOffset.y),
|
||||
static_cast<uint32_t>(operationParams.dstOffset.z),
|
||||
0};
|
||||
kernelNoSplit3DBuilder.setArg(2, sizeof(offset), offset);
|
||||
}
|
||||
|
||||
// Set-up work sizes
|
||||
kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
kernelNoSplit3DBuilder.bake(multiDispatchInfo);
|
||||
|
||||
// Store destination surface for residency purposes
|
||||
multiDispatchInfo.pushUsedSurface(std::unique_ptr<Surface>(new MemObjSurface(image)));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernel;
|
||||
};
|
||||
|
||||
// Returns the dispatch-info builder for the given operation, creating it on first
// use. Creation is guarded by the per-operation std::once_flag stored next to the
// builder. Operations without a case below make the function throw
// std::runtime_error.
BuiltinDispatchInfoBuilder &BuiltIns::getBuiltinDispatchInfoBuilder(EBuiltInOps operation, Context &context, Device &device) {
    auto &slot = BuiltinOpsBuilders[static_cast<uint32_t>(operation)];
    auto &builder = slot.first;
    auto &createdFlag = slot.second;

    switch (operation) {
    case EBuiltInOps::CopyBufferToBuffer:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToBuffer>(*this, context, device)); });
        break;
    case EBuiltInOps::CopyBufferRect:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::CopyBufferRect>(*this, context, device)); });
        break;
    case EBuiltInOps::FillBuffer:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::FillBuffer>(*this, context, device)); });
        break;
    case EBuiltInOps::CopyBufferToImage3d:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToImage3d>(*this, context, device)); });
        break;
    case EBuiltInOps::CopyImage3dToBuffer:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::CopyImage3dToBuffer>(*this, context, device)); });
        break;
    case EBuiltInOps::CopyImageToImage3d:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::CopyImageToImage3d>(*this, context, device)); });
        break;
    case EBuiltInOps::FillImage3d:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::FillImage3d>(*this, context, device)); });
        break;
    case EBuiltInOps::VmeBlockMotionEstimateIntel:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::VmeBlockMotionEstimateIntel>(*this, context, device)); });
        break;
    case EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel>(*this, context, device)); });
        break;
    case EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel:
        std::call_once(createdFlag, [&] { builder.reset(new BuiltInOp<HWFamily, EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel>(*this, context, device)); });
        break;
    default:
        throw std::runtime_error("getBuiltinDispatchInfoBuilder failed");
    }

    return *builder;
}
|
||||
|
||||
// Installs `builder` as the dispatch-info builder for `operation` and returns the
// builder that was previously stored in that slot (possibly empty).
// `context` and `device` are not used here.
std::unique_ptr<BuiltinDispatchInfoBuilder> BuiltIns::setBuiltinDispatchInfoBuilder(EBuiltInOps operation, Context &context, Device &device, std::unique_ptr<BuiltinDispatchInfoBuilder> builder) {
    auto &slot = BuiltinOpsBuilders[static_cast<uint32_t>(operation)];
    std::swap(slot.first, builder);
    return builder;
}
|
||||
|
||||
} // namespace OCLRT
|
||||
301
runtime/built_ins/built_ins.h
Normal file
301
runtime/built_ins/built_ins.h
Normal file
@@ -0,0 +1,301 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/built_ins/sip.h"
|
||||
#include "runtime/scheduler/scheduler_kernel.h"
|
||||
#include "runtime/program/program.h"
|
||||
#include "runtime/utilities/vec.h"
|
||||
#include "runtime/os_interface/os_inc.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
|
||||
namespace OCLRT {
|
||||
// Raw bytes of a builtin resource.
using BuiltinResourceT = std::vector<char>;

// Defined in another translation unit.
extern const char *mediaKernelsBuildOptions;
|
||||
|
||||
// Identifiers of the builtin operations. The numeric values are used as array
// indices, so the enumerators must stay contiguous and COUNT must stay last.
enum class EBuiltInOps : uint32_t {
    // buffer operations
    CopyBufferToBuffer = 0,
    CopyBufferRect,
    FillBuffer,
    // buffer <-> image copies
    CopyBufferToImage3d,
    CopyImage3dToBuffer,
    // image -> image copies
    CopyImageToImage1d,
    CopyImageToImage2d,
    CopyImageToImage3d,
    // image fills
    FillImage1d,
    FillImage2d,
    FillImage3d,
    // VME kernels
    VmeBlockMotionEstimateIntel,
    VmeBlockAdvancedMotionEstimateCheckIntel,
    VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel,
    // scheduler kernel
    Scheduler,
    // number of operations, not an operation itself
    COUNT
};
|
||||
|
||||
// Returns a resource holding a copy of the `size` bytes that start at `ptr`.
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size);
|
||||
// Returns a copy of the resource `r`.
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r);
|
||||
// Composes a resource name: "<platformName>_<deviceRevId>_" (only when platformName
// is non-empty), followed by the builtin's base name and then `extension`.
std::string createBuiltinResourceName(EBuiltInOps builtin, const std::string &extension,
|
||||
const std::string &platformName = "", uint32_t deviceRevId = 0);
|
||||
// Joins two path fragments, inserting PATH_SEPARATOR between them unless one side
// is empty or `lhs` already ends with the separator.
std::string joinPath(const std::string &lhs, const std::string &rhs);
|
||||
// Returns the base resource name of a builtin, or "unknown" for unlisted values.
const char *getBuiltinAsString(EBuiltInOps builtin);
|
||||
|
||||
class Storage {
|
||||
public:
|
||||
Storage(const std::string &rootPath)
|
||||
: rootPath(rootPath) {
|
||||
}
|
||||
|
||||
BuiltinResourceT load(const std::string &resourceName);
|
||||
|
||||
protected:
|
||||
virtual BuiltinResourceT loadImpl(const std::string &fullResourceName) = 0;
|
||||
|
||||
std::string rootPath;
|
||||
};
|
||||
|
||||
class FileStorage : public Storage {
|
||||
public:
|
||||
FileStorage(const std::string &rootPath = "")
|
||||
: Storage(rootPath) {
|
||||
}
|
||||
|
||||
protected:
|
||||
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
|
||||
};
|
||||
|
||||
struct EmbeddedStorageRegistry {
|
||||
static EmbeddedStorageRegistry &getInstance() {
|
||||
static EmbeddedStorageRegistry gsr;
|
||||
return gsr;
|
||||
}
|
||||
|
||||
void store(const std::string &name, BuiltinResourceT &&resource) {
|
||||
resources.emplace(name, BuiltinResourceT(std::move(resource)));
|
||||
}
|
||||
|
||||
const BuiltinResourceT *get(const std::string &name) const;
|
||||
|
||||
private:
|
||||
using ResourcesContainer = std::unordered_map<std::string, BuiltinResourceT>;
|
||||
ResourcesContainer resources;
|
||||
};
|
||||
|
||||
class EmbeddedStorage : public Storage {
|
||||
public:
|
||||
EmbeddedStorage(const std::string &rootPath)
|
||||
: Storage(rootPath) {
|
||||
}
|
||||
|
||||
protected:
|
||||
BuiltinResourceT loadImpl(const std::string &fullResourceName) override;
|
||||
};
|
||||
|
||||
struct BuiltinCode {
|
||||
enum class ECodeType {
|
||||
Any = 0, // for requesting "any" code available - priorities as below
|
||||
Binary = 1, // ISA - highest priority
|
||||
Intermediate = 2, // SPIR/LLVM - medium prioroty
|
||||
Source = 3, // OCL C - lowest priority
|
||||
COUNT,
|
||||
INVALID
|
||||
};
|
||||
|
||||
static const char *getExtension(ECodeType ct) {
|
||||
switch (ct) {
|
||||
default:
|
||||
return "";
|
||||
case ECodeType::Binary:
|
||||
return ".bin";
|
||||
case ECodeType::Intermediate:
|
||||
return ".bc";
|
||||
case ECodeType::Source:
|
||||
return ".cl";
|
||||
}
|
||||
}
|
||||
|
||||
ECodeType type;
|
||||
BuiltinResourceT resource;
|
||||
Device *targetDevice;
|
||||
};
|
||||
|
||||
class BuiltinsLib {
|
||||
public:
|
||||
BuiltinsLib();
|
||||
BuiltinCode getBuiltinCode(EBuiltInOps builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
|
||||
|
||||
static std::unique_ptr<Program> createProgramFromCode(const BuiltinCode &bc, Context &context, Device &device);
|
||||
|
||||
protected:
|
||||
BuiltinResourceT getBuiltinResource(EBuiltInOps builtin, BuiltinCode::ECodeType requestedCodeType, Device &device);
|
||||
|
||||
using StoragesContainerT = std::vector<std::unique_ptr<Storage>>;
|
||||
StoragesContainerT allStorages; // sorted by priority allStorages[0] will be checked before allStorages[1], etc.
|
||||
|
||||
std::mutex mutex;
|
||||
};
|
||||
|
||||
class Context;
|
||||
class Device;
|
||||
class Kernel;
|
||||
class Program;
|
||||
|
||||
struct BuiltInKernel {
|
||||
const char *pSource = nullptr;
|
||||
Program *pProgram = nullptr;
|
||||
std::once_flag programIsInitialized; // guard for creating+building the program
|
||||
Kernel *pKernel = nullptr;
|
||||
|
||||
BuiltInKernel() {
|
||||
}
|
||||
};
|
||||
|
||||
class BuiltinDispatchInfoBuilder;
|
||||
|
||||
class BuiltIns {
|
||||
public:
|
||||
using HWFamily = int;
|
||||
std::pair<std::unique_ptr<BuiltinDispatchInfoBuilder>, std::once_flag> BuiltinOpsBuilders[static_cast<uint32_t>(EBuiltInOps::COUNT)];
|
||||
|
||||
BuiltinDispatchInfoBuilder &getBuiltinDispatchInfoBuilder(EBuiltInOps op, Context &context, Device &device);
|
||||
std::unique_ptr<BuiltinDispatchInfoBuilder> setBuiltinDispatchInfoBuilder(EBuiltInOps op, Context &context, Device &device,
|
||||
std::unique_ptr<BuiltinDispatchInfoBuilder> newBuilder);
|
||||
|
||||
static BuiltIns &getInstance();
|
||||
static void shutDown();
|
||||
Program *createBuiltInProgram(
|
||||
Context &context,
|
||||
Device &device,
|
||||
const char *kernelNames,
|
||||
int &errcodeRet);
|
||||
|
||||
SchedulerKernel &getSchedulerKernel(Context &context);
|
||||
|
||||
SipKernel &getSipKernel(SipKernelType kernel, Context &context);
|
||||
|
||||
BuiltinsLib &getBuiltinsLib() {
|
||||
DEBUG_BREAK_IF(!builtinsLib.get());
|
||||
return *builtinsLib;
|
||||
}
|
||||
|
||||
void setCacheingEnableState(bool enableCacheing) {
|
||||
this->enableCacheing = enableCacheing;
|
||||
}
|
||||
|
||||
bool isCacheingEnabled() const {
|
||||
return this->enableCacheing;
|
||||
}
|
||||
|
||||
protected:
|
||||
BuiltIns();
|
||||
~BuiltIns();
|
||||
|
||||
// singleton
|
||||
static BuiltIns *pInstance;
|
||||
|
||||
// scheduler kernel
|
||||
BuiltInKernel schedulerBuiltIn;
|
||||
|
||||
// sip builtins
|
||||
std::pair<std::unique_ptr<SipKernel>, std::once_flag> sipKernels[static_cast<uint32_t>(SipKernelType::COUNT)];
|
||||
|
||||
std::unique_ptr<BuiltinsLib> builtinsLib;
|
||||
|
||||
using ProgramsContainerT = std::array<std::pair<std::unique_ptr<Program>, std::once_flag>, static_cast<size_t>(EBuiltInOps::COUNT)>;
|
||||
ProgramsContainerT builtinPrograms;
|
||||
bool enableCacheing = true;
|
||||
};
|
||||
|
||||
class MemObj;
|
||||
|
||||
class BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
struct BuiltinOpParams {
|
||||
void *srcPtr = nullptr;
|
||||
void *dstPtr = nullptr;
|
||||
MemObj *srcMemObj = nullptr;
|
||||
MemObj *dstMemObj = nullptr;
|
||||
GraphicsAllocation *srcSvmAlloc = nullptr;
|
||||
GraphicsAllocation *dstSvmAlloc = nullptr;
|
||||
Vec3<size_t> srcOffset = {0, 0, 0};
|
||||
Vec3<size_t> dstOffset = {0, 0, 0};
|
||||
Vec3<size_t> size = {0, 0, 0};
|
||||
size_t srcRowPitch = 0;
|
||||
size_t dstRowPitch = 0;
|
||||
size_t srcSlicePitch = 0;
|
||||
size_t dstSlicePitch = 0;
|
||||
};
|
||||
|
||||
BuiltinDispatchInfoBuilder(BuiltIns &kernelLib) : kernelsLib(kernelLib) {}
|
||||
|
||||
template <typename... KernelsDescArgsT>
|
||||
void populate(Context &context, Device &device, EBuiltInOps operation, const char *options, KernelsDescArgsT &&... desc);
|
||||
|
||||
virtual bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
|
||||
return false;
|
||||
}
|
||||
virtual bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, Kernel *kernel,
|
||||
const uint32_t dim, const Vec3<size_t> &gws, const Vec3<size_t> &elws, const Vec3<size_t> &offset) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3<size_t> &gws, const Vec3<size_t> &elws, const Vec3<size_t> &offset) const {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// returns true if argument should be updated in kernel exposed to user code
|
||||
virtual bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void takeOwnership(Context *context);
|
||||
void releaseOwnership();
|
||||
|
||||
protected:
|
||||
template <typename KernelNameT, typename... KernelsDescArgsT>
|
||||
void grabKernels(KernelNameT &&kernelName, Kernel *&kernelDst, KernelsDescArgsT &&... kernelsDesc) {
|
||||
const KernelInfo *ki = prog->getKernelInfo(kernelName);
|
||||
cl_int err = 0;
|
||||
kernelDst = Kernel::create(prog.get(), *ki, &err);
|
||||
kernelDst->isBuiltIn = true;
|
||||
usedKernels.push_back(std::unique_ptr<Kernel>(kernelDst));
|
||||
grabKernels(std::forward<KernelsDescArgsT>(kernelsDesc)...);
|
||||
}
|
||||
|
||||
cl_int grabKernels() { return CL_SUCCESS; }
|
||||
|
||||
std::unique_ptr<Program> prog;
|
||||
std::vector<std::unique_ptr<Kernel>> usedKernels;
|
||||
BuiltIns &kernelsLib;
|
||||
};
|
||||
|
||||
template <typename HWFamily, EBuiltInOps OpCode>
|
||||
class BuiltInOp;
|
||||
|
||||
} // namespace OCLRT
|
||||
216
runtime/built_ins/built_ins_storage.cpp
Normal file
216
runtime/built_ins/built_ins_storage.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Maps a builtin operation to the base name of the resource that holds its code.
// Every listed name ends with ".igdrcl_built_in"; any value not listed below
// yields "unknown".
const char *getBuiltinAsString(EBuiltInOps builtin) {
    switch (builtin) {
    default:
        return "unknown";
    case EBuiltInOps::CopyBufferToBuffer:
        return "copy_buffer_to_buffer.igdrcl_built_in";
    case EBuiltInOps::CopyBufferRect:
        return "copy_buffer_rect.igdrcl_built_in";
    case EBuiltInOps::FillBuffer:
        return "fill_buffer.igdrcl_built_in";
    case EBuiltInOps::CopyBufferToImage3d:
        return "copy_buffer_to_image3d.igdrcl_built_in";
    case EBuiltInOps::CopyImage3dToBuffer:
        return "copy_image3d_to_buffer.igdrcl_built_in";
    case EBuiltInOps::CopyImageToImage1d:
        return "copy_image_to_image1d.igdrcl_built_in";
    case EBuiltInOps::CopyImageToImage2d:
        return "copy_image_to_image2d.igdrcl_built_in";
    case EBuiltInOps::CopyImageToImage3d:
        return "copy_image_to_image3d.igdrcl_built_in";
    case EBuiltInOps::FillImage1d:
        return "fill_image1d.igdrcl_built_in";
    case EBuiltInOps::FillImage2d:
        return "fill_image2d.igdrcl_built_in";
    case EBuiltInOps::FillImage3d:
        return "fill_image3d.igdrcl_built_in";
    case EBuiltInOps::VmeBlockMotionEstimateIntel:
        return "vme_block_motion_estimate_intel.igdrcl_built_in";
    case EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel:
        return "vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in";
    case EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel:
        // This entry used to be the only one without the ".igdrcl_built_in" suffix,
        // which made the composed resource name inconsistent with all other builtins.
        return "vme_block_advanced_motion_estimate_bidirectional_check_intel.igdrcl_built_in";
    case EBuiltInOps::Scheduler:
        return "scheduler.igdrcl_built_in";
    }
}
|
||||
|
||||
// Returns a resource holding a copy of the `size` bytes that start at `ptr`.
BuiltinResourceT createBuiltinResource(const char *ptr, size_t size) {
    BuiltinResourceT copy;
    copy.assign(ptr, ptr + size);
    return copy;
}
|
||||
|
||||
// Returns a copy of the resource `r`.
BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r) {
    BuiltinResourceT copy(r);
    return copy;
}
|
||||
|
||||
// Composes the name of a builtin resource:
//   [<platformName>_<deviceRevId>_]<builtin base name><extension>
// The bracketed prefix is present only when `platformName` is non-empty.
std::string createBuiltinResourceName(EBuiltInOps builtin, const std::string &extension,
                                      const std::string &platformName, uint32_t deviceRevId) {
    std::string name;
    if (!platformName.empty()) {
        name.append(platformName).append("_").append(std::to_string(deviceRevId)).append("_");
    }

    name.append(getBuiltinAsString(builtin));
    // Appending an empty extension leaves the name unchanged.
    name.append(extension);

    return name;
}
|
||||
|
||||
std::string joinPath(const std::string &lhs, const std::string &rhs) {
|
||||
if (lhs.size() == 0) {
|
||||
return rhs;
|
||||
}
|
||||
|
||||
if (rhs.size() == 0) {
|
||||
return lhs;
|
||||
}
|
||||
|
||||
if (*lhs.rbegin() == PATH_SEPARATOR) {
|
||||
return lhs + rhs;
|
||||
}
|
||||
|
||||
return lhs + PATH_SEPARATOR + rhs;
|
||||
}
|
||||
|
||||
// Returns the driver installation path; this implementation always yields an
// empty string.
std::string getDriverInstallationPath() {
    return std::string();
}
|
||||
|
||||
// Resolves `resourceName` against the storage root and loads the result
// through the storage-specific loadImpl().
BuiltinResourceT Storage::load(const std::string &resourceName) {
    const std::string fullResourceName = joinPath(rootPath, resourceName);
    return loadImpl(fullResourceName);
}
|
||||
|
||||
// Reads the whole file `fullResourceName` in binary mode and returns its bytes.
// An empty resource is returned when the file cannot be opened, its size cannot
// be determined, it is empty, or reading it fails.
BuiltinResourceT FileStorage::loadImpl(const std::string &fullResourceName) {
    BuiltinResourceT ret;

    // Opened positioned at the end so that the first tellg() yields the file size.
    std::ifstream f{fullResourceName, std::ios::in | std::ios::binary | std::ios::ate};
    if (!f.is_open()) {
        return ret;
    }

    const auto end = f.tellg();
    f.seekg(0, std::ios::beg);
    const auto beg = f.tellg();

    // tellg() reports failure as -1; never let a negative (or zero) size reach
    // resize(), where the cast to size_t would turn it into a huge request.
    const std::streamoff s = end - beg;
    if (f.fail() || s <= 0) {
        return ret;
    }

    ret.resize(static_cast<size_t>(s));
    f.read(ret.data(), s);
    if (!f) {
        // A failed or short read must not be handed out as a valid resource.
        ret.clear();
    }
    return ret;
}
|
||||
|
||||
// Returns a pointer to the resource registered under `name`, or nullptr when
// no such resource exists.
const BuiltinResourceT *EmbeddedStorageRegistry::get(const std::string &name) const {
    const auto found = resources.find(name);
    return (found != resources.end()) ? &found->second : nullptr;
}
|
||||
|
||||
// Returns a copy of the resource registered under `fullResourceName` in the
// EmbeddedStorageRegistry, or an empty resource when nothing is registered.
BuiltinResourceT EmbeddedStorage::loadImpl(const std::string &fullResourceName) {
    const BuiltinResourceT *registered = EmbeddedStorageRegistry::getInstance().get(fullResourceName);
    if (registered != nullptr) {
        return createBuiltinResource(*registered);
    }
    return BuiltinResourceT();
}
|
||||
|
||||
// Registers the storages in lookup order: embedded resources first, then
// files under the driver installation path.
BuiltinsLib::BuiltinsLib() {
    std::unique_ptr<Storage> embeddedStorage(new EmbeddedStorage(""));
    allStorages.push_back(std::move(embeddedStorage));

    std::unique_ptr<Storage> fileStorage(new FileStorage(getDriverInstallationPath()));
    allStorages.push_back(std::move(fileStorage));
}
|
||||
|
||||
// Retrieves the code of a built-in for the given device.
//
// With ECodeType::Any, code types are probed from Binary up to (excluding)
// COUNT and the first one with a non-empty resource wins; if none is found
// the returned type is INVALID. With a specific type, that type is reported
// as-is, even if the returned resource is empty.
// The lookup runs under the library mutex.
BuiltinCode BuiltinsLib::getBuiltinCode(EBuiltInOps builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
    std::lock_guard<std::mutex> guard{mutex};

    BuiltinResourceT resource;
    BuiltinCode::ECodeType resolvedType = BuiltinCode::ECodeType::INVALID;

    if (requestedCodeType != BuiltinCode::ECodeType::Any) {
        resource = getBuiltinResource(builtin, requestedCodeType, device);
        resolvedType = requestedCodeType;
    } else {
        const uint32_t firstType = static_cast<uint32_t>(BuiltinCode::ECodeType::Binary);
        const uint32_t endType = static_cast<uint32_t>(BuiltinCode::ECodeType::COUNT);
        for (uint32_t rawType = firstType; rawType != endType; ++rawType) {
            const auto candidate = static_cast<BuiltinCode::ECodeType>(rawType);
            resource = getBuiltinResource(builtin, candidate, device);
            if (resource.size() > 0) {
                resolvedType = candidate;
                break;
            }
        }
    }

    BuiltinCode ret;
    std::swap(ret.resource, resource);
    ret.type = resolvedType;
    ret.targetDevice = &device;

    return ret;
}
|
||||
|
||||
std::unique_ptr<Program> BuiltinsLib::createProgramFromCode(const BuiltinCode &bc, Context &context, Device &device) {
|
||||
std::unique_ptr<Program> ret;
|
||||
const char *data = bc.resource.data();
|
||||
size_t dataLen = bc.resource.size();
|
||||
cl_int err = 0;
|
||||
switch (bc.type) {
|
||||
default:
|
||||
break;
|
||||
case BuiltinCode::ECodeType::Source:
|
||||
case BuiltinCode::ECodeType::Intermediate:
|
||||
ret.reset(Program::create(data, &context, device, true, &err));
|
||||
break;
|
||||
case BuiltinCode::ECodeType::Binary:
|
||||
ret.reset(Program::createFromGenBinary(&context, data, dataLen, true, nullptr));
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Loads the resource of a built-in in the requested code type.
//
// Three candidate names are tried, most specific first: platform + stepping,
// platform only, generic. For each name every registered storage is queried
// in order, and the first non-empty resource is returned. An empty resource
// is returned when nothing matches.
BuiltinResourceT BuiltinsLib::getBuiltinResource(EBuiltInOps builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
    const std::string genericName = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType));
    const std::string platformName = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), device.getProductAbbrev());
    const std::string platformAndSteppingName = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), device.getProductAbbrev(),
                                                                          device.getHardwareInfo().pPlatform->usRevId);

    // first look for dedicated version, only fallback to generic one
    const std::string *const lookupOrder[] = {&platformAndSteppingName, &platformName, &genericName};

    BuiltinResourceT bc;
    for (const std::string *candidate : lookupOrder) {
        for (auto &storage : allStorages) {
            bc = storage->load(*candidate);
            if (bc.size() != 0) {
                return bc;
            }
        }
    }
    return bc;
}
|
||||
|
||||
} // namespace OCLRT
|
||||
120
runtime/built_ins/kernels/CMakeLists.txt
Normal file
120
runtime/built_ins/kernels/CMakeLists.txt
Normal file
@@ -0,0 +1,120 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# Umbrella target; the per-platform built-in targets are attached to it below.
add_custom_target(builtins)
set_target_properties(builtins PROPERTIES FOLDER "built_ins")
set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}")
add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins)

# Kernel source file of every built-in, stored as <NAME>_BUILTIN
set(COPY_BUFFER_TO_BUFFER_BUILTIN copy_buffer_to_buffer.igdrcl_built_in)
set(COPY_BUFFER_RECT_BUILTIN copy_buffer_rect.igdrcl_built_in)
set(FILL_BUFFER_BUILTIN fill_buffer.igdrcl_built_in)
set(COPY_BUFFER_TO_IMAGE3D_BUILTIN copy_buffer_to_image3d.igdrcl_built_in)
set(COPY_IMAGE3D_TO_BUFFER_BUILTIN copy_image3d_to_buffer.igdrcl_built_in)
set(COPY_IMAGE_TO_IMAGE1D_BUILTIN copy_image_to_image1d.igdrcl_built_in)
set(COPY_IMAGE_TO_IMAGE2D_BUILTIN copy_image_to_image2d.igdrcl_built_in)
set(COPY_IMAGE_TO_IMAGE3D_BUILTIN copy_image_to_image3d.igdrcl_built_in)
set(FILL_IMAGE1D_BUILTIN fill_image1d.igdrcl_built_in)
set(FILL_IMAGE2D_BUILTIN fill_image2d.igdrcl_built_in)
set(FILL_IMAGE3D_BUILTIN fill_image3d.igdrcl_built_in)

# Extra compiler option passed only for the x32 architecture
set(BUILTIN_OPTIONS "")
if("${NEO_ARCH}" STREQUAL "x32")
  set(BUILTIN_OPTIONS "-cl-intel-greater-than-4GB-buffer-required")
endif()

# Debug builds compile the kernels with DEBUG defined
set(BUILTIN_DEBUG_OPTION "")
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
  set(BUILTIN_DEBUG_OPTION "-D DEBUG")
endif()

set(BUILTINS_INCLUDE_DIR ${TargetDir} PARENT_SCOPE)
set(BUILTIN_CPP "")
|
||||
|
||||
# Define function for compiling built-ins (with cloc)
|
||||
# compile_builtin(<gen_name> <builtin>)
#   Adds a custom command that runs cloc on <builtin> for device <gen_name>,
#   producing <basename>_<gen_name>.{bc,bin,cpp,gen} in
#   ${BUILTINS_OUTDIR_WITH_ARCH}/<gen_name>.
#   The relative path of the generated .cpp is handed back to the caller in
#   BUILTIN_CPP.
function(compile_builtin gen_name builtin)
  set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/${gen_name}")

  # File name with extension (given to cloc) and without it (names the outputs)
  get_filename_component(FILENAME ${builtin} NAME)
  get_filename_component(BASENAME ${builtin} NAME_WE)

  set(OUTPUTPATH_BASE "${OUTPUTDIR}/${BASENAME}_${gen_name}")
  set(OUTPUT_FILES
    ${OUTPUTPATH_BASE}.bc
    ${OUTPUTPATH_BASE}.bin
    ${OUTPUTPATH_BASE}.cpp
    ${OUTPUTPATH_BASE}.gen
  )

  # Hand the generated .cpp path back to the caller's scope
  unset(BUILTIN_CPP)
  set(BUILTIN_CPP built_ins/${NEO_ARCH}/${gen_name}/${BASENAME}_${gen_name}.cpp PARENT_SCOPE)

  # Only the way cloc is launched differs between MSVC and other toolchains
  if(MSVC)
    set(CLOC_LAUNCH cloc)
  else()
    set(CLOC_LAUNCH LD_LIBRARY_PATH=$<TARGET_FILE_DIR:cloc> $<TARGET_FILE:cloc>)
  endif()

  add_custom_command(
    OUTPUT ${OUTPUT_FILES}
    COMMAND ${CLOC_LAUNCH} -q -file ${FILENAME} -device ${gen_name} ${BUILTIN_OPTIONS} -${NEO_BITS} -out_dir ${OUTPUTDIR} -cpp_file -options "-cl-kernel-arg-info ${BUILTIN_DEBUG_OPTION}"
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    DEPENDS ${builtin} cloc copy_compiler_files
  )
endfunction()
|
||||
|
||||
# compile_builtins(<GEN_NUM> <PLATFORM_IT>)
#   Compiles every entry of GENERATED_BUILTINS for one platform, publishes
#   the generated .cpp paths as RUNTIME_GENERATED_<builtin>_GEN<n>_<PLATFORM>
#   in the parent scope and groups the outputs under builtins_<platform>,
#   which the umbrella `builtins` target depends on.
macro(compile_builtins GEN_NUM PLATFORM_IT)
  string(TOLOWER ${PLATFORM_IT} PLATFORM_LOWER)
  set(GEN "_GEN${GEN_NUM}_${PLATFORM_IT}")

  unset(BUILTINS_COMMANDS)
  foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
    # compile_builtin() hands the generated .cpp path back in BUILTIN_CPP
    compile_builtin(${PLATFORM_LOWER} ${${GENERATED_BUILTIN}_BUILTIN})
    list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
    set(RUNTIME_GENERATED_${GENERATED_BUILTIN}${GEN} ${BUILTIN_CPP} PARENT_SCOPE)
  endforeach()

  set(target_name builtins_${PLATFORM_LOWER})
  add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS})
  add_dependencies(builtins ${target_name})
  set_target_properties(${target_name} PROPERTIES FOLDER "built_ins/${PLATFORM_LOWER}")
endmacro()
|
||||
|
||||
# Compile built-in kernels for all GENs
|
||||
# Walk the GENs from MAX_GEN down to 0; for each GEN that has supported
# platforms, compile the built-ins once per platform.
foreach(GEN_NUM RANGE ${MAX_GEN} 0 -1)
  GEN_CONTAINS_PLATFORMS("SUPPORTED" ${GEN_NUM} GENX_HAS_PLATFORMS)
  if(${GENX_HAS_PLATFORMS})
    GET_PLATFORMS_FOR_GEN("SUPPORTED" ${GEN_NUM} SUPPORTED_GENX_PLATFORMS)
    foreach(PLATFORM_IT ${SUPPORTED_GENX_PLATFORMS})
      compile_builtins(${GEN_NUM} ${PLATFORM_IT})
    endforeach(PLATFORM_IT)
  endif()
endforeach(GEN_NUM)
|
||||
63
runtime/built_ins/kernels/copy_buffer_rect.igdrcl_built_in
Normal file
63
runtime/built_ins/kernels/copy_buffer_rect.igdrcl_built_in
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Copies one byte per work-item between two 2D regions of linear buffers.
// Work-item (x, y) maps to byte (x + Origin.x) of row (y + Origin.y);
// Pitch.x is the row pitch in bytes.
__kernel void CopyBufferRectBytes2d(
    __global const char* src,
    __global char* dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch )
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    const uint srcIndex = col + SrcOrigin.x + ( ( row + SrcOrigin.y ) * SrcPitch.x );
    const uint dstIndex = col + DstOrigin.x + ( ( row + DstOrigin.y ) * DstPitch.x );

    dst[ dstIndex ] = src[ srcIndex ];
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Copies one byte per work-item between two 3D regions of linear buffers.
// Pitch.x is the row pitch and Pitch.y the slice pitch, both in bytes.
__kernel void CopyBufferRectBytes3d(
    __global const char* src,
    __global char* dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch )
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    const int slice = get_global_id(2);

    const uint srcIndex = col + SrcOrigin.x + ( ( row + SrcOrigin.y ) * SrcPitch.x ) + ( ( slice + SrcOrigin.z ) * SrcPitch.y );
    const uint dstIndex = col + DstOrigin.x + ( ( row + DstOrigin.y ) * DstPitch.x ) + ( ( slice + DstOrigin.z ) * DstPitch.y );

    dst[ dstIndex ] = src[ srcIndex ];
}
|
||||
)==="
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Copies a single byte per work-item from pSrc to pDst at the given byte
// offsets. bytesToRead is not read by the kernel body.
__kernel void CopyBufferToBufferBytes(
    const __global uchar* pSrc,
    __global uchar* pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes,
    uint bytesToRead )
{
    const __global uchar* from = pSrc + ( srcOffsetInBytes + get_global_id(0) );
    __global uchar* to = pDst + ( dstOffsetInBytes + get_global_id(0) );
    *to = *from;
}
|
||||
|
||||
// Byte-wise copy, one byte per work-item, at the given byte offsets.
// presumably handles the leading bytes that CopyBufferToBufferMiddle
// cannot cover - confirm against the host-side dispatch code.
__kernel void CopyBufferToBufferLeftLeftover(
    const __global uchar* pSrc,
    __global uchar* pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes)
{
    const unsigned int id = get_global_id(0);
    const unsigned int srcIndex = id + srcOffsetInBytes;
    const unsigned int dstIndex = id + dstOffsetInBytes;
    pDst[ dstIndex ] = pSrc[ srcIndex ];
}
|
||||
|
||||
// Copies four uints (16 bytes) per work-item.
// The byte offsets are converted to uint element offsets with >> 2, so any
// offset bits below 4 bytes are dropped.
__kernel void CopyBufferToBufferMiddle(
    const __global uint* pSrc,
    __global uint* pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes)
{
    const unsigned int id = get_global_id(0);
    const __global uint* from = pSrc + ( srcOffsetInBytes >> 2 );
    __global uint* to = pDst + ( dstOffsetInBytes >> 2 );
    vstore4(vload4(id, from), id, to);
}
|
||||
|
||||
// Byte-wise copy, one byte per work-item, at the given byte offsets.
// presumably handles the trailing bytes that CopyBufferToBufferMiddle
// cannot cover - confirm against the host-side dispatch code.
__kernel void CopyBufferToBufferRightLeftover(
    const __global uchar* pSrc,
    __global uchar* pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes)
{
    const unsigned int id = get_global_id(0);
    const unsigned int srcIndex = id + srcOffsetInBytes;
    const unsigned int dstIndex = id + dstOffsetInBytes;
    pDst[ dstIndex ] = pSrc[ srcIndex ];
}
|
||||
|
||||
)==="
|
||||
176
runtime/built_ins/kernels/copy_buffer_to_image3d.igdrcl_built_in
Normal file
176
runtime/built_ins/kernels/copy_buffer_to_image3d.igdrcl_built_in
Normal file
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
// Writes one 1-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel in src
//   dstOffset - texel offset added to the work-item coordinate
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
// The byte is stored in the first channel; the remaining channels are 0, 0, 1.
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
                                       __write_only image3d_t output,
                                       int srcOffset,
                                       int4 dstOffset,
                                       uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    // Plain vector literal: an address space qualifier has no meaning on a value.
    write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
}
|
||||
|
||||
// Writes one 2-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel in src
//   dstOffset - texel offset added to the work-item coordinate
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
// The texel is assembled low byte first and stored in the first channel.
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    __global uchar *pixel = src + LOffset + x * 2;
    uint4 c = (uint4)(0, 0, 0, 1);

    // Test the address that is actually dereferenced: with a pitch that is not
    // a multiple of the texel size, a row can be misaligned even when
    // src + srcOffset is aligned.
    if ((ulong)pixel & 0x00000001) {
        c.x = ((uint)pixel[1] << 8) | (uint)pixel[0];
    } else {
        c.x = (uint)(*(__global ushort *)pixel);
    }
    write_imageui(output, dstCoord, c);
}
|
||||
|
||||
// Writes one 4-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel in src
//   dstOffset - texel offset added to the work-item coordinate
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
// The texel is assembled low byte first and stored in the first channel.
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    __global uchar *pixel = src + LOffset + x * 4;
    uint4 c = (uint4)(0, 0, 0, 1);

    // Test the address that is actually dereferenced: with a pitch that is not
    // a multiple of the texel size, a row can be misaligned even when
    // src + srcOffset is aligned.
    if ((ulong)pixel & 0x00000003) {
        c.x = ((uint)pixel[3] << 24) | ((uint)pixel[2] << 16) | ((uint)pixel[1] << 8) | (uint)pixel[0];
    } else {
        c.x = *(__global uint *)pixel;
    }
    write_imageui(output, dstCoord, c);
}
|
||||
|
||||
// Writes one 8-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel in src
//   dstOffset - texel offset added to the work-item coordinate
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
// The two 32-bit halves go to the first two channels; the rest are 0, 1.
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    __global uchar *pixel = src + LOffset + x * 8;
    uint2 c = (uint2)(0, 0);

    // Test the address that is actually dereferenced: with a pitch that is not
    // a multiple of the texel size, a row can be misaligned even when
    // src + srcOffset is aligned.
    if ((ulong)pixel & 0x00000007) {
        c.x = ((uint)pixel[3] << 24) | ((uint)pixel[2] << 16) | ((uint)pixel[1] << 8) | (uint)pixel[0];
        c.y = ((uint)pixel[7] << 24) | ((uint)pixel[6] << 16) | ((uint)pixel[5] << 8) | (uint)pixel[4];
    } else {
        c = *((__global uint2 *)pixel);
    }

    // Plain vector literal: an address space qualifier has no meaning on a value.
    write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
}
|
||||
|
||||
// Writes one 16-byte texel per work-item from a linear buffer into a 3D image.
//   srcOffset - byte offset of the first texel in src
//   dstOffset - texel offset added to the work-item coordinate
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
// Each 32-bit group of the texel fills one channel, assembled low byte first.
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
                                         __write_only image3d_t output,
                                         int srcOffset,
                                         int4 dstOffset,
                                         uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    __global uchar *pixel = src + LOffset + x * 16;
    uint4 c = (uint4)(0, 0, 0, 0);

    // Test the address that is actually dereferenced: with a pitch that is not
    // a multiple of the texel size, a row can be misaligned even when
    // src + srcOffset is aligned.
    if ((ulong)pixel & 0x0000000f) {
        c.x = ((uint)pixel[3] << 24) | ((uint)pixel[2] << 16) | ((uint)pixel[1] << 8) | (uint)pixel[0];
        c.y = ((uint)pixel[7] << 24) | ((uint)pixel[6] << 16) | ((uint)pixel[5] << 8) | (uint)pixel[4];
        c.z = ((uint)pixel[11] << 24) | ((uint)pixel[10] << 16) | ((uint)pixel[9] << 8) | (uint)pixel[8];
        c.w = ((uint)pixel[15] << 24) | ((uint)pixel[14] << 16) | ((uint)pixel[13] << 8) | (uint)pixel[12];
    } else {
        c = *((__global uint4 *)pixel);
    }

    write_imageui(output, dstCoord, c);
}
|
||||
)==="
|
||||
154
runtime/built_ins/kernels/copy_image3d_to_buffer.igdrcl_built_in
Normal file
154
runtime/built_ins/kernels/copy_image3d_to_buffer.igdrcl_built_in
Normal file
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Reads one texel per work-item from a 3D image and stores its first
// channel, saturated to a byte, into a linear buffer.
// Pitch.x is the row pitch and Pitch.y the slice pitch of the buffer.
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
                                       __global uchar *dst,
                                       int4 srcOffset,
                                       int dstOffset,
                                       uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    const int4 texelCoord = (int4)(x, y, z, 0) + srcOffset;
    const uint rowStart = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    const uint4 texel = read_imageui(input, texelCoord);
    dst[rowStart + x] = convert_uchar_sat(texel.x);
}
|
||||
|
||||
// Reads one texel per work-item from a 3D image and stores the low 16 bits
// of its first channel into a linear buffer, low byte first.
//   srcOffset - texel offset added to the work-item coordinate
//   dstOffset - byte offset of the first texel in dst
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    uint4 c = read_imageui(input, srcCoord);

    __global uchar *pixel = dst + DstOffset + x * 2;

    // Test the address that is actually written: with a pitch that is not a
    // multiple of the texel size, a row can be misaligned even when
    // dst + dstOffset is aligned.
    if ((ulong)pixel & 0x00000001) {
        pixel[1] = (uchar)((c.x >> 8) & 0xff);
        pixel[0] = (uchar)(c.x & 0xff);
    } else {
        *((__global ushort *)pixel) = convert_ushort_sat(c.x);
    }
}
|
||||
|
||||
// Reads one texel per work-item from a 3D image and stores its first
// channel (4 bytes, low byte first) into a linear buffer.
//   srcOffset - texel offset added to the work-item coordinate
//   dstOffset - byte offset of the first texel in dst
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    uint4 c = read_imageui(input, srcCoord);

    __global uchar *pixel = dst + DstOffset + x * 4;

    // Test the address that is actually written: with a pitch that is not a
    // multiple of the texel size, a row can be misaligned even when
    // dst + dstOffset is aligned.
    if ((ulong)pixel & 0x00000003) {
        pixel[3] = (uchar)((c.x >> 24) & 0xff);
        pixel[2] = (uchar)((c.x >> 16) & 0xff);
        pixel[1] = (uchar)((c.x >> 8) & 0xff);
        pixel[0] = (uchar)(c.x & 0xff);
    } else {
        *((__global uint *)pixel) = c.x;
    }
}
|
||||
|
||||
// Reads one texel per work-item from a 3D image and stores its first two
// channels (8 bytes, low byte first) into a linear buffer.
//   srcOffset - texel offset added to the work-item coordinate
//   dstOffset - byte offset of the first texel in dst
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    uint4 c = read_imageui(input, srcCoord);

    __global uchar *pixel = dst + DstOffset + x * 8;

    // Test the address that is actually written: with a pitch that is not a
    // multiple of the texel size, a row can be misaligned even when
    // dst + dstOffset is aligned.
    if ((ulong)pixel & 0x00000007) {
        pixel[3] = (uchar)((c.x >> 24) & 0xff);
        pixel[2] = (uchar)((c.x >> 16) & 0xff);
        pixel[1] = (uchar)((c.x >> 8) & 0xff);
        pixel[0] = (uchar)(c.x & 0xff);
        pixel[7] = (uchar)((c.y >> 24) & 0xff);
        pixel[6] = (uchar)((c.y >> 16) & 0xff);
        pixel[5] = (uchar)((c.y >> 8) & 0xff);
        pixel[4] = (uchar)(c.y & 0xff);
    } else {
        uint2 d = (uint2)(c.x, c.y);
        *((__global uint2 *)pixel) = d;
    }
}
|
||||
|
||||
// Reads one texel per work-item from a 3D image and stores all four
// channels (16 bytes, low byte first within each channel) into a linear
// buffer.
//   srcOffset - texel offset added to the work-item coordinate
//   dstOffset - byte offset of the first texel in dst
//   Pitch     - x: row pitch, y: slice pitch of the buffer, in bytes
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
                                         __global uchar *dst,
                                         int4 srcOffset,
                                         int dstOffset,
                                         uint2 Pitch) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    const uint4 c = read_imageui(input, srcCoord);

    __global uchar *pixel = dst + DstOffset + x * 16;

    // Test the address that is actually written: with a pitch that is not a
    // multiple of the texel size, a row can be misaligned even when
    // dst + dstOffset is aligned.
    if ((ulong)pixel & 0x0000000f) {
        pixel[3] = (uchar)((c.x >> 24) & 0xff);
        pixel[2] = (uchar)((c.x >> 16) & 0xff);
        pixel[1] = (uchar)((c.x >> 8) & 0xff);
        pixel[0] = (uchar)(c.x & 0xff);
        pixel[7] = (uchar)((c.y >> 24) & 0xff);
        pixel[6] = (uchar)((c.y >> 16) & 0xff);
        pixel[5] = (uchar)((c.y >> 8) & 0xff);
        pixel[4] = (uchar)(c.y & 0xff);
        pixel[11] = (uchar)((c.z >> 24) & 0xff);
        pixel[10] = (uchar)((c.z >> 16) & 0xff);
        pixel[9] = (uchar)((c.z >> 8) & 0xff);
        pixel[8] = (uchar)(c.z & 0xff);
        pixel[15] = (uchar)((c.w >> 24) & 0xff);
        pixel[14] = (uchar)((c.w >> 16) & 0xff);
        pixel[13] = (uchar)((c.w >> 8) & 0xff);
        pixel[12] = (uchar)(c.w & 0xff);
    } else {
        *(__global uint4 *)pixel = c;
    }
}
|
||||
)==="
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Copies one texel per work-item between two 1D images; only the x
// components of the offsets are used.
__kernel void CopyImageToImage1d(
    __read_only image1d_t input,
    __write_only image1d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    const int id = get_global_id(0);

    const uint4 texel = read_imageui(input, id + srcOffset.x);
    write_imageui(output, id + dstOffset.x, texel);
}
|
||||
)==="
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Copies unsigned-integer texels between two 2D images.
// Work-item (i, j) reads texel (i, j) + srcOffset.xy from `input` and stores
// it at (i, j) + dstOffset.xy in `output`. The .z/.w offset components are
// not used.
__kernel void CopyImageToImage2d(
    __read_only image2d_t input,
    __write_only image2d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    // Global ids are narrowed to int before the offsets are applied.
    const int2 gid = (int2)((int)get_global_id(0), (int)get_global_id(1));

    const uint4 texel = read_imageui(input, gid + srcOffset.xy);
    write_imageui(output, gid + dstOffset.xy, texel);
}
|
||||
)==="
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
// Copies unsigned-integer texels between two 3D images.
// Work-item (i, j, k) reads texel (i, j, k, 0) + srcOffset from `input` and
// stores it at (i, j, k, 0) + dstOffset in `output`; the full int4 offsets
// (including .w) are added to the coordinate.
__kernel void CopyImageToImage3d(
    __read_only image3d_t input,
    __write_only image3d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    // Global ids are narrowed to int before the offsets are applied.
    const int4 gid = (int4)((int)get_global_id(0),
                            (int)get_global_id(1),
                            (int)get_global_id(2),
                            0);

    const uint4 texel = read_imageui(input, gid + srcOffset);
    write_imageui(output, gid + dstOffset, texel);
}
|
||||
)==="
|
||||
64
runtime/built_ins/kernels/fill_buffer.igdrcl_built_in
Normal file
64
runtime/built_ins/kernels/fill_buffer.igdrcl_built_in
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Byte-wise buffer fill.
// Each work-item stores one pattern byte at pDst[global id + dstOffsetInBytes].
// The pattern byte is selected by the local id, so the kernel assumes the
// local work size equals the pattern size.
__kernel void FillBufferBytes(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern )
{
    const uint patternIdx = get_local_id(0);
    // The sum is narrowed to uint, exactly as the destination index type requires.
    const uint dstIdx = get_global_id(0) + dstOffsetInBytes;

    pDst[dstIdx] = pPattern[patternIdx];
}
|
||||
|
||||
// Byte-wise fill of a leading leftover region.
// Work-item i writes pPattern[i & (patternSizeInEls - 1)] to
// pDst[i + dstOffsetInBytes]. The mask equals (i % patternSizeInEls) only
// when patternSizeInEls is a power of two.
__kernel void FillBufferLeftLeftover(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern,
    const uint patternSizeInEls )
{
    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    const uint dstIdx = workItem + dstOffsetInBytes; // 32-bit index arithmetic

    pDst[dstIdx] = pPattern[workItem & patternMask];
}
|
||||
|
||||
// Dword-wise fill of the main region.
// The destination is viewed as an array of uint starting dstOffsetInBytes
// bytes into pDst; work-item i stores pPattern[i & (patternSizeInEls - 1)]
// into element i of that view. Here patternSizeInEls counts uint elements,
// and the mask equals (i % patternSizeInEls) only for power-of-two sizes.
__kernel void FillBufferMiddle(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uint* pPattern,
    const uint patternSizeInEls )
{
    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    __global uint* dstWords = (__global uint*)(pDst + dstOffsetInBytes);

    dstWords[workItem] = pPattern[workItem & patternMask];
}
|
||||
|
||||
// Byte-wise fill of a trailing leftover region.
// Work-item i writes pPattern[i & (patternSizeInEls - 1)] to
// pDst[i + dstOffsetInBytes]. The mask equals (i % patternSizeInEls) only
// when patternSizeInEls is a power of two.
__kernel void FillBufferRightLeftover(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern,
    const uint patternSizeInEls )
{
    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    const uint dstIdx = workItem + dstOffsetInBytes; // 32-bit index arithmetic

    pDst[dstIdx] = pPattern[workItem & patternMask];
}
|
||||
)==="
|
||||
33
runtime/built_ins/kernels/fill_image1d.igdrcl_built_in
Normal file
33
runtime/built_ins/kernels/fill_image1d.igdrcl_built_in
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Fills a 1D image with a constant unsigned-integer color.
// Work-item i writes `color` at coordinate (i + dstOffset.x); only the .x
// component of the offset is used.
__kernel void FillImage1d(
    __write_only image1d_t output,
    uint4 color,
    int4 dstOffset) {
    // The global id is narrowed to int before the offset is applied.
    const int gid = get_global_id(0);

    write_imageui(output, gid + dstOffset.x, color);
}
|
||||
)==="
|
||||
34
runtime/built_ins/kernels/fill_image2d.igdrcl_built_in
Normal file
34
runtime/built_ins/kernels/fill_image2d.igdrcl_built_in
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Fills a 2D image with a constant unsigned-integer color.
// Work-item (i, j) writes `color` at (i, j) + dstOffset.xy; the .z/.w offset
// components are not used.
__kernel void FillImage2d(
    __write_only image2d_t output,
    uint4 color,
    int4 dstOffset) {
    // Global ids are narrowed to int before the offset is applied.
    const int2 gid = (int2)((int)get_global_id(0), (int)get_global_id(1));

    write_imageui(output, gid + dstOffset.xy, color);
}
|
||||
)==="
|
||||
37
runtime/built_ins/kernels/fill_image3d.igdrcl_built_in
Normal file
37
runtime/built_ins/kernels/fill_image3d.igdrcl_built_in
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
// Fills a 3D image with a constant unsigned-integer color.
// Work-item (i, j, k) writes `color` at (i, j, k, 0) + dstOffset; the full
// int4 offset (including .w) is added to the coordinate.
__kernel void FillImage3d(
    __write_only image3d_t output,
    uint4 color,
    int4 dstOffset) {
    // Global ids are narrowed to int before the offset is applied.
    const int4 gid = (int4)((int)get_global_id(0),
                            (int)get_global_id(1),
                            (int)get_global_id(2),
                            0);

    write_imageui(output, gid + dstOffset, color);
}
|
||||
)==="
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Signature-only kernel: the body is intentionally empty in this source.
// NOTE(review): presumably the actual work is performed outside this OpenCL C
// source and this declaration only fixes the argument list — confirm.
__kernel void ve_dn_di_enhance_intel(
    sampler_t accelerator,
    int flags,
    __read_only image2d_t current_input,
    __read_only image2d_t ref_input,
    __write_only image2d_t current_output,
    __write_only image2d_t ref_output,
    __write_only image2d_t dndi_output)
{
}
|
||||
)==="
|
||||
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Signature-only kernel: the body is intentionally empty in this source.
// NOTE(review): presumably the actual work is performed outside this OpenCL C
// source and this declaration only fixes the argument list — confirm.
__kernel void ve_dn_enhance_intel(
    sampler_t accelerator,
    int flags,
    __read_only image2d_t ref_input,
    __read_only image2d_t current_input,
    __write_only image2d_t current_output)
{
}
|
||||
)==="
|
||||
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Signature-only kernel: the body is intentionally empty in this source.
// NOTE(review): presumably the actual work is performed outside this OpenCL C
// source and this declaration only fixes the argument list — confirm.
__kernel void ve_enhance_intel(
    sampler_t accelerator,
    int flags,
    __read_only image2d_t current_input,
    __write_only image2d_t current_output)
{
}
|
||||
)==="
|
||||
@@ -0,0 +1,458 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// Advanced motion estimation with bidirectional skip check.
//
// Each work-group (16 work-items, see reqd_work_group_size) walks up to
// `stride` consecutive macroblocks (MBs) starting at linear id
// stride * get_group_id(0); the linear id is split into
// gid_0 = id / height and gid_1 = id % height. For every MB the kernel:
//   1. runs intel_work_group_vme_mb_multi_query_4 when the predictor count
//      (count.x) is non-zero,
//   2. runs the 16x16 or 8x8 bidirectional skip check when the skip count
//      (count.y) is non-zero or intra is requested,
//   3. after a local-memory barrier, copies the results from the __local
//      arrays to the search / skip / intra output buffers.
//
// flags: bit 0x2 enables the intra search, bit 0x4 selects 8x8 skip blocks.
// search_cost_penalty: 1, 2 or 3 selects a non-zero MV cost table; any other
//   value selects an all-zero table.
// count_global: a component >= 0 is used for every MB; a component equal to
//   -1 makes the kernel read the per-MB counts from
//   count_motion_vector_buffer.
// search_residuals / intra_residuals: may be NULL (checked unless
//   HW_NULL_CHECK is defined).
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
|
||||
block_advanced_motion_estimate_bidirectional_check_intel(
|
||||
sampler_t accelerator, __read_only image2d_t srcImg,
|
||||
__read_only image2d_t refImg, __read_only image2d_t src_check_image,
|
||||
__read_only image2d_t ref0_check_image,
|
||||
__read_only image2d_t ref1_check_image, uint flags,
|
||||
uint search_cost_penalty, uint search_cost_precision, short2 count_global,
|
||||
uchar bidir_weight, __global short2 *count_motion_vector_buffer,
|
||||
__global short2 *prediction_motion_vector_buffer,
|
||||
__global char *skip_input_mode_buffer,
|
||||
__global short2 *skip_motion_vector_buffer,
|
||||
__global short2 *search_motion_vector_buffer,
|
||||
__global char *intra_search_predictor_modes,
|
||||
__global ushort *search_residuals, __global ushort *skip_residuals,
|
||||
__global ushort *intra_residuals, __read_only image2d_t intraSrcImg,
|
||||
int height, int width, int stride) {
|
||||
__local uint dstSearch[64]; // 8 GRFs
|
||||
__local uint dstSkipIntra[32 + 24]; // 7 GRFs (4 for inter, 3 for intra)
|
||||
|
||||
// distortion in the 6th GRF
|
||||
__local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5];
|
||||
|
||||
// Initialize the MV cost table:
|
||||
// MV Cost in U4U4 format:
|
||||
// No cost : 0, 0, 0, 0, 0, 0, 0, 0
|
||||
// Low Cost : 1, 4, 5, 9, 10, 12, 14, 15
|
||||
// Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57
|
||||
// High Cost : 29, 61, 72, 78, 88, 89, 91, 92
|
||||
|
||||
uint2 MVCostTable;
|
||||
if (search_cost_penalty == 1) {
|
||||
MVCostTable.s0 = 0x09050401;
|
||||
MVCostTable.s1 = 0x0F0E0C0A;
|
||||
} else if (search_cost_penalty == 2) {
|
||||
MVCostTable.s0 = 0x2B1D1A05;
|
||||
MVCostTable.s1 = 0x39392F2D;
|
||||
} else if (search_cost_penalty == 3) {
|
||||
MVCostTable.s0 = 0x4E483D1D;
|
||||
MVCostTable.s1 = 0x5C5B5958;
|
||||
} else {
|
||||
MVCostTable.s0 = 0;
|
||||
MVCostTable.s1 = 0;
|
||||
}
|
||||
|
||||
// The precision value is placed in bits 16 and up of the word passed to the
// search call.
uint MVCostPrecision = ((uint)search_cost_precision) << 16;
|
||||
|
||||
// Frame is divided into rows * columns of MBs.
|
||||
// One h/w thread per WG.
|
||||
// One WG processes "row" MBs - one row per iteration and one MB per row.
|
||||
// Number of WGs (or h/w threads) is number of columns MBs. Each iteration
|
||||
// processes the MB in a row - gid_0 is the MB id in a row and gid_1 is the
|
||||
// row offset.
|
||||
|
||||
int sid_0 = stride * get_group_id(0);
|
||||
int gid_0 = sid_0 / height;
|
||||
int gid_1 = sid_0 % height;
|
||||
for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
|
||||
sid++, gid_0 = sid / height, gid_1 = sid % height) {
|
||||
int2 srcCoord;
|
||||
|
||||
srcCoord.x = gid_0 * 16 +
|
||||
get_global_offset(0); // 16 pixels wide MBs (globally scalar)
|
||||
srcCoord.y = gid_1 * 16 +
|
||||
get_global_offset(1); // 16 pixels tall MBs (globally scalar)
|
||||
uint curMB = gid_0 + gid_1 * width; // current MB id
|
||||
// NOTE(review): count stays uninitialized when neither count_global
// component is -1 and at least one of them is below -1 - confirm callers
// never pass such values.
short2 count;
|
||||
|
||||
// If either the search or skip vector counts are per-MB, then we need to
|
||||
// read in
|
||||
// the count motion vector buffer.
|
||||
|
||||
if ((count_global.s0 == -1) | (count_global.s1 == -1)) {
|
||||
count = count_motion_vector_buffer[curMB];
|
||||
}
|
||||
|
||||
// If either the search or skip vector counts are per-frame, we need to use
|
||||
// those.
|
||||
|
||||
if (count_global.s0 >= 0) {
|
||||
count.s0 = count_global.s0;
|
||||
}
|
||||
|
||||
if (count_global.s1 >= 0) {
|
||||
count.s1 = count_global.s1;
|
||||
}
|
||||
|
||||
int countPredMVs = count.x;
|
||||
if (countPredMVs != 0) {
|
||||
uint offset = curMB * 4; // 4 predictors per MB
|
||||
offset += get_local_id(0) % 4; // 16 work-items access 4 MVs for MB
|
||||
// one predictor for MB per SIMD channel
|
||||
|
||||
// Reduce predictors from Q-pixel to integer precision.
|
||||
int2 predMV = 0;
|
||||
|
||||
if (get_local_id(0) < countPredMVs) {
|
||||
// one MV per work-item
|
||||
predMV = convert_int2(prediction_motion_vector_buffer[offset]);
|
||||
// Predictors are input in QP resolution. Convert that to integer
|
||||
// resolution.
|
||||
predMV.x /= 4;
|
||||
predMV.y /= 4;
|
||||
// Clear bit 0 so the vertical component is even.
predMV.y &= 0xFFFFFFFE;
|
||||
}
|
||||
|
||||
// Do up to 4 IMEs, get the best MVs and their distortions, and optionally
|
||||
// a FBR of
|
||||
// the best MVs. Finally the results are written out to SLM.
|
||||
|
||||
intel_work_group_vme_mb_multi_query_4(
|
||||
dstSearch, // best search MV and its distortions into SLM
|
||||
countPredMVs, // count of predictor MVs (globally scalar - value range
|
||||
// 1 to 4)
|
||||
MVCostPrecision, // MV cost precision
|
||||
MVCostTable, // MV cost table
|
||||
srcCoord, // MB 2-D offset (globally scalar)
|
||||
predMV, // predictor MVs (up to 4 distinct MVs for SIMD16 thread)
|
||||
srcImg, // source
|
||||
refImg, // reference
|
||||
accelerator); // vme object
|
||||
}
|
||||
|
||||
int doIntra = ((flags & 0x2) != 0);
|
||||
int intraEdges = 0;
|
||||
if (doIntra) {
|
||||
// Enable all edges by default.
|
||||
intraEdges = 0x3C;
|
||||
// If this is a left-edge MB, then disable left edges.
|
||||
if ((gid_0 == 0) & (get_global_offset(0) == 0)) {
|
||||
intraEdges &= 0x18;
|
||||
}
|
||||
|
||||
// If this is a right edge MB then disable right edges.
|
||||
if (gid_0 == width - 1) {
|
||||
intraEdges &= 0x34;
|
||||
}
|
||||
|
||||
// If this is a top-edge MB, then disable top edges.
|
||||
if ((gid_1 == 0) & (get_global_offset(1) == 0)) {
|
||||
intraEdges &= 0x20;
|
||||
}
|
||||
|
||||
// Set bit6=bit5.
|
||||
intraEdges |= ((intraEdges & 0x20) << 1);
|
||||
|
||||
intraEdges <<= 8;
|
||||
}
|
||||
|
||||
// Non-zero when flags bit 0x4 is set (the value is 0 or 4, not 0 or 1).
int skip_block_type_8x8 = flags & 0x4;
|
||||
|
||||
int countSkipMVs = count.y;
|
||||
if (countSkipMVs != 0 || doIntra == true) {
|
||||
// one set of skip MV per SIMD channel
|
||||
|
||||
// Do up to 4 skip checks and get the distortions for each of them.
|
||||
// Finally the results are written out to SLM.
|
||||
|
||||
// The 16x16 call is also used for an intra-only request (no skip MVs),
// regardless of the requested skip block type.
if ((skip_block_type_8x8 == 0) | ((doIntra) & (countSkipMVs == 0))) {
|
||||
// 16x16:
|
||||
|
||||
uint offset = curMB * 4 * 2; // 4 sets of skip check MVs per MB
|
||||
int skipMV = 0;
|
||||
if (get_local_id(0) < countSkipMVs * 2) // need 2 values per MV
|
||||
{
|
||||
offset +=
|
||||
(get_local_id(0)); // 16 work-items access 4 sets of MVs for MB
|
||||
__global int *skip1_motion_vector_buffer =
|
||||
(__global int *)skip_motion_vector_buffer;
|
||||
skipMV = skip1_motion_vector_buffer[offset]; // one MV per work-item
|
||||
}
|
||||
|
||||
uchar skipMode = 0;
|
||||
if (get_local_id(0) < countSkipMVs) {
|
||||
skipMode = skip_input_mode_buffer[curMB];
|
||||
|
||||
// Clamp the mode read from the buffer into the range 1..3.
if (skipMode == 0) {
|
||||
skipMode = 1;
|
||||
}
|
||||
if (skipMode > 3) {
|
||||
skipMode = 3;
|
||||
}
|
||||
}
|
||||
|
||||
intel_work_group_vme_mb_multi_bidir_check_16x16(
|
||||
dstSkipIntra, // distortions into SLM
|
||||
countSkipMVs, // count of skip check MVs (globally scalar - value
|
||||
// range 1 to 4)
|
||||
doIntra, // compute intra modes
|
||||
intraEdges, // intra edges to use
|
||||
srcCoord, // MB 2-D offset (globally scalar)
|
||||
bidir_weight, // bidirectional weight
|
||||
skipMode, // skip modes
|
||||
skipMV, // skip check MVs (up to 4 distinct sets of skip check MVs
|
||||
// for SIMD16 thread)
|
||||
src_check_image, // source
|
||||
ref0_check_image, // reference fwd
|
||||
ref1_check_image, // reference bwd
|
||||
intraSrcImg, // intra source
|
||||
accelerator); // vme object
|
||||
} else {
|
||||
// 8x8:
|
||||
|
||||
uint offset =
|
||||
curMB * 4 *
|
||||
8; // 4 sets of skip check MVs, 16 shorts (8 ints) each per MB
|
||||
int2 skipMVs = 0;
|
||||
if (get_local_id(0) < countSkipMVs * 8) // need 8 values per MV
|
||||
{
|
||||
offset +=
|
||||
(get_local_id(0)); // 16 work-items access 4 sets of MVs for MB
|
||||
__global int *skip1_motion_vector_buffer =
|
||||
(__global int *)(skip_motion_vector_buffer);
|
||||
skipMVs.x = skip1_motion_vector_buffer[offset]; // four component MVs
|
||||
// per work-item
|
||||
skipMVs.y = skip1_motion_vector_buffer[offset + 16];
|
||||
}
|
||||
|
||||
uchar skipModes = 0;
|
||||
if (get_local_id(0) < countSkipMVs) {
|
||||
skipModes = skip_input_mode_buffer[curMB];
|
||||
}
|
||||
|
||||
intel_work_group_vme_mb_multi_bidir_check_8x8(
|
||||
dstSkipIntra, // distortions into SLM
|
||||
countSkipMVs, // count of skip check MVs per MB (globally scalar -
|
||||
// value range 1 to 4)
|
||||
doIntra, // compute intra modes
|
||||
intraEdges, // intra edges to use
|
||||
srcCoord, // MB 2-D offset (globally scalar)
|
||||
bidir_weight, // bidirectional weight
|
||||
skipModes, // skip modes
|
||||
skipMVs, // skip check MVs (up to 4 distinct sets of skip check MVs
|
||||
// for SIMD16 thread)
|
||||
src_check_image, // source
|
||||
ref0_check_image, // reference fwd
|
||||
ref1_check_image, // reference bwd
|
||||
intraSrcImg, // intra source
|
||||
accelerator); // vme object
|
||||
}
|
||||
}
|
||||
|
||||
// Local-memory barrier between the calls above, which deposit their results
// in the __local arrays, and the copy-out code below that reads them.
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// Write Out motion estimation result:
|
||||
// Result format
|
||||
// Hierarchical row-major layout
|
||||
// i.e. row-major of blocks MVs in MBs, and row-major of 4 sets of
|
||||
// MVs/distortion in blocks
|
||||
if (countPredMVs != 0) {
|
||||
// 4x4
|
||||
if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
|
||||
int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width);
|
||||
|
||||
// 1. 16 work-items enabled.
|
||||
// 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30
|
||||
// (interleaved
|
||||
// fwd/bdw MVs) with constant offset 8 (control data size) from SLM
|
||||
// into contiguous
|
||||
// short2 locations 0, 1, .., 15 of global buffer
|
||||
// search_motion_vector_buffer with
|
||||
// offset index.
|
||||
// 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from
|
||||
// distSearch into
|
||||
// contiguous ushort locations 0, 1, .., 15 of search_residuals with
|
||||
// offset index.
|
||||
|
||||
short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]);
|
||||
search_motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (search_residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
search_residuals[index] = distSearch[get_local_id(0)];
|
||||
}
|
||||
}
|
||||
|
||||
// 8x8
|
||||
else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
|
||||
// Only 1st 4 work-item are needed.
|
||||
if (get_local_id(0) < 4) {
|
||||
int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width);
|
||||
|
||||
// 1. 4 work-items enabled.
|
||||
// 2. Work-items gather fw MVs in strided dword locations 0, 8, 16, 24
|
||||
// (interleaved
|
||||
// fwd/bdw MVs) with constant offset 8 from SLM into contiguous
|
||||
// short2 locations
|
||||
// 0, 1, .., 15 of global buffer search_motion_vector_buffer with
|
||||
// offset index.
|
||||
// 3. Work-items gather strided ushort locations 0, 4, 8, 12 from
|
||||
// distSearch into
|
||||
// contiguous ushort locations 0, 1, .., 15 of search_residuals
|
||||
// with offset index.
|
||||
|
||||
short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]);
|
||||
search_motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (search_residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
search_residuals[index] = distSearch[get_local_id(0) * 4];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 16x16
|
||||
else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
|
||||
// Only the 1st work-item is needed.
|
||||
if (get_local_id(0) == 0) {
|
||||
int index = gid_0 + gid_1 * width;
|
||||
|
||||
// 1. 1 work-item enabled.
|
||||
// 2. Work-item gathers fwd MV in dword location 0 with constant
|
||||
// offset 8 from
|
||||
// SLM into short2 locations 0 of global buffer
|
||||
// search_motion_vector_buffer.
|
||||
// 3. Work-item gathers ushort location 0 from distSearch into ushort
|
||||
// location 0 of search_residuals with offset index.
|
||||
|
||||
short2 val = as_short2(dstSearch[8]);
|
||||
search_motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (search_residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
search_residuals[index] = distSearch[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write out motion skip check result:
|
||||
// Result format
|
||||
// Hierarchical row-major layout
|
||||
// i.e. row-major of blocks in MBs, and row-major of 8 sets of
|
||||
// distortions in blocks
|
||||
// NOTE(review): unlike search_residuals and intra_residuals, skip_residuals
// is written without a NULL check - confirm it is always provided when
// countSkipMVs != 0.
if (countSkipMVs != 0) {
|
||||
if (skip_block_type_8x8 == false) {
|
||||
// Copy out 4 (1 component) sets of distortion values.
|
||||
|
||||
int index = (gid_0 * 4) + (get_local_id(0)) + (gid_1 * 4 * width);
|
||||
|
||||
if (get_local_id(0) < countSkipMVs) {
|
||||
// 1. Up to 4 work-items are enabled.
|
||||
// 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and
|
||||
// copies them to contiguous skip_residual locations 0, 1, 2, ..,
|
||||
// 7.
|
||||
__local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
|
||||
skip_residuals[index] = distSkip[get_local_id(0) * 16];
|
||||
}
|
||||
} else {
|
||||
// Copy out 4 (4 component) sets of distortion values.
|
||||
int index =
|
||||
(gid_0 * 4 * 4) + (get_local_id(0)) + (gid_1 * 4 * 4 * width);
|
||||
|
||||
if (get_local_id(0) < countSkipMVs * 4) {
|
||||
// 1. Up to 16 work-items are enabled.
|
||||
// 2. The work-item gathers distSkip locations 0, 4*1, .., 4*15 and
|
||||
// copies them to contiguous skip_residual locations 0, 1, 2, ..,
|
||||
// 15.
|
||||
|
||||
__local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
|
||||
skip_residuals[index] = distSkip[get_local_id(0) * 4];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write out intra search result:
|
||||
// intra_search_predictor_modes uses a stride of 22 entries per MB:
// entry 0 holds the 16x16 mode, entries 1..4 the 8x8 modes and
// entries 5..20 the 4x4 modes (two 4-bit modes unpacked per source byte).
if (doIntra) {
|
||||
// Write out the 4x4 intra modes
|
||||
if (get_local_id(0) < 8) {
|
||||
__local char *dstIntra_4x4 =
|
||||
(__local char *)(&dstSkipIntra[32 + 16 + 4]);
|
||||
char value = dstIntra_4x4[get_local_id(0)];
|
||||
char value_low = (value)&0xf;
|
||||
char value_high = (value >> 4) & 0xf;
|
||||
|
||||
int index_low =
|
||||
(gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width);
|
||||
|
||||
int index_high =
|
||||
(gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width);
|
||||
|
||||
intra_search_predictor_modes[index_low + 5] = value_low;
|
||||
intra_search_predictor_modes[index_high + 5] = value_high;
|
||||
}
|
||||
|
||||
// Write out the 8x8 intra modes
|
||||
if (get_local_id(0) < 4) {
|
||||
__local char *dstIntra_8x8 =
|
||||
(__local char *)(&dstSkipIntra[32 + 8 + 4]);
|
||||
char value = dstIntra_8x8[get_local_id(0) * 2];
|
||||
char value_low = (value)&0xf;
|
||||
int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width);
|
||||
intra_search_predictor_modes[index + 1] = value_low;
|
||||
}
|
||||
|
||||
// Write out the 16x16 intra modes
|
||||
if (get_local_id(0) < 1) {
|
||||
__local char *dstIntra_16x16 =
|
||||
(__local char *)(&dstSkipIntra[32 + 0 + 4]);
|
||||
char value = dstIntra_16x16[0];
|
||||
char value_low = (value)&0xf;
|
||||
int index = (gid_0 * 22) + (gid_1 * 22 * width);
|
||||
intra_search_predictor_modes[index] = value_low;
|
||||
}
|
||||
|
||||
// Get the intra residuals.
|
||||
// intra_residuals uses a stride of 4 entries per MB: entry 0 is the 16x16
// distortion, entry 1 the 8x8 distortion and entry 2 the 4x4 distortion.
#ifndef HW_NULL_CHECK
|
||||
if (intra_residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
int index = (gid_0 * 4) + (gid_1 * 4 * width);
|
||||
|
||||
if (get_local_id(0) < 1) {
|
||||
__local ushort *distIntra_4x4 =
|
||||
(__local ushort *)(&dstSkipIntra[32 + 16 + 3]);
|
||||
__local ushort *distIntra_8x8 =
|
||||
(__local ushort *)(&dstSkipIntra[32 + 8 + 3]);
|
||||
__local ushort *distIntra_16x16 =
|
||||
(__local ushort *)(&dstSkipIntra[32 + 0 + 3]);
|
||||
|
||||
intra_residuals[index + 2] = distIntra_4x4[0];
|
||||
intra_residuals[index + 1] = distIntra_8x8[0];
|
||||
intra_residuals[index + 0] = distIntra_16x16[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
)==="
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
 * Raw-string payload holding the OpenCL C declaration of
 * block_advanced_motion_estimate_bidirectional_check_intel.
 *
 * The kernel requires a 16x1x1 work-group and declares its complete argument
 * list, but its body is empty: launching this source performs no work.
 * NOTE(review): presumably this is a signature-only stub and the functional
 * implementation is supplied elsewhere - confirm against the code that
 * registers/loads this resource.
 */
R"===(
|
||||
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
|
||||
block_advanced_motion_estimate_bidirectional_check_intel(
|
||||
sampler_t accelerator, __read_only image2d_t srcImg,
|
||||
__read_only image2d_t refImg, __read_only image2d_t src_check_image,
|
||||
__read_only image2d_t ref0_check_image,
|
||||
__read_only image2d_t ref1_check_image, uint flags,
|
||||
uint search_cost_penalty, uint search_cost_precision, short2 count_global,
|
||||
uchar bidir_weight, __global short2 *count_motion_vector_buffer,
|
||||
__global short2 *prediction_motion_vector_buffer,
|
||||
__global char *skip_input_mode_buffer,
|
||||
__global short2 *skip_motion_vector_buffer,
|
||||
__global short2 *search_motion_vector_buffer,
|
||||
__global char *intra_search_predictor_modes,
|
||||
__global ushort *search_residuals, __global ushort *skip_residuals,
|
||||
__global ushort *intra_residuals) {
|
||||
}
|
||||
|
||||
)==="
|
||||
@@ -0,0 +1,390 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
|
||||
block_advanced_motion_estimate_check_intel(
|
||||
sampler_t accelerator, __read_only image2d_t srcImg,
|
||||
__read_only image2d_t refImg, uint flags, uint skip_block_type,
|
||||
uint search_cost_penalty, uint search_cost_precision,
|
||||
__global short2 *count_motion_vector_buffer,
|
||||
__global short2 *predictors_buffer,
|
||||
__global short2 *skip_motion_vector_buffer,
|
||||
__global short2 *motion_vector_buffer,
|
||||
__global char *intra_search_predictor_modes, __global ushort *residuals,
|
||||
__global ushort *skip_residuals, __global ushort *intra_residuals,
|
||||
__read_only image2d_t intraSrcImg, int height, int width, int stride) {
|
||||
__local uint dstSearch[64]; // 8 GRFs
|
||||
__local uint dstSkipIntra[64 + 24]; // 11 GRFs (8 for inter, 3 for intra)
|
||||
|
||||
__local ushort *distSearch =
|
||||
(__local ushort *)&dstSearch[8 * 5]; // distortion in the 6th GRF
|
||||
|
||||
// Initialize the MV cost table:
|
||||
// MV Cost in U4U4 format:
|
||||
// No cost : 0, 0, 0, 0, 0, 0, 0, 0
|
||||
// Low Cost : 1, 4, 5, 9, 10, 12, 14, 15
|
||||
// Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57
|
||||
// High Cost : 29, 61, 72, 78, 88, 89, 91, 92
|
||||
|
||||
uint2 MVCostTable;
|
||||
if (search_cost_penalty == 1) {
|
||||
MVCostTable.s0 = 0x09050401;
|
||||
MVCostTable.s1 = 0x0F0E0C0A;
|
||||
} else if (search_cost_penalty == 2) {
|
||||
MVCostTable.s0 = 0x2B1D1A05;
|
||||
MVCostTable.s1 = 0x39392F2D;
|
||||
} else if (search_cost_penalty == 3) {
|
||||
MVCostTable.s0 = 0x4E483D1D;
|
||||
MVCostTable.s1 = 0x5C5B5958;
|
||||
} else {
|
||||
MVCostTable.s0 = 0;
|
||||
MVCostTable.s1 = 0;
|
||||
}
|
||||
|
||||
uint MVCostPrecision = ((uint)search_cost_precision) << 16;
|
||||
// Frame is divided into rows * columns of MBs.
|
||||
// One h/w thread per WG.
|
||||
// One WG processes 'row' MBs - one row per iteration and one MB per row.
|
||||
// Number of WGs (or h/w threads) is number of columns MBs
|
||||
// Each iteration processes the MB in a row - gid_0 is the MB id in a row and
|
||||
// gid_1 is the row offset.
|
||||
|
||||
int sid_0 = stride * get_group_id(0);
|
||||
int gid_0 = sid_0 / height;
|
||||
int gid_1 = sid_0 % height;
|
||||
for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
|
||||
sid++, gid_0 = sid / height, gid_1 = sid % height) {
|
||||
int2 srcCoord;
|
||||
|
||||
srcCoord.x = gid_0 * 16 +
|
||||
get_global_offset(0); // 16 pixels wide MBs (globally scalar)
|
||||
srcCoord.y = gid_1 * 16 +
|
||||
get_global_offset(1); // 16 pixels tall MBs (globally scalar)
|
||||
|
||||
uint curMB = gid_0 + gid_1 * width; // current MB id
|
||||
short2 count = count_motion_vector_buffer[curMB];
|
||||
|
||||
int countPredMVs = count.x;
|
||||
if (countPredMVs != 0) {
|
||||
uint offset = curMB * 8; // 8 predictors per MB
|
||||
offset += get_local_id(0) % 8; // 16 work-items access 8 MVs for MB
|
||||
// one predictor for MB per SIMD channel
|
||||
|
||||
// Reduce predictors from Q-pixel to integer precision.
|
||||
|
||||
int2 predMV = 0;
|
||||
if (get_local_id(0) < countPredMVs) {
|
||||
predMV =
|
||||
convert_int2(predictors_buffer[offset]); // one MV per work-item
|
||||
predMV.x /= 4;
|
||||
predMV.y /= 4;
|
||||
predMV.y &= 0xFFFE;
|
||||
}
|
||||
|
||||
// Do up to 8 IMEs, get the best MVs and their distortions, and optionally
|
||||
// a FBR of the best MVs.
|
||||
// Finally the results are written out to SLM.
|
||||
|
||||
intel_work_group_vme_mb_multi_query_8(
|
||||
dstSearch, // best search MV and its distortions into SLM
|
||||
countPredMVs, // count of predictor MVs (globally scalar - value range
|
||||
// 1 to 8)
|
||||
MVCostPrecision, // MV cost precision
|
||||
MVCostTable, // MV cost table
|
||||
srcCoord, // MB 2-D offset (globally scalar)
|
||||
predMV, // predictor MVs (up to 8 distinct MVs for SIMD16 thread)
|
||||
srcImg, // source
|
||||
refImg, // reference
|
||||
accelerator); // vme object
|
||||
}
|
||||
|
||||
int doIntra = (flags & 0x2) != 0;
|
||||
int intraEdges = 0;
|
||||
if (doIntra) {
|
||||
// Enable all edges by default.
|
||||
intraEdges = 0x3C;
|
||||
// If this is a left-edge MB, then disable left edges.
|
||||
if ((gid_0 == 0) & (get_global_offset(0) == 0)) {
|
||||
intraEdges &= 0x18;
|
||||
}
|
||||
// If this is a right edge MB then disable right edges.
|
||||
if (gid_0 == width - 1) {
|
||||
intraEdges &= 0x34;
|
||||
}
|
||||
// If this is a top-edge MB, then disable top edges.
|
||||
if ((gid_1 == 0) & (get_global_offset(1) == 0)) {
|
||||
intraEdges &= 0x20;
|
||||
}
|
||||
// Set bit6=bit5.
|
||||
intraEdges |= ((intraEdges & 0x20) << 1);
|
||||
intraEdges <<= 8;
|
||||
}
|
||||
int countSkipMVs = count.y;
|
||||
if (countSkipMVs != 0 || doIntra == true) {
|
||||
uint offset = curMB * 8; // 8 sets of skip check MVs per MB
|
||||
offset +=
|
||||
(get_local_id(0) % 8); // 16 work-items access 8 sets of MVs for MB
|
||||
// one set of skip MV per SIMD channel
|
||||
|
||||
// Do up to 8 skip checks and get the distortions for each of them.
|
||||
// Finally the results are written out to SLM.
|
||||
|
||||
if ((skip_block_type == 0x0) | ((doIntra) & (countSkipMVs == 0))) {
|
||||
int skipMVs = 0;
|
||||
if (get_local_id(0) < countSkipMVs) {
|
||||
__global int *skip1_motion_vector_buffer =
|
||||
(__global int *)skip_motion_vector_buffer;
|
||||
skipMVs = skip1_motion_vector_buffer[offset]; // one packed MV for one
|
||||
// work-item
|
||||
}
|
||||
intel_work_group_vme_mb_multi_check_16x16(
|
||||
dstSkipIntra, // distortions into SLM
|
||||
countSkipMVs, // count of skip check MVs (value range 0 to 8)
|
||||
doIntra, // compute intra modes
|
||||
intraEdges, // intra edges to use
|
||||
srcCoord, // MB 2-D offset (globally scalar)
|
||||
skipMVs, // skip check MVs (up to 8 sets of skip check MVs for
|
||||
// SIMD16 thread)
|
||||
srcImg, // source
|
||||
refImg, // reference
|
||||
intraSrcImg, // intra source
|
||||
accelerator);
|
||||
}
|
||||
|
||||
if ((skip_block_type == 0x1) & (countSkipMVs > 0)) {
|
||||
int4 skipMVs = 0;
|
||||
if (get_local_id(0) < countSkipMVs) {
|
||||
__global int4 *skip4_motion_vector_buffer =
|
||||
(__global int4 *)(skip_motion_vector_buffer);
|
||||
skipMVs = skip4_motion_vector_buffer[offset]; // four component MVs
|
||||
// per work-item
|
||||
}
|
||||
intel_work_group_vme_mb_multi_check_8x8(
|
||||
dstSkipIntra, // distortions into SLM
|
||||
countSkipMVs, // count of skip check MVs per MB (value range 0 to 8)
|
||||
doIntra, // compute intra modes
|
||||
intraEdges, // intra edges to use
|
||||
srcCoord, // MB 2-D offset (globally scalar)
|
||||
skipMVs, // skip check MVs (up to 8 sets of skip check MVs for SIMD16
|
||||
// thread)
|
||||
srcImg, // source
|
||||
refImg, // reference
|
||||
intraSrcImg, // intra source
|
||||
accelerator);
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// Write Out motion estimation result:
|
||||
// Result format
|
||||
// Hierarchical row-major layout
|
||||
// i.e. row-major of blocks MVs in MBs, and row-major of 8 sets of
|
||||
// MVs/distortion in blocks
|
||||
|
||||
if (countPredMVs != 0) {
|
||||
// 4x4
|
||||
if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
|
||||
int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width);
|
||||
|
||||
// 1. 16 work-items enabled.
|
||||
// 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30
|
||||
// (interleaved
|
||||
// fwd/bwd MVs) with constant offset 8 (control data size) from SLM
|
||||
// into contiguous
|
||||
// short2 locations 0, 1, .., 15 of global buffer
|
||||
// motion_vector_buffer with
|
||||
// offset index.
|
||||
// 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from
|
||||
// distSearch into
|
||||
// contiguous ushort locations 0, 1, .., 15 of residuals with
|
||||
// offset index.
|
||||
|
||||
short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]);
|
||||
motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
residuals[index] = distSearch[get_local_id(0)];
|
||||
}
|
||||
}
|
||||
|
||||
// 8x8
|
||||
else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
|
||||
// Only the first 4 work-items are needed.
|
||||
if (get_local_id(0) < 4) {
|
||||
int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width);
|
||||
|
||||
// 1. 4 work-items enabled.
|
||||
// 2. Work-items gather fwd MVs in strided dword locations 0, 8, 16, 24
|
||||
// (interleaved
|
||||
// fwd/bwd MVs) with constant offset 8 from SLM into contiguous
|
||||
// short2 locations
|
||||
// 0, 1, 2, 3 of global buffer motion_vector_buffer with
|
||||
// offset index.
|
||||
// 3. Work-items gather strided ushort locations 0, 4, 8, 12 from
|
||||
// distSearch into
|
||||
// contiguous ushort locations 0, 1, 2, 3 of residuals
|
||||
// with offset index.
|
||||
|
||||
short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]);
|
||||
motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
residuals[index] = distSearch[get_local_id(0) * 4];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 16x16
|
||||
else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
|
||||
// Only the 1st work-item is needed.
|
||||
if (get_local_id(0) == 0) {
|
||||
int index = gid_0 + gid_1 * width;
|
||||
|
||||
// 1. 1 work-item enabled.
|
||||
// 2. Work-item gathers fwd MV in dword location 0 with constant
|
||||
// offset 8 from
|
||||
// SLM into short2 location 0 of global buffer
|
||||
// motion_vector_buffer.
|
||||
// 3. Work-item gathers ushort location 0 from distSearch into ushort
|
||||
// location 0 of residuals with offset index.
|
||||
|
||||
short2 val = as_short2(dstSearch[8]);
|
||||
motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
residuals[index] = distSearch[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write out motion skip check result:
|
||||
// Result format
|
||||
// Hierarchical row-major layout
|
||||
// i.e. row-major of blocks in MBs, and row-major of 8 sets of
|
||||
// distortions in blocks
|
||||
|
||||
if (countSkipMVs != 0) {
|
||||
if (skip_block_type == 0x0) {
|
||||
// Copy out 8 (1 component) sets of distortion values.
|
||||
|
||||
int index = (gid_0 * 8) + (get_local_id(0)) + (gid_1 * 8 * width);
|
||||
|
||||
if (get_local_id(0) < countSkipMVs) {
|
||||
__local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
|
||||
|
||||
// 1. Up to 8 work-items are enabled.
|
||||
// 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and
|
||||
// copies them to contiguous skip_residual locations 0, 1, 2, ..,
|
||||
// 7.
|
||||
skip_residuals[index] = distSkip[get_local_id(0) * 16];
|
||||
}
|
||||
} else {
|
||||
// Copy out 8 (4 component) sets of distortion values.
|
||||
|
||||
int index =
|
||||
(gid_0 * 8 * 4) + (get_local_id(0)) + (gid_1 * 8 * 4 * width);
|
||||
|
||||
__local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
|
||||
|
||||
if (get_local_id(0) < countSkipMVs * 4) {
|
||||
// 1. Up to 16 work-items are enabled.
|
||||
// 2. The work-item gathers distSkip locations 0, 4*1, .., 4*31 and
|
||||
// copies them to contiguous skip_residual locations 0, 1, 2, ..,
|
||||
// 31.
|
||||
|
||||
skip_residuals[index] = distSkip[get_local_id(0) * 4];
|
||||
skip_residuals[index + 16] = distSkip[(get_local_id(0) + 16) * 4];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write out intra search result:
|
||||
|
||||
if (doIntra) {
|
||||
|
||||
int index_low =
|
||||
(gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width);
|
||||
int index_high =
|
||||
(gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width);
|
||||
|
||||
// Write out the 4x4 intra modes
|
||||
if (get_local_id(0) < 8) {
|
||||
__local char *dstIntra_4x4 =
|
||||
(__local char *)(&dstSkipIntra[64 + 16 + 4]);
|
||||
char value = dstIntra_4x4[get_local_id(0)];
|
||||
char value_low = (value)&0xf;
|
||||
char value_high = (value >> 4) & 0xf;
|
||||
intra_search_predictor_modes[index_low + 5] = value_low;
|
||||
intra_search_predictor_modes[index_high + 5] = value_high;
|
||||
}
|
||||
|
||||
// Write out the 8x8 intra modes
|
||||
if (get_local_id(0) < 4) {
|
||||
__local char *dstIntra_8x8 =
|
||||
(__local char *)(&dstSkipIntra[64 + 8 + 4]);
|
||||
char value = dstIntra_8x8[get_local_id(0) * 2];
|
||||
char value_low = (value)&0xf;
|
||||
int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width);
|
||||
intra_search_predictor_modes[index + 1] = value_low;
|
||||
}
|
||||
|
||||
// Write out the 16x16 intra modes
|
||||
if (get_local_id(0) < 1) {
|
||||
__local char *dstIntra_16x16 =
|
||||
(__local char *)(&dstSkipIntra[64 + 0 + 4]);
|
||||
char value = dstIntra_16x16[get_local_id(0)];
|
||||
char value_low = (value)&0xf;
|
||||
intra_search_predictor_modes[index_low] = value_low;
|
||||
}
|
||||
|
||||
// Get the intra residuals.
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (intra_residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
int index = (gid_0 * 4) + (gid_1 * 4 * width);
|
||||
|
||||
if (get_local_id(0) < 1) {
|
||||
__local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[64 + 16 + 3]);
|
||||
__local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[64 + 8 + 3]);
|
||||
__local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[64 + 0 + 3]);
|
||||
intra_residuals[index + 2] = distIntra_4x4[0];
|
||||
intra_residuals[index + 1] = distIntra_8x8[0];
|
||||
intra_residuals[index + 0] = distIntra_16x16[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
 * Raw-string payload holding the OpenCL C declaration of
 * block_advanced_motion_estimate_check_intel.
 *
 * The kernel requires a 16x1x1 work-group and declares its argument list,
 * but its body is empty: launching this source performs no work.
 * NOTE(review): presumably this is a signature-only stub and the functional
 * implementation is supplied elsewhere - confirm against the code that
 * registers/loads this resource.
 */
R"===(
|
||||
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
|
||||
block_advanced_motion_estimate_check_intel(
|
||||
sampler_t accelerator, __read_only image2d_t srcImg,
|
||||
__read_only image2d_t refImg, uint flags, uint skip_block_type,
|
||||
uint search_cost_penalty, uint search_cost_precision,
|
||||
__global short2 *count_motion_vector_buffer,
|
||||
__global short2 *predictors_buffer,
|
||||
__global short2 *skip_motion_vector_buffer,
|
||||
__global short2 *motion_vector_buffer,
|
||||
__global char *intra_search_predictor_modes, __global ushort *residuals,
|
||||
__global ushort *skip_residuals, __global ushort *intra_residuals) {
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
 * Raw-string payload holding the OpenCL C source of block_motion_estimate_intel.
 *
 * Arguments:
 *   accelerator, srcImg, refImg      - forwarded to intel_work_group_vme_mb_query;
 *                                      accelerator is also queried for its block type.
 *   prediction_motion_vector_buffer  - optional input; one short2 per id, read at
 *                                      gid_0 + gid_1 * width.
 *   motion_vector_buffer             - output motion vectors (short2).
 *   residuals                        - optional output distortions (ushort).
 *   height, width, stride            - ints that drive the id decomposition below
 *                                      (presumably counted in 16x16 macroblocks -
 *                                      TODO confirm against the host-side dispatch).
 *
 * Work distribution:
 *   - The required work-group size is 16x1x1.
 *   - Work-group g iterates sid from stride * g up to stride * g + stride - 1.
 *     Each sid is split into gid_0 = sid / height and gid_1 = sid % height, and
 *     the loop ends early as soon as gid_0 >= width or gid_1 >= height.
 *   - The source coordinate is (gid_0 * 16, gid_1 * 16) plus the global offset.
 *
 * Predictor handling:
 *   - Without HW_NULL_CHECK the predictor is read only when
 *     prediction_motion_vector_buffer != NULL; with HW_NULL_CHECK defined the
 *     pointer test is compiled out and the read is unconditional.
 *   - Both predictor components are divided by 4 and y is then ANDed with 0xFFFE.
 *     NOTE(review): refCoord is an int2, so this mask also clears bits 16..31 and
 *     turns a negative y into a positive value - confirm this is intended.
 *
 * Result write-out (after the local-memory barrier), selected by
 * intel_get_accelerator_mb_block_type(accelerator):
 *   - 0x2 (4x4):   all 16 work-items; work-item i copies dst[8 + i * 2] and dist[i]
 *                  into a grid that is width * 4 entries wide.
 *   - 0x1 (8x8):   work-items 0..3; work-item i copies dst[8 + i * 8] and
 *                  dist[i * 4] into a grid that is width * 2 entries wide.
 *   - 0x0 (16x16): work-item 0 copies dst[8] and dist[0] to gid_0 + gid_1 * width.
 *   dist is a ushort view of the local array dst starting at dword 8 * 5.
 *   Writes to residuals are guarded by the same NULL / HW_NULL_CHECK scheme as
 *   the predictor read.
 */
R"===(
|
||||
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
|
||||
block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg,
|
||||
__read_only image2d_t refImg,
|
||||
__global short2 *prediction_motion_vector_buffer,
|
||||
__global short2 *motion_vector_buffer,
|
||||
__global ushort *residuals, int height, int width,
|
||||
int stride) {
|
||||
__local uint dst[64];
|
||||
__local ushort *dist = (__local ushort *)&dst[8 * 5];
|
||||
|
||||
int sid_0 = stride * get_group_id(0);
|
||||
int gid_0 = sid_0 / height;
|
||||
int gid_1 = sid_0 % height;
|
||||
for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
|
||||
sid++, gid_0 = sid / height, gid_1 = sid % height) {
|
||||
int2 srcCoord = 0;
|
||||
int2 refCoord = 0;
|
||||
|
||||
srcCoord.x = gid_0 * 16 + get_global_offset(0);
|
||||
srcCoord.y = gid_1 * 16 + get_global_offset(1);
|
||||
|
||||
short2 predMV = 0;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (prediction_motion_vector_buffer != NULL)
|
||||
#endif
|
||||
{
|
||||
predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * width];
|
||||
refCoord.x = predMV.x / 4;
|
||||
refCoord.y = predMV.y / 4;
|
||||
refCoord.y = refCoord.y & 0xFFFE;
|
||||
}
|
||||
|
||||
{
|
||||
intel_work_group_vme_mb_query(dst, srcCoord, refCoord, srcImg, refImg,
|
||||
accelerator);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// Write Out Result
|
||||
|
||||
// 4x4
|
||||
if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
|
||||
int x = get_local_id(0) % 4;
|
||||
int y = get_local_id(0) / 4;
|
||||
int index = (gid_0 * 4 + x) + (gid_1 * 4 + y) * width * 4;
|
||||
|
||||
short2 val = as_short2(dst[8 + (y * 4 + x) * 2]);
|
||||
motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
residuals[index] = dist[y * 4 + x];
|
||||
}
|
||||
}
|
||||
|
||||
// 8x8
|
||||
if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
|
||||
if (get_local_id(0) < 4) {
|
||||
int x = get_local_id(0) % 2;
|
||||
int y = get_local_id(0) / 2;
|
||||
int index = (gid_0 * 2 + x) + (gid_1 * 2 + y) * width * 2;
|
||||
short2 val = as_short2(dst[8 + (y * 2 + x) * 8]);
|
||||
motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
residuals[index] = dist[(y * 2 + x) * 4];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 16x16
|
||||
if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
|
||||
if (get_local_id(0) == 0) {
|
||||
int index = gid_0 + gid_1 * width;
|
||||
|
||||
short2 val = as_short2(dst[8]);
|
||||
motion_vector_buffer[index] = val;
|
||||
|
||||
#ifndef HW_NULL_CHECK
|
||||
if (residuals != NULL)
|
||||
#endif
|
||||
{
|
||||
residuals[index] = dist[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
)==="
|
||||
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
 * Raw-string payload holding the OpenCL C declaration of
 * block_motion_estimate_intel.
 *
 * The kernel requires a 16x1x1 work-group and declares its argument list,
 * but its body is empty: launching this source performs no work.
 * NOTE(review): presumably this is a signature-only stub and the functional
 * implementation is supplied elsewhere - confirm against the code that
 * registers/loads this resource.
 */
R"===(
|
||||
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
|
||||
block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg,
|
||||
__read_only image2d_t refImg,
|
||||
__global short2 *prediction_motion_vector_buffer,
|
||||
__global short2 *motion_vector_buffer,
|
||||
__global ushort *residuals) {
|
||||
}
|
||||
)==="
|
||||
40
runtime/built_ins/registry/CMakeLists.txt
Normal file
40
runtime/built_ins/registry/CMakeLists.txt
Normal file
@@ -0,0 +1,40 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# Files that make up the built-ins registration library (this CMakeLists.txt
# is listed alongside the two registration translation units).
set (NEO_REGISTERED_BUILTINS_SOURCES
|
||||
register_copy_kernels_source.cpp
|
||||
register_ext_vme_source.cpp
|
||||
CMakeLists.txt
|
||||
)
|
||||
|
||||
# Build the files above as an OBJECT library; the target name comes from
# BUILTINS_SOURCES_LIB_NAME, which is defined outside this file.
add_library(${BUILTINS_SOURCES_LIB_NAME} OBJECT
|
||||
${NEO_REGISTERED_BUILTINS_SOURCES}
|
||||
)
|
||||
|
||||
# Compile the objects as position-independent code.
set_target_properties(${BUILTINS_SOURCES_LIB_NAME}
|
||||
PROPERTIES POSITION_INDEPENDENT_CODE ON
|
||||
)
|
||||
|
||||
# Include paths used only when compiling this target (PRIVATE, so they are
# not propagated to consumers). All four variables are defined outside this file.
target_include_directories(${BUILTINS_SOURCES_LIB_NAME} PRIVATE
|
||||
${KHRONOS_HEADERS_DIR}
|
||||
${UMKM_SHAREDDATA_INCLUDE_PATHS}
|
||||
${IGDRCL__IGC_INCLUDE_DIR}
|
||||
${THIRD_PARTY_DIR}
|
||||
)
|
||||
42
runtime/built_ins/registry/built_ins_registry.h
Normal file
42
runtime/built_ins/registry/built_ins_registry.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
struct RegisterEmbeddedResource {
|
||||
RegisterEmbeddedResource(const char *name, const char *resource, size_t resourceLength) {
|
||||
auto &storageRegistry = EmbeddedStorageRegistry::getInstance();
|
||||
storageRegistry.store(name, createBuiltinResource(resource, resourceLength));
|
||||
}
|
||||
|
||||
RegisterEmbeddedResource(const char *name, std::string &&resource)
|
||||
: RegisterEmbeddedResource(name, resource.data(), resource.size() + 1) {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OCLRT
|
||||
127
runtime/built_ins/registry/register_copy_kernels_source.cpp
Normal file
127
runtime/built_ins/registry/register_copy_kernels_source.cpp
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include "runtime/built_ins/registry/built_ins_registry.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Source registrations for the buffer/image copy and fill built-in kernels.
// Each file-local object below invokes RegisterEmbeddedResource(const char *, std::string &&):
//  * the name comes from createBuiltinResourceName() for the built-in operation and the
//    extension of BuiltinCode::ECodeType::Source;
//  * the resource is the std::string constructed from the #included .igdrcl_built_in file.
// The #include directives must stay on their own lines.

static RegisterEmbeddedResource registerCopyBufferToBufferSrc{
    createBuiltinResourceName(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/copy_buffer_to_buffer.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerCopyBufferRectSrc{
    createBuiltinResourceName(EBuiltInOps::CopyBufferRect, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/copy_buffer_rect.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerFillBufferSrc{
    createBuiltinResourceName(EBuiltInOps::FillBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/fill_buffer.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerCopyBufferToImage3dSrc{
    createBuiltinResourceName(EBuiltInOps::CopyBufferToImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/copy_buffer_to_image3d.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerCopyImage3dToBufferSrc{
    createBuiltinResourceName(EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/copy_image3d_to_buffer.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerCopyImageToImage1dSrc{
    createBuiltinResourceName(EBuiltInOps::CopyImageToImage1d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/copy_image_to_image1d.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerCopyImageToImage2dSrc{
    createBuiltinResourceName(EBuiltInOps::CopyImageToImage2d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/copy_image_to_image2d.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerCopyImageToImage3dSrc{
    createBuiltinResourceName(EBuiltInOps::CopyImageToImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/copy_image_to_image3d.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerFillImage1dSrc{
    createBuiltinResourceName(EBuiltInOps::FillImage1d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/fill_image1d.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerFillImage2dSrc{
    createBuiltinResourceName(EBuiltInOps::FillImage2d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/fill_image2d.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerFillImage3dSrc{
    createBuiltinResourceName(EBuiltInOps::FillImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/fill_image3d.igdrcl_built_in"
        )};
|
||||
|
||||
} // namespace OCLRT
|
||||
55
runtime/built_ins/registry/register_ext_vme_source.cpp
Normal file
55
runtime/built_ins/registry/register_ext_vme_source.cpp
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include "runtime/built_ins/registry/built_ins_registry.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Source registrations for the three VME built-in kernels. Each file-local object calls
// RegisterEmbeddedResource(const char *, std::string &&) with the name produced by
// createBuiltinResourceName() for the operation and the BuiltinCode::ECodeType::Source
// extension, and with the std::string constructed from the #included .igdrcl_built_in file.
// The #include directives must stay on their own lines.

static RegisterEmbeddedResource registerVmeSrc{
    createBuiltinResourceName(EBuiltInOps::VmeBlockMotionEstimateIntel, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/vme_block_motion_estimate_intel.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerVmeAdvancedSrc{
    createBuiltinResourceName(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in"
        )};

static RegisterEmbeddedResource registerVmeAdvancedBidirectionalSrc{
    createBuiltinResourceName(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)).c_str(),
    std::string(
#include "runtime/built_ins/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.igdrcl_built_in"
        )};
|
||||
|
||||
} // namespace OCLRT
|
||||
81
runtime/built_ins/sip.cpp
Normal file
81
runtime/built_ins/sip.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/built_ins/sip.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Maps a SIP kernel type to the internal compiler option string used for it.
// SipKernelType::Csr yields "-cl-include-sip-csr"; any other value evaluates
// DEBUG_BREAK_IF(true) and yields an empty string.
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel) {
    if (kernel == SipKernelType::Csr) {
        return "-cl-include-sip-csr";
    }

    // Unknown kernel type.
    DEBUG_BREAK_IF(true);
    return "";
}
|
||||
|
||||
const char *getSipLlSrc(const Device &device) {
|
||||
#define M_DUMMY_LL_SRC \
|
||||
"define void @f() { \n" \
|
||||
" ret void \n" \
|
||||
"} \n" \
|
||||
"!opencl.compiler.options = !{!0} \n" \
|
||||
"!opencl.kernels = !{!1} \n" \
|
||||
"!0 = !{} \n" \
|
||||
"!1 = !{void()* @f, !2, !3, !4, !5, !6, !7} \n" \
|
||||
"!2 = !{!\"kernel_arg_addr_space\"} \n" \
|
||||
"!3 = !{!\"kernel_arg_access_qual\"} \n" \
|
||||
"!4 = !{!\"kernel_arg_type\"} \n" \
|
||||
"!5 = !{!\"kernel_arg_type_qual\"} \n" \
|
||||
"!6 = !{!\"kernel_arg_base_type\"} \n" \
|
||||
"!7 = !{!\"kernel_arg_name\"} \n"
|
||||
|
||||
constexpr const char *llDummySrc32 =
|
||||
"target datalayout = \"e-p:32:32:32\" \n"
|
||||
"target triple = \"spir\" \n" M_DUMMY_LL_SRC;
|
||||
|
||||
constexpr const char *llDummySrc64 =
|
||||
"target datalayout = \"e-p:64:64:64\" \n"
|
||||
"target triple = \"spir64\" \n" M_DUMMY_LL_SRC;
|
||||
|
||||
#undef M_DUMMY_LL_SRC
|
||||
|
||||
const uint32_t ptrSize = device.getDeviceInfo().force32BitAddressess ? 4 : sizeof(void *);
|
||||
|
||||
return (ptrSize == 8) ? llDummySrc64 : llDummySrc32;
|
||||
}
|
||||
|
||||
// Builds a SipKernel of the given type that keeps its own copy of the
// `binarySize` bytes at `binary`. A null `binary` or a zero `binarySize`
// is rejected via UNRECOVERABLE_IF.
SipKernel::SipKernel(SipKernelType type, const void *binary, size_t binarySize)
    : type(type), binarySize(binarySize) {
    UNRECOVERABLE_IF(nullptr == binary);
    UNRECOVERABLE_IF(0 == binarySize);

    // Allocate the owned storage first, then copy the caller's bytes into it.
    this->binary.reset(new char[binarySize]);
    char *ownedBytes = this->binary.get();
    memcpy_s(ownedBytes, binarySize, binary, binarySize);
}
|
||||
}
|
||||
61
runtime/built_ins/sip.h
Normal file
61
runtime/built_ins/sip.h
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cinttypes>
|
||||
#include <memory>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
class Device;
|
||||
|
||||
// Identifies a SIP kernel flavour. Csr is the only kernel type defined here.
// COUNT is not a kernel: it directly follows the last real entry, and SipKernel
// uses it as the default value of its `type` member.
enum class SipKernelType : std::uint32_t {
|
||||
Csr = 0,
|
||||
COUNT
|
||||
};
|
||||
|
||||
// Returns the internal compiler option string associated with the given SIP kernel type.
const char *getSipKernelCompilerInternalOptions(SipKernelType kernel);
|
||||
|
||||
// Returns a pointer to a static dummy .ll source string selected for `device`.
const char *getSipLlSrc(const Device &device);
|
||||
|
||||
class SipKernel {
|
||||
public:
|
||||
SipKernel(SipKernelType type, const void *binary, size_t binarySize);
|
||||
SipKernel(const SipKernel &) = delete;
|
||||
SipKernel &operator=(const SipKernel &) = delete;
|
||||
SipKernel(SipKernel &&) = default;
|
||||
SipKernel &operator=(SipKernel &&) = default;
|
||||
|
||||
const char *getBinary() const {
|
||||
return binary.get();
|
||||
}
|
||||
size_t getBinarySize() const {
|
||||
return binarySize;
|
||||
}
|
||||
|
||||
protected:
|
||||
SipKernelType type = SipKernelType::COUNT;
|
||||
std::unique_ptr<char[]> binary = nullptr;
|
||||
size_t binarySize = 0;
|
||||
};
|
||||
}
|
||||
492
runtime/built_ins/vme_dispatch_builder.h
Normal file
492
runtime/built_ins/vme_dispatch_builder.h
Normal file
@@ -0,0 +1,492 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/accelerators/intel_accelerator.h"
|
||||
#include "runtime/accelerators/intel_motion_estimation.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/helpers/dispatch_info_builder.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
|
||||
namespace OCLRT {
|
||||
template <typename HWFamily>
|
||||
class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
VmeBuiltinDispatchInfoBuilder(BuiltIns &kernelsLib, Context &context, Device &device, EBuiltInOps builtinOp,
|
||||
const char *kernelName)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib) {
|
||||
populate(context, device, builtinOp,
|
||||
mediaKernelsBuildOptions,
|
||||
kernelName, vmeKernel);
|
||||
widthArgNum = vmeKernel->getKernelInfo().getArgNumByName("width");
|
||||
heightArgNum = vmeKernel->getKernelInfo().getArgNumByName("height");
|
||||
strideArgNum = vmeKernel->getKernelInfo().getArgNumByName("stride");
|
||||
acceleratorArgNum = vmeKernel->getKernelInfo().getArgNumByName("accelerator");
|
||||
srcImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("srcImg");
|
||||
refImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("refImg");
|
||||
motionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("motion_vector_buffer");
|
||||
predictionMotionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("prediction_motion_vector_buffer");
|
||||
residualsArgNum = vmeKernel->getKernelInfo().getArgNumByName("residuals");
|
||||
}
|
||||
|
||||
void getBlkTraits(const Vec3<size_t> &inGws, size_t &gwWidthInBlk, size_t &gwHeightInBlk) const {
|
||||
const size_t vmeMacroBlockWidth = 16;
|
||||
const size_t vmeMacroBlockHeight = 16;
|
||||
gwWidthInBlk = (inGws.x + vmeMacroBlockWidth - 1) / vmeMacroBlockWidth;
|
||||
gwHeightInBlk = (inGws.y + vmeMacroBlockHeight - 1) / vmeMacroBlockHeight;
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, Kernel *kern,
|
||||
const uint32_t inDim, const Vec3<size_t> &inGws, const Vec3<size_t> &inLws, const Vec3<size_t> &inOffset) const override {
|
||||
if (kern == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t gwWidthInBlk = 0;
|
||||
size_t gwHeightInBlk = 0;
|
||||
getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk);
|
||||
|
||||
cl_int height = (cl_int)gwHeightInBlk;
|
||||
cl_int width = (cl_int)gwWidthInBlk;
|
||||
cl_int stride = height;
|
||||
size_t numThreadsX = gwWidthInBlk;
|
||||
const size_t simdWidth = vmeKernel->getKernelInfo().getMaxSimdSize();
|
||||
stride = (height * width + (cl_int)numThreadsX - 1) / (cl_int)numThreadsX;
|
||||
|
||||
// update implicit args
|
||||
vmeKernel->setArg(heightArgNum, sizeof(height), &height);
|
||||
vmeKernel->setArg(widthArgNum, sizeof(width), &width);
|
||||
vmeKernel->setArg(strideArgNum, sizeof(stride), &stride);
|
||||
|
||||
// Update global work size to force macro-block to HW thread execution model
|
||||
Vec3<size_t> gws = {numThreadsX * simdWidth, 1, 1};
|
||||
Vec3<size_t> lws = {vmeKernel->getKernelInfo().reqdWorkGroupSize[0], 1, 1};
|
||||
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d2D, SplitDispatch::SplitMode::NoSplit> builder;
|
||||
builder.setDispatchGeometry(gws, lws, inOffset, gws, lws);
|
||||
builder.setKernel(vmeKernel);
|
||||
builder.bake(multiDispatchInfo);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override {
|
||||
DEBUG_BREAK_IF(!((argIndex != widthArgNum) && (argIndex != heightArgNum) && (argIndex != strideArgNum)));
|
||||
if ((argIndex == acceleratorArgNum) && (argVal == nullptr)) {
|
||||
err = CL_INVALID_ACCELERATOR_INTEL;
|
||||
return false;
|
||||
}
|
||||
err = vmeKernel->setArg(argIndex, argSize, argVal);
|
||||
return false;
|
||||
}
|
||||
|
||||
cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3<size_t> &inGws, const Vec3<size_t> &inLws, const Vec3<size_t> &inOffset) const override {
|
||||
if (inworkDim != 2) {
|
||||
return CL_INVALID_WORK_DIMENSION;
|
||||
}
|
||||
|
||||
size_t gwWidthInBlk = 0;
|
||||
size_t gwHeightInBlk = 0;
|
||||
getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk);
|
||||
|
||||
size_t BlkNum = gwWidthInBlk * gwHeightInBlk;
|
||||
size_t BlkMul = 1;
|
||||
IntelAccelerator *accelerator = castToObject<IntelAccelerator>((cl_accelerator_intel)vmeKernel->getKernelArg(acceleratorArgNum));
|
||||
if (accelerator == nullptr) {
|
||||
return CL_INVALID_KERNEL_ARGS; // accelerator was not set
|
||||
}
|
||||
DEBUG_BREAK_IF(accelerator->getDescriptorSize() != sizeof(cl_motion_estimation_desc_intel));
|
||||
const cl_motion_estimation_desc_intel *acceleratorDesc = reinterpret_cast<const cl_motion_estimation_desc_intel *>(accelerator->getDescriptor());
|
||||
switch (acceleratorDesc->mb_block_type) {
|
||||
case CL_ME_MB_TYPE_8x8_INTEL:
|
||||
BlkMul = 4;
|
||||
break;
|
||||
case CL_ME_MB_TYPE_4x4_INTEL:
|
||||
BlkMul = 16;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return validateVmeDispatch(inGws, inOffset, BlkNum, BlkMul);
|
||||
}
|
||||
|
||||
// notes on corner cases :
|
||||
// * if arg not available in kernels - returns true
|
||||
// * if arg set to nullptr - returns true
|
||||
bool validateBufferSize(int32_t bufferArgNum, size_t minimumSizeExpected) const {
|
||||
if (bufferArgNum == -1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto buff = castToObject<Buffer>((cl_mem)vmeKernel->getKernelArg(bufferArgNum));
|
||||
if (buff == nullptr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t bufferSize = buff->getSize();
|
||||
if (bufferSize < minimumSizeExpected) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename EnumBaseType>
|
||||
bool validateEnumVal(EnumBaseType val) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename EnumBaseType, typename ExpectedValType, typename... ExpectedValsTypes>
|
||||
bool validateEnumVal(EnumBaseType val, ExpectedValType expectedVal, ExpectedValsTypes... expVals) const {
|
||||
return (val == static_cast<EnumBaseType>(expectedVal)) || validateEnumVal<EnumBaseType, ExpectedValsTypes...>(val, expVals...);
|
||||
}
|
||||
|
||||
// notes on corner cases :
|
||||
// * if arg not available in kernels - returns true
|
||||
template <typename EnumBaseType, typename... ExpectedValsTypes>
|
||||
bool validateEnumArg(int32_t argNum, ExpectedValsTypes... expVals) const {
|
||||
if (argNum == -1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
EnumBaseType val = this->getKernelArgByValValue<EnumBaseType>(static_cast<uint32_t>(argNum));
|
||||
return validateEnumVal<EnumBaseType, ExpectedValsTypes...>(val, expVals...);
|
||||
}
|
||||
|
||||
template <typename RetType>
|
||||
RetType getKernelArgByValValue(uint32_t argNum) const {
|
||||
auto &kai = vmeKernel->getKernelInfo().kernelArgInfo[argNum];
|
||||
DEBUG_BREAK_IF(kai.kernelArgPatchInfoVector.size() != 1);
|
||||
const KernelArgPatchInfo &patchInfo = kai.kernelArgPatchInfoVector[0];
|
||||
DEBUG_BREAK_IF(sizeof(RetType) > patchInfo.size);
|
||||
return *(RetType *)(vmeKernel->getCrossThreadData() + patchInfo.crossthreadOffset);
|
||||
}
|
||||
|
||||
cl_int validateImages(Vec3<size_t> inputRegion, Vec3<size_t> offset) const {
|
||||
Image *srcImg = castToObject<Image>((cl_mem)vmeKernel->getKernelArg(srcImgArgNum));
|
||||
Image *refImg = castToObject<Image>((cl_mem)vmeKernel->getKernelArg(refImgArgNum));
|
||||
|
||||
if ((srcImg == nullptr) || (refImg == nullptr)) {
|
||||
return CL_INVALID_KERNEL_ARGS;
|
||||
}
|
||||
|
||||
for (Image *img : {srcImg, refImg}) {
|
||||
const cl_image_format &imgFormat = img->getImageFormat();
|
||||
if ((imgFormat.image_channel_order != CL_R) || (imgFormat.image_channel_data_type != CL_UNORM_INT8)) {
|
||||
return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
|
||||
}
|
||||
|
||||
if (false == img->isTiledImage) {
|
||||
//VME only works with tiled images.
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const cl_image_desc &srcImgDesc = srcImg->getImageDesc();
|
||||
|
||||
size_t srcImageWidth = srcImgDesc.image_width;
|
||||
size_t srcImageHeight = srcImgDesc.image_height;
|
||||
if (((inputRegion.x + offset.x) > srcImageWidth) ||
|
||||
((inputRegion.y + offset.y) > srcImageHeight)) {
|
||||
return CL_INVALID_IMAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset, size_t blkNum, size_t blkMul) const {
|
||||
{
|
||||
cl_int imageValidationStatus = validateImages(inputRegion, offset);
|
||||
if (imageValidationStatus != CL_SUCCESS) {
|
||||
return imageValidationStatus;
|
||||
}
|
||||
}
|
||||
|
||||
size_t numPredictors = 1;
|
||||
std::pair<int32_t, size_t> bufferRequirements[] = {
|
||||
std::make_pair(motionVectorBufferArgNum, (blkNum * blkMul * 2 * sizeof(cl_short))),
|
||||
std::make_pair(predictionMotionVectorBufferArgNum, (blkNum * numPredictors * 2 * sizeof(cl_short))),
|
||||
std::make_pair(residualsArgNum, (blkNum * blkMul * sizeof(cl_ushort)))};
|
||||
for (const auto &req : bufferRequirements) {
|
||||
if (false == validateBufferSize(req.first, req.second)) {
|
||||
return CL_INVALID_BUFFER_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
protected:
|
||||
uint32_t heightArgNum;
|
||||
uint32_t widthArgNum;
|
||||
uint32_t strideArgNum;
|
||||
uint32_t acceleratorArgNum;
|
||||
uint32_t srcImgArgNum;
|
||||
uint32_t refImgArgNum;
|
||||
int32_t motionVectorBufferArgNum;
|
||||
int32_t predictionMotionVectorBufferArgNum;
|
||||
int32_t residualsArgNum;
|
||||
Kernel *vmeKernel;
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::VmeBlockMotionEstimateIntel> : public VmeBuiltinDispatchInfoBuilder<HWFamily> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: VmeBuiltinDispatchInfoBuilder<HWFamily>(kernelsLib, context, device,
|
||||
EBuiltInOps::VmeBlockMotionEstimateIntel, "block_motion_estimate_intel") {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class AdvancedVmeBuiltinDispatchInfoBuilder : public VmeBuiltinDispatchInfoBuilder<HWFamily> {
|
||||
public:
|
||||
AdvancedVmeBuiltinDispatchInfoBuilder(BuiltIns &kernelsLib, Context &context, Device &device, EBuiltInOps builtinOp,
|
||||
const char *kernelName)
|
||||
: VmeBuiltinDispatchInfoBuilder<HWFamily>(kernelsLib, context, device, builtinOp,
|
||||
kernelName) {
|
||||
flagsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("flags");
|
||||
intraSrcImgArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intraSrcImg");
|
||||
skipBlockTypeArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_block_type");
|
||||
searchCostPenaltyArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_penalty");
|
||||
searchCostPrecisionArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_precision");
|
||||
bidirWeightArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("bidir_weight");
|
||||
predictorsBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("predictors_buffer");
|
||||
countMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("count_motion_vector_buffer");
|
||||
skipMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_motion_vector_buffer");
|
||||
intraSearchPredictorModesArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_search_predictor_modes");
|
||||
skipResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_residuals");
|
||||
intraResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_residuals");
|
||||
}
|
||||
|
||||
bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override {
|
||||
DEBUG_BREAK_IF(argIndex == intraSrcImgArgNum);
|
||||
if (argIndex == this->srcImgArgNum) {
|
||||
// rebind also as media block image
|
||||
this->vmeKernel->setArg(intraSrcImgArgNum, argSize, argVal);
|
||||
}
|
||||
return VmeBuiltinDispatchInfoBuilder<HWFamily>::setExplicitArg(argIndex, argSize, argVal, err);
|
||||
}
|
||||
|
||||
virtual bool isBidirKernel() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool validateFlags(uint32_t &outSkipBlockType) const {
|
||||
uint32_t flagsVal = VmeBuiltinDispatchInfoBuilder<HWFamily>::template getKernelArgByValValue<uint32_t>(flagsArgNum);
|
||||
|
||||
if ((flagsVal & CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL) == CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (flagsVal == CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL) {
|
||||
outSkipBlockType = CL_ME_MB_TYPE_16x16_INTEL;
|
||||
} else if ((flagsVal & CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL) == CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL) {
|
||||
outSkipBlockType = CL_ME_MB_TYPE_8x8_INTEL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool validateSkipBlockTypeArg(uint32_t &outSkipBlockType) const {
|
||||
if (skipBlockTypeArgNum == -1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
outSkipBlockType = VmeBuiltinDispatchInfoBuilder<HWFamily>::template getKernelArgByValValue<uint32_t>(static_cast<uint32_t>(skipBlockTypeArgNum));
|
||||
|
||||
switch (outSkipBlockType) {
|
||||
case CL_ME_MB_TYPE_16x16_INTEL:
|
||||
break;
|
||||
case CL_ME_MB_TYPE_8x8_INTEL:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t getIntraSearchPredictorModesBuffExpSize(size_t blkNum) const {
|
||||
// vector size is 22 - 1 (16x16 luma block) + 4 (8x8 luma block) + 16 (4x4 luma block) + 1 (8x8 chroma block)
|
||||
int VectorSize = 22;
|
||||
size_t intraSearchPredictorModesBuffExpSize = blkNum * VectorSize;
|
||||
return intraSearchPredictorModesBuffExpSize;
|
||||
}
|
||||
|
||||
size_t getSkipMotionVectorBufferExpSize(uint32_t skipBlockType, size_t blkNum) const {
|
||||
// vector size is either 1 (16x16 block) or 4 (8x8 block)
|
||||
// 0 to 8 skip MVs per MB
|
||||
// may be null if all MBs in frame have 0 skip check MVs in which case VME skip checks are not performed
|
||||
// layout assumes 4 (for bidir) or 8 (otherwise) skip check MVs per MB
|
||||
// row-major block layout; all MVs for a block are contiguous
|
||||
// buffer size depends on the block and frame size .
|
||||
int vectorSize = (skipBlockType == CL_ME_MB_TYPE_16x16_INTEL) ? 1 : 4;
|
||||
int numChecks = (isBidirKernel() ? 4 : 8);
|
||||
size_t skipMotionVectorBufferExpSize = blkNum * numChecks * vectorSize * 2 * sizeof(cl_short);
|
||||
return skipMotionVectorBufferExpSize;
|
||||
}
|
||||
|
||||
size_t getSkipResidualsBuffExpSize(uint32_t skipBlockType, size_t blkNum) const {
|
||||
/* output buffer of vectors of unsigned short SAD adjusted values corresponding to the input skip check MVs
|
||||
may be null if skip_motion_vector_buffer is null
|
||||
vector size is either 1 (16x16 block) or 4 (8x8 block)
|
||||
0 to 8 skip check residuals per MB
|
||||
layout always assumes 8 skip check residuals per MB
|
||||
row major block layout; all MVs for a block are contiguous
|
||||
buffer size depends on the block and frame size */
|
||||
int vectorSize = 1;
|
||||
switch (skipBlockType) {
|
||||
case CL_ME_MB_TYPE_16x16_INTEL:
|
||||
vectorSize = 1;
|
||||
break;
|
||||
case CL_ME_MB_TYPE_8x8_INTEL:
|
||||
vectorSize = 4;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
};
|
||||
|
||||
int numChecks = (isBidirKernel() ? 4 : 8);
|
||||
size_t skipResidualsBuffExpSize = blkNum * vectorSize * numChecks * sizeof(cl_ushort);
|
||||
return skipResidualsBuffExpSize;
|
||||
}
|
||||
|
||||
size_t getIntraResidualsBuffExpSize(size_t blkNum) const {
|
||||
/* output buffer of vectors of unsigned short SAD adjusted values
|
||||
may be null in which case the intra residuals corresponding not returned
|
||||
vector size is 4 - 1 (16x16 luma block) + 1 (8x8 luma block) + 1 (4x4 luma block) + 1 (8x8 chroma block)
|
||||
1 vector per MB
|
||||
buffer size depends on the frame size */
|
||||
int vectorSize = 4;
|
||||
size_t intraResidualsBuffExpSize = (blkNum * sizeof(cl_ushort) * vectorSize);
|
||||
return intraResidualsBuffExpSize;
|
||||
}
|
||||
|
||||
size_t getPredictorsBufferExpSize(size_t blkNum) const {
|
||||
size_t numPredictors = 8;
|
||||
size_t predictorsBufferExpSize = (blkNum * numPredictors * 2 * sizeof(cl_short));
|
||||
return predictorsBufferExpSize;
|
||||
}
|
||||
|
||||
cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset, size_t blkNum, size_t blkMul) const override {
|
||||
cl_int basicVmeValidationStatus = VmeBuiltinDispatchInfoBuilder<HWFamily>::validateVmeDispatch(inputRegion, offset, blkNum, blkMul);
|
||||
if (basicVmeValidationStatus != CL_SUCCESS) {
|
||||
return basicVmeValidationStatus;
|
||||
}
|
||||
|
||||
uint32_t skipBlockType = CL_ME_MB_TYPE_16x16_INTEL;
|
||||
if (false == validateFlags(skipBlockType)) {
|
||||
return CL_INVALID_KERNEL_ARGS;
|
||||
}
|
||||
|
||||
if (false == validateSkipBlockTypeArg(skipBlockType)) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
if (false == VmeBuiltinDispatchInfoBuilder<HWFamily>::template validateEnumArg<uint32_t>(searchCostPenaltyArgNum, CL_ME_COST_PENALTY_NONE_INTEL, CL_ME_COST_PENALTY_LOW_INTEL, CL_ME_COST_PENALTY_NORMAL_INTEL,
|
||||
CL_ME_COST_PENALTY_HIGH_INTEL)) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
if (false == VmeBuiltinDispatchInfoBuilder<HWFamily>::template validateEnumArg<uint32_t>(searchCostPrecisionArgNum, CL_ME_COST_PRECISION_QPEL_INTEL, CL_ME_COST_PRECISION_HPEL_INTEL, CL_ME_COST_PRECISION_PEL_INTEL,
|
||||
CL_ME_COST_PRECISION_DPEL_INTEL)) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
if (false == VmeBuiltinDispatchInfoBuilder<HWFamily>::template validateEnumArg<uint8_t>(bidirWeightArgNum, 0, CL_ME_BIDIR_WEIGHT_QUARTER_INTEL, CL_ME_BIDIR_WEIGHT_THIRD_INTEL, CL_ME_BIDIR_WEIGHT_HALF_INTEL,
|
||||
CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL, CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL)) {
|
||||
return CL_INVALID_KERNEL_ARGS;
|
||||
}
|
||||
|
||||
std::pair<int32_t, size_t> bufferRequirements[] = {
|
||||
std::make_pair(countMotionVectorBufferArgNum, (blkNum * 2 * sizeof(cl_short))),
|
||||
std::make_pair(skipMotionVectorBufferArgNum, getSkipMotionVectorBufferExpSize(skipBlockType, blkNum)),
|
||||
std::make_pair(intraSearchPredictorModesArgNum, getIntraSearchPredictorModesBuffExpSize(blkNum)),
|
||||
std::make_pair(skipResidualsArgNum, getSkipResidualsBuffExpSize(skipBlockType, blkNum)),
|
||||
std::make_pair(intraResidualsArgNum, getIntraResidualsBuffExpSize(blkNum)),
|
||||
std::make_pair(predictorsBufferArgNum, getPredictorsBufferExpSize(blkNum))};
|
||||
for (const auto &req : bufferRequirements) {
|
||||
if (false == this->validateBufferSize(req.first, req.second)) {
|
||||
return CL_INVALID_BUFFER_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
protected:
|
||||
uint32_t flagsArgNum;
|
||||
int32_t skipBlockTypeArgNum;
|
||||
uint32_t searchCostPenaltyArgNum;
|
||||
uint32_t searchCostPrecisionArgNum;
|
||||
int32_t bidirWeightArgNum;
|
||||
int32_t predictorsBufferArgNum;
|
||||
uint32_t countMotionVectorBufferArgNum;
|
||||
uint32_t skipMotionVectorBufferArgNum;
|
||||
uint32_t intraSearchPredictorModesArgNum;
|
||||
uint32_t skipResidualsArgNum;
|
||||
uint32_t intraResidualsArgNum;
|
||||
uint32_t intraSrcImgArgNum;
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel> : public AdvancedVmeBuiltinDispatchInfoBuilder<HWFamily> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: AdvancedVmeBuiltinDispatchInfoBuilder<HWFamily>(kernelsLib, context, device, EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel,
|
||||
"block_advanced_motion_estimate_check_intel") {
|
||||
}
|
||||
|
||||
cl_int validateVmeDispatch(Vec3<size_t> inputRegion, Vec3<size_t> offset,
|
||||
size_t gwWidthInBlk, size_t gwHeightInBlk) const override {
|
||||
cl_int basicAdvVmeValidationStatus = AdvancedVmeBuiltinDispatchInfoBuilder<HWFamily>::validateVmeDispatch(inputRegion, offset, gwWidthInBlk, gwHeightInBlk);
|
||||
if (basicAdvVmeValidationStatus != CL_SUCCESS) {
|
||||
return basicAdvVmeValidationStatus;
|
||||
}
|
||||
|
||||
auto countMotionVectorBuff = castToObject<Buffer>((cl_mem)this->vmeKernel->getKernelArg(this->countMotionVectorBufferArgNum));
|
||||
if (countMotionVectorBuff == nullptr) {
|
||||
return CL_INVALID_BUFFER_SIZE;
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename HWFamily>
|
||||
class BuiltInOp<HWFamily, EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel> : public AdvancedVmeBuiltinDispatchInfoBuilder<HWFamily> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: AdvancedVmeBuiltinDispatchInfoBuilder<HWFamily>(kernelsLib, context, device, EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel,
|
||||
"block_advanced_motion_estimate_bidirectional_check_intel") {
|
||||
}
|
||||
|
||||
bool isBidirKernel() const override {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
74
runtime/builtin_kernels_simulation/CMakeLists.txt
Normal file
74
runtime/builtin_kernels_simulation/CMakeLists.txt
Normal file
@@ -0,0 +1,74 @@
|
||||
# Copyright (c) 2017, Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# Common (generation independent) sources of the built-in kernels simulation
# object library. Per-generation sources are appended in the loop below.
set(BUILTIN_KERNELS_SIMULATION_SRCS
  "${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt"
  "${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp"
  "${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h"
  "${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.cpp"
  "${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.inl"
  "${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.h"
)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

# Strip warnings-as-errors and a few noisy warnings for this directory.
# The input is quoted on purpose: an unquoted, empty CMAKE_CXX_FLAGS would leave
# string() with too few arguments, and a value containing ';' would be split
# into several inputs.
string(REPLACE "/WX" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REGEX REPLACE "-Werror[^ \t\n]*" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wsometimes-uninitialized" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wsign-compare" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wunused-variable" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

# Quoted so that an empty compiler id does not produce a malformed if().
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-everything")
endif()

if(COMPILER_SUPPORTS_CXX11)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
elseif(COMPILER_SUPPORTS_CXX0X)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
endif()

if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive -fPIC")
endif()

ENABLE_WUD()

list (APPEND HEADER_INCLUDES ${IGDRCL_SOURCE_DIR}/runtime ${UMKM_SHAREDDATA_INCLUDE_PATHS})

# Walk the generations from MAX_GEN down to 0 and, for each one that has
# supported platforms, add its default-platform definition, include directory
# and generation specific scheduler simulation source.
foreach(GEN_NUM RANGE ${MAX_GEN} 0 -1)
  GEN_CONTAINS_PLATFORMS("SUPPORTED" ${GEN_NUM} GENX_HAS_PLATFORMS)
  if(${GENX_HAS_PLATFORMS})
    list(APPEND DEFAULT_GEN_PLATFORMS_DEFITIONS DEFAULT_GEN${GEN_NUM}_PLATFORM=${DEFAULT_SUPPORTED_GEN${GEN_NUM}_PLATFORM})
    list (APPEND HEADER_INCLUDES ${IGDRCL_SOURCE_DIR}/runtime/gen${GEN_NUM})
    list (APPEND BUILTIN_KERNELS_SIMULATION_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/gen${GEN_NUM}/scheduler_simulation.cpp)
  endif()
endforeach()

add_library(${BIKSIM_LIB_NAME} OBJECT ${BUILTIN_KERNELS_SIMULATION_SRCS})
target_include_directories(${BIKSIM_LIB_NAME} BEFORE PRIVATE ${HEADER_INCLUDES})
target_include_directories(${BIKSIM_LIB_NAME} PRIVATE
  ${KHRONOS_HEADERS_DIR}
  ${IGDRCL__IGC_INCLUDE_DIR}
  ${THIRD_PARTY_DIR}
)
set_target_properties(${BIKSIM_LIB_NAME} PROPERTIES FOLDER "built_ins")
target_compile_definitions(${BIKSIM_LIB_NAME} PUBLIC ${SUPPORTED_GEN_FLAGS_DEFINITONS} ${DEFAULT_GEN_PLATFORMS_DEFITIONS})
|
||||
105
runtime/builtin_kernels_simulation/gen8/scheduler_simulation.cpp
Normal file
105
runtime/builtin_kernels_simulation/gen8/scheduler_simulation.cpp
Normal file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "runtime/builtin_kernels_simulation/opencl_c.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.inl"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/gen8/hw_cmds.h"
|
||||
#include "runtime/execution_model/device_enqueue.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
using namespace BuiltinKernelsSimulation;
|
||||
|
||||
namespace Gen8SchedulerSimulation {
|
||||
|
||||
#define SCHEDULER_EMULATION
|
||||
|
||||
uint GetNextPowerof2(uint number);
|
||||
|
||||
float __intel__getProfilingTimerResolution() {
|
||||
return static_cast<float>(DEFAULT_GEN8_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution);
|
||||
}
|
||||
|
||||
#include "runtime/gen8/device_enqueue.h"
|
||||
#include "runtime/gen8/scheduler_definitions.h"
|
||||
#include "runtime/gen8/scheduler_igdrcl_built_in.inl"
|
||||
#include "runtime/scheduler/scheduler.cl"
|
||||
}
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
template <>
|
||||
void SchedulerSimulation<BDWFamily>::startScheduler(uint32_t index,
|
||||
GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
threadIDToLocalIDmap.insert(std::make_pair(std::this_thread::get_id(), index));
|
||||
|
||||
while (!conditionReady) {
|
||||
}
|
||||
|
||||
Gen8SchedulerSimulation::SchedulerParallel20((IGIL_CommandQueue *)queue->getUnderlyingBuffer(),
|
||||
(uint *)commandsStack->getUnderlyingBuffer(),
|
||||
(IGIL_EventPool *)eventsPool->getUnderlyingBuffer(),
|
||||
(uint *)secondaryBatchBuffer->getUnderlyingBuffer(),
|
||||
(char *)dsh->getUnderlyingBuffer(),
|
||||
(IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer(),
|
||||
(uint *)queueStorageBuffer->getUnderlyingBuffer(),
|
||||
(char *)ssh->getUnderlyingBuffer(),
|
||||
debugQueue != nullptr ? (DebugDataBuffer *)debugQueue->getUnderlyingBuffer() : nullptr);
|
||||
}
|
||||
|
||||
template <>
|
||||
void SchedulerSimulation<BDWFamily>::patchGpGpuWalker(uint secondLevelBatchOffset,
|
||||
__global uint *secondaryBatchBuffer,
|
||||
uint interfaceDescriptorOffset,
|
||||
uint simdSize,
|
||||
uint totalLocalWorkSize,
|
||||
uint3 dimSize,
|
||||
uint3 startPoint,
|
||||
uint numberOfHwThreadsPerWg,
|
||||
uint indirectPayloadSize,
|
||||
uint ioHoffset) {
|
||||
Gen8SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset,
|
||||
secondaryBatchBuffer,
|
||||
interfaceDescriptorOffset,
|
||||
simdSize,
|
||||
totalLocalWorkSize,
|
||||
dimSize,
|
||||
startPoint,
|
||||
numberOfHwThreadsPerWg,
|
||||
indirectPayloadSize,
|
||||
ioHoffset);
|
||||
}
|
||||
|
||||
template class SchedulerSimulation<BDWFamily>;
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
||||
104
runtime/builtin_kernels_simulation/gen9/scheduler_simulation.cpp
Normal file
104
runtime/builtin_kernels_simulation/gen9/scheduler_simulation.cpp
Normal file
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "runtime/builtin_kernels_simulation/opencl_c.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.inl"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/gen9/hw_cmds.h"
|
||||
#include "runtime/execution_model/device_enqueue.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
using namespace BuiltinKernelsSimulation;
|
||||
|
||||
namespace OCLRT {
|
||||
struct SKLFamily;
|
||||
}
|
||||
|
||||
namespace Gen9SchedulerSimulation {
|
||||
|
||||
#define SCHEDULER_EMULATION
|
||||
|
||||
float __intel__getProfilingTimerResolution() {
|
||||
return static_cast<float>(DEFAULT_GEN9_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution);
|
||||
}
|
||||
|
||||
#include "runtime/gen9/device_enqueue.h"
|
||||
#include "runtime/gen9/scheduler_definitions.h"
|
||||
#include "runtime/gen9/scheduler_igdrcl_built_in.inl"
|
||||
#include "runtime/scheduler/scheduler.cl"
|
||||
}
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
template <>
|
||||
void SchedulerSimulation<SKLFamily>::startScheduler(uint32_t index,
|
||||
GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
threadIDToLocalIDmap.insert(std::make_pair(std::this_thread::get_id(), index));
|
||||
|
||||
while (!conditionReady) {
|
||||
}
|
||||
|
||||
Gen9SchedulerSimulation::SchedulerParallel20((IGIL_CommandQueue *)queue->getUnderlyingBuffer(),
|
||||
(uint *)commandsStack->getUnderlyingBuffer(),
|
||||
(IGIL_EventPool *)eventsPool->getUnderlyingBuffer(),
|
||||
(uint *)secondaryBatchBuffer->getUnderlyingBuffer(),
|
||||
(char *)dsh->getUnderlyingBuffer(),
|
||||
(IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer(),
|
||||
(uint *)queueStorageBuffer->getUnderlyingBuffer(),
|
||||
(char *)ssh->getUnderlyingBuffer(),
|
||||
debugQueue != nullptr ? (DebugDataBuffer *)debugQueue->getUnderlyingBuffer() : nullptr);
|
||||
}
|
||||
template <>
|
||||
void SchedulerSimulation<SKLFamily>::patchGpGpuWalker(uint secondLevelBatchOffset,
|
||||
__global uint *secondaryBatchBuffer,
|
||||
uint interfaceDescriptorOffset,
|
||||
uint simdSize,
|
||||
uint totalLocalWorkSize,
|
||||
uint3 dimSize,
|
||||
uint3 startPoint,
|
||||
uint numberOfHwThreadsPerWg,
|
||||
uint indirectPayloadSize,
|
||||
uint ioHoffset) {
|
||||
Gen9SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset,
|
||||
secondaryBatchBuffer,
|
||||
interfaceDescriptorOffset,
|
||||
simdSize,
|
||||
totalLocalWorkSize,
|
||||
dimSize,
|
||||
startPoint,
|
||||
numberOfHwThreadsPerWg,
|
||||
indirectPayloadSize,
|
||||
ioHoffset);
|
||||
}
|
||||
template class SchedulerSimulation<SKLFamily>;
|
||||
} // namespace BuiltinKernelsSimulation
|
||||
152
runtime/builtin_kernels_simulation/opencl_c.cpp
Normal file
152
runtime/builtin_kernels_simulation/opencl_c.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "CL/cl.h"
|
||||
#include "opencl_c.h"
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
#define SCHEDULER_EMULATION 1
|
||||
|
||||
// globals
|
||||
std::mutex gMutex;
|
||||
unsigned int globalID[3];
|
||||
unsigned int localID[3];
|
||||
unsigned int localSize[3];
|
||||
|
||||
std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
|
||||
|
||||
SynchronizationBarrier *pGlobalBarrier = nullptr;
|
||||
|
||||
// Component-wise sum of two uint4 vectors.
uint4 operator+(uint4 const &a, uint4 const &b) {
    return uint4(a.x + b.x,
                 a.y + b.y,
                 a.z + b.z,
                 a.w + b.w);
}
|
||||
|
||||
// Component-wise sum of two int4 vectors.
int4 operator+(int4 const &a, int4 const &b) {
    return int4(a.x + b.x,
                a.y + b.y,
                a.z + b.z,
                a.w + b.w);
}
|
||||
|
||||
uint get_local_id(int dim) {
|
||||
uint LID = 0;
|
||||
|
||||
// use thread id
|
||||
if (threadIDToLocalIDmap.size() > 0) {
|
||||
std::thread::id id = std::this_thread::get_id();
|
||||
LID = threadIDToLocalIDmap[id] % 24;
|
||||
}
|
||||
// use id from loop iteration
|
||||
else {
|
||||
LID = localID[dim];
|
||||
}
|
||||
return LID;
|
||||
}
|
||||
|
||||
uint get_global_id(int dim) {
|
||||
uint GID = 0;
|
||||
|
||||
// use thread id
|
||||
if (threadIDToLocalIDmap.size() > 0) {
|
||||
std::thread::id id = std::this_thread::get_id();
|
||||
GID = threadIDToLocalIDmap[id];
|
||||
}
|
||||
// use id from loop iteration
|
||||
else {
|
||||
GID = globalID[dim];
|
||||
}
|
||||
return GID;
|
||||
}
|
||||
|
||||
// Emulation of the OpenCL C get_local_size() built-in: returns localSize[dim].
// An out-of-range dimension returns 1 (the value OpenCL C specifies for an
// invalid dimindx) instead of reading past the 3-element array.
uint get_local_size(int dim) {
    if (dim < 0 || dim >= 3) {
        return 1;
    }
    return localSize[dim];
}
|
||||
|
||||
// Emulation of get_num_groups(): NUM_OF_THREADS divided by 24, independent of
// `dim`.
uint get_num_groups(int dim) {
    const uint groupCount = NUM_OF_THREADS / 24;
    return groupCount;
}
|
||||
|
||||
// Emulation of get_group_id(): the global id of the caller divided by 24.
uint get_group_id(int dim) {
    const uint globalId = get_global_id(dim);
    return globalId / 24;
}
|
||||
|
||||
// Emulation of the OpenCL C barrier(): blocks in the global
// SynchronizationBarrier. The fence-flags argument `x` is not used.
void barrier(int x) {
    pGlobalBarrier->enter();

    // Debugging aid: the volatile counter gives a line that is not optimized
    // away.
    // PUT BREAKPOINT HERE to stop after each barrier
    volatile int BreakPointHere = 0;
    BreakPointHere = BreakPointHere + 1;
}
|
||||
|
||||
// Emulation of read_imageui(): reads one texel at (coord.x, coord.y, coord.z)
// from the image's linear storage and widens each channel into one uint
// component of the result.
//
// Components without a source channel keep the defaults {0, 0, 0, 1}. At most
// four channels are read because uint4 has only four components.
uint4 read_imageui(image *im, int4 coord) {
    uint4 color = {0, 0, 0, 1};

    uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;

    char *temp = &im->ptr[offset];
    char *colorDst = reinterpret_cast<char *>(&color);

    const uint componentCount = sizeof(uint4) / sizeof(uint);
    const uint channelsToCopy = im->channels < componentCount ? im->channels : componentCount;

    // Capacity left in `color` from colorDst onwards. Passing the shrinking
    // value (rather than sizeof(uint4) every time) keeps the size argument of
    // memcpy_s meaningful for the later components.
    // NOTE(review): assumes memcpy_s rejects copies larger than the given
    // destination size - confirm against runtime/helpers/string.h.
    size_t remaining = sizeof(uint4);

    for (uint i = 0; i < channelsToCopy; i++) {
        memcpy_s(colorDst, remaining, temp, im->bytesPerChannel);
        temp += im->bytesPerChannel;
        colorDst += sizeof(uint);
        remaining -= sizeof(uint);
    }
    return color;
}
|
||||
|
||||
// Emulation of write_imageui(): stores `color` into the texel at
// (coord.x, coord.y, coord.z) of the image's linear storage, taking the low
// bytesPerChannel bytes of each uint component for each channel.
//
// Returns the color that was passed in. The previous implementation returned
// whatever bytes followed the written texel, which reads past the image for
// the last texel. At most four channels are written because `color` has only
// four components.
uint4 write_imageui(image *im, uint4 coord, uint4 color) {
    uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;

    char *temp = &im->ptr[offset];
    const char *colorSrc = reinterpret_cast<const char *>(&color);

    size_t size = im->width * im->height * im->depth * im->bytesPerChannel * im->channels;

    const uint componentCount = sizeof(uint4) / sizeof(uint);
    const uint channelsToCopy = im->channels < componentCount ? im->channels : componentCount;

    for (uint i = 0; i < channelsToCopy; i++) {
        memcpy_s(temp, size - offset, colorSrc, im->bytesPerChannel);
        temp += im->bytesPerChannel;
        colorSrc += sizeof(uint);
    }
    return color;
}
|
||||
|
||||
// Emulation of the OpenCL C convert_uchar_sat(uint): values above 255 saturate
// to 255 instead of being truncated to their low byte.
uchar convert_uchar_sat(uint c) {
    const uint maxValue = 0xFFu;
    return static_cast<uchar>(c > maxValue ? maxValue : c);
}
|
||||
|
||||
// Emulation of the OpenCL C convert_ushort_sat(uint): values above 65535
// saturate to 65535 instead of being truncated to their low 16 bits.
ushort convert_ushort_sat(uint c) {
    const uint maxValue = 0xFFFFu;
    return static_cast<ushort>(c > maxValue ? maxValue : c);
}
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
||||
304
runtime/builtin_kernels_simulation/opencl_c.h
Normal file
304
runtime/builtin_kernels_simulation/opencl_c.h
Normal file
@@ -0,0 +1,304 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <map>
|
||||
#include <thread>
|
||||
#include <string.h>
|
||||
#include <cstdint>
|
||||
|
||||
// OpenCL Types
|
||||
typedef uint32_t uint;
|
||||
typedef uint8_t uchar;
|
||||
typedef uint16_t ushort;
|
||||
typedef uint64_t ulong;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
// number of threads in wkg
|
||||
#define NUM_OF_THREADS 24
|
||||
|
||||
#define CLK_GLOBAL_MEM_FENCE 1
|
||||
#define CLK_LOCAL_MEM_FENCE 2
|
||||
|
||||
// Reusable rendezvous point for a fixed number of threads.
//
// Each enter() call decrements the number of outstanding arrivals. Callers
// block until the last expected thread arrives; that thread re-arms the barrier
// for the next round, bumps the round counter and wakes everybody up.
class SynchronizationBarrier {
  public:
    SynchronizationBarrier(int count)
        : m_Count(count), m_InitialCount(count), m_BarrierCounter(0) {
    }

    ~SynchronizationBarrier() {
    }

    void enter() {
        std::unique_lock<std::mutex> lck(m_Mutex);

        --m_Count;

        // Round this caller belongs to; the wait ends once the round changes.
        const unsigned int round = m_BarrierCounter;

        if (m_Count <= 0) {
            // Last arrival: reset for the next round and release the waiters.
            m_Count = m_InitialCount;
            ++m_BarrierCounter;
            m_AllHitBarrierCondition.notify_all();
            return;
        }

        m_AllHitBarrierCondition.wait(lck, [this, round] { return m_BarrierCounter != round; });
    }

  private:
    std::mutex m_Mutex;
    std::condition_variable m_AllHitBarrierCondition;
    int m_Count;
    const int m_InitialCount;
    unsigned int m_BarrierCounter;
};
|
||||
|
||||
// globals
|
||||
extern std::mutex gMutex;
|
||||
extern unsigned int globalID[3];
|
||||
extern unsigned int localID[3];
|
||||
extern unsigned int localSize[3];
|
||||
extern std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
|
||||
extern SynchronizationBarrier *pGlobalBarrier;
|
||||
|
||||
// Host-side stand-ins for the OpenCL C unsigned vector types. All three are
// default-constructible to all-zero and constructible from their components.

// Two-component unsigned vector.
typedef struct taguint2 {
    taguint2(uint x, uint y) : x(x), y(y) {}
    taguint2() : x(0), y(0) {}
    uint x;
    uint y;
} uint2;

// Three-component unsigned vector.
typedef struct taguint3 {
    taguint3(uint x, uint y, uint z) : x(x), y(y), z(z) {}
    taguint3() : x(0), y(0), z(0) {}
    uint x;
    uint y;
    uint z;
} uint3;

// Four-component unsigned vector. The zeroing default constructor mirrors the
// ones uint2 and uint3 already provide.
typedef struct taguint4 {
    taguint4(uint x, uint y, uint z, uint w) : x(x), y(y), z(z), w(w) {}
    taguint4() : x(0), y(0), z(0), w(0) {}
    uint x;
    uint y;
    uint z;
    uint w;
} uint4;
|
||||
|
||||
// Host-side stand-ins for the OpenCL C signed vector types. Each one is
// constructible from its components; the zeroing default constructors match
// what the unsigned uint2/uint3 types in this header offer.

// Two-component signed vector.
typedef struct tagint2 {
    tagint2(int x, int y) : x(x), y(y) {}
    tagint2() : x(0), y(0) {}
    int x;
    int y;
} int2;

// Three-component signed vector.
typedef struct tagint3 {
    tagint3(int x, int y, int z) : x(x), y(y), z(z) {}
    tagint3() : x(0), y(0), z(0) {}
    int x;
    int y;
    int z;
} int3;

// Four-component signed vector.
typedef struct tagint4 {
    tagint4(int x, int y, int z, int w) : x(x), y(y), z(z), w(w) {}
    tagint4() : x(0), y(0), z(0), w(0) {}
    int x;
    int y;
    int z;
    int w;
} int4;
|
||||
|
||||
// Host-side stand-ins for the OpenCL C ushort vector types.

// Two-component ushort vector; default-constructs to zero like uint2/uint3 in
// this header.
typedef struct tagushort2 {
    tagushort2(ushort x, ushort y) : x(x), y(y) {}
    tagushort2() : x(0), y(0) {}
    unsigned short x;
    unsigned short y;
} ushort2;

// Eight-component ushort vector, stored as a plain array.
typedef struct tagushort8 {
    unsigned short xxx[8];
} ushort8;

// Sixteen-component ushort vector, stored as a plain array.
typedef struct tagushort16 {
    unsigned short xxx[16];
} ushort16;
|
||||
|
||||
uint4 operator+(uint4 const &a, uint4 const &b);
|
||||
int4 operator+(int4 const &a, int4 const &b);
|
||||
|
||||
typedef struct tagimage {
|
||||
char *ptr;
|
||||
uint width;
|
||||
uint height;
|
||||
uint depth;
|
||||
uint bytesPerChannel;
|
||||
uint channels;
|
||||
} image;
|
||||
|
||||
// images as pointer
|
||||
typedef image *image1d_t;
|
||||
typedef image *image2d_t;
|
||||
typedef image *image3d_t;
|
||||
|
||||
// OpenCL keywords
|
||||
#define __global
|
||||
#define __local
|
||||
#define __private
|
||||
#define __kernel
|
||||
#define __attribute__(...)
|
||||
#define __read_only
|
||||
#define __write_only
|
||||
#define queue_t void *
|
||||
|
||||
// Host-side stand-in for the OpenCL C clk_event_t handle: a 32-bit value that
// converts to and from uint and (explicitly) to void *.
struct clk_event_t {
    clk_event_t() : value(0) {
    }

    // Keeps only the low 32 bits of the pointer value.
    clk_event_t(void *v) : value(static_cast<uint>(reinterpret_cast<uintptr_t>(v))) {
    }

    explicit operator void *() const {
        return reinterpret_cast<void *>(static_cast<uintptr_t>(value));
    }

    // const-qualified so that const events can be read as well.
    operator uint() const {
        return value;
    }

    void operator=(uint input) {
        value = input;
    }

    uint value;
};
|
||||
|
||||
// OpenCL builtins
|
||||
// Emulated as a plain value cast. `var` is parenthesized so that the cast
// applies to the whole argument expression, not just its first operand.
#define __builtin_astype(var, type) \
    ((type)(var))
|
||||
|
||||
// Scalar select(): yields b when c is non-zero, a otherwise. Every argument is
// parenthesized so compound expressions keep their intended precedence.
#define select(a, b, c) ((c) ? (b) : (a))
|
||||
|
||||
uint get_local_id(int dim);
|
||||
uint get_global_id(int dim);
|
||||
uint get_local_size(int dim);
|
||||
uint get_num_groups(int dim);
|
||||
uint get_group_id(int dim);
|
||||
void barrier(int x);
|
||||
uint4 read_imageui(image *im, int4 coord);
|
||||
uint4 write_imageui(image *im, uint4 coord, uint4 color);
|
||||
uchar convert_uchar_sat(uint c);
|
||||
ushort convert_ushort_sat(uint c);
|
||||
|
||||
#define EMULATION_ENTER_FUNCTION() \
|
||||
uint __LOCAL_ID__ = 0; \
|
||||
__LOCAL_ID__ = get_local_id(0);
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
void atomic_xchg(TYPE *dest, TYPE2 val) {
|
||||
gMutex.lock();
|
||||
dest[0] = (TYPE)val;
|
||||
gMutex.unlock();
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_add(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = (TYPE)(temp + (TYPE)second);
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_sub(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp - second;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE>
|
||||
TYPE atomic_inc(TYPE *first) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp + 1;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE>
|
||||
TYPE atomic_dec(TYPE *first) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = temp - 1;
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
|
||||
template <class TYPE, class TYPE2>
|
||||
TYPE atomic_min(TYPE *first, TYPE2 second) {
|
||||
gMutex.lock();
|
||||
TYPE temp = first[0];
|
||||
first[0] = (TYPE)((TYPE)second < temp ? (TYPE)second : temp);
|
||||
gMutex.unlock();
|
||||
return temp;
|
||||
}
|
||||
}
|
||||
36
runtime/builtin_kernels_simulation/scheduler_simulation.cpp
Normal file
36
runtime/builtin_kernels_simulation/scheduler_simulation.cpp
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "runtime/builtin_kernels_simulation/opencl_c.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
using namespace std;
|
||||
using namespace OCLRT;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
bool conditionReady = false;
|
||||
std::thread threads[NUM_OF_THREADS];
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
||||
92
runtime/builtin_kernels_simulation/scheduler_simulation.h
Normal file
92
runtime/builtin_kernels_simulation/scheduler_simulation.h
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
|
||||
#include "runtime/builtin_kernels_simulation/opencl_c.h"
|
||||
namespace OCLRT {
|
||||
class GraphicsAllocation;
|
||||
}
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
extern bool conditionReady;
|
||||
extern std::thread threads[];
|
||||
|
||||
template <typename GfxFamily>
|
||||
class SchedulerSimulation {
|
||||
public:
|
||||
void runSchedulerSimulation(OCLRT::GraphicsAllocation *queue,
|
||||
OCLRT::GraphicsAllocation *commandsStack,
|
||||
OCLRT::GraphicsAllocation *eventsPool,
|
||||
OCLRT::GraphicsAllocation *secondaryBatchBuffer,
|
||||
OCLRT::GraphicsAllocation *dsh,
|
||||
OCLRT::GraphicsAllocation *reflectionSurface,
|
||||
OCLRT::GraphicsAllocation *queueStorageBuffer,
|
||||
OCLRT::GraphicsAllocation *ssh,
|
||||
OCLRT::GraphicsAllocation *debugQueue);
|
||||
|
||||
void cleanSchedulerSimulation();
|
||||
|
||||
static void startScheduler(uint32_t index,
|
||||
OCLRT::GraphicsAllocation *queue,
|
||||
OCLRT::GraphicsAllocation *commandsStack,
|
||||
OCLRT::GraphicsAllocation *eventsPool,
|
||||
OCLRT::GraphicsAllocation *secondaryBatchBuffer,
|
||||
OCLRT::GraphicsAllocation *dsh,
|
||||
OCLRT::GraphicsAllocation *reflectionSurface,
|
||||
OCLRT::GraphicsAllocation *queueStorageBuffer,
|
||||
OCLRT::GraphicsAllocation *ssh,
|
||||
OCLRT::GraphicsAllocation *debugQueue);
|
||||
|
||||
void initializeSchedulerSimulation(OCLRT::GraphicsAllocation *queue,
|
||||
OCLRT::GraphicsAllocation *commandsStack,
|
||||
OCLRT::GraphicsAllocation *eventsPool,
|
||||
OCLRT::GraphicsAllocation *secondaryBatchBuffer,
|
||||
OCLRT::GraphicsAllocation *dsh,
|
||||
OCLRT::GraphicsAllocation *reflectionSurface,
|
||||
OCLRT::GraphicsAllocation *queueStorageBuffer,
|
||||
OCLRT::GraphicsAllocation *ssh,
|
||||
OCLRT::GraphicsAllocation *debugQueue);
|
||||
|
||||
static void patchGpGpuWalker(uint secondLevelBatchOffset,
|
||||
__global uint *secondaryBatchBuffer,
|
||||
uint interfaceDescriptorOffset,
|
||||
uint simdSize,
|
||||
uint totalLocalWorkSize,
|
||||
uint3 dimSize,
|
||||
uint3 startPoint,
|
||||
uint numberOfHwThreadsPerWg,
|
||||
uint indirectPayloadSize,
|
||||
uint ioHoffset);
|
||||
static bool enabled;
|
||||
static bool simulationRun;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool SchedulerSimulation<GfxFamily>::enabled = true;
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool SchedulerSimulation<GfxFamily>::simulationRun = false;
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
||||
112
runtime/builtin_kernels_simulation/scheduler_simulation.inl
Normal file
112
runtime/builtin_kernels_simulation/scheduler_simulation.inl
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
using namespace std;
|
||||
using namespace OCLRT;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::cleanSchedulerSimulation() {
|
||||
threadIDToLocalIDmap.clear();
|
||||
delete pGlobalBarrier;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::initializeSchedulerSimulation(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
localSize[0] = NUM_OF_THREADS;
|
||||
localSize[1] = 1;
|
||||
localSize[2] = 1;
|
||||
|
||||
threadIDToLocalIDmap.clear();
|
||||
pGlobalBarrier = new SynchronizationBarrier(NUM_OF_THREADS);
|
||||
|
||||
// Spawn Thread ID == 0 on main thread
|
||||
for (uint32_t i = 1; i < NUM_OF_THREADS; i++) {
|
||||
threads[i] = std::thread(startScheduler, i, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue);
|
||||
}
|
||||
|
||||
conditionReady = true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::runSchedulerSimulation(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
simulationRun = true;
|
||||
if (enabled) {
|
||||
initializeSchedulerSimulation(queue,
|
||||
commandsStack,
|
||||
eventsPool,
|
||||
secondaryBatchBuffer,
|
||||
dsh,
|
||||
reflectionSurface,
|
||||
queueStorageBuffer,
|
||||
ssh,
|
||||
debugQueue);
|
||||
|
||||
// start main thread with LID == 0
|
||||
startScheduler(0,
|
||||
queue,
|
||||
commandsStack,
|
||||
eventsPool,
|
||||
secondaryBatchBuffer,
|
||||
dsh,
|
||||
reflectionSurface,
|
||||
queueStorageBuffer,
|
||||
ssh,
|
||||
debugQueue);
|
||||
|
||||
// Wait for all threads on main thread
|
||||
if (threadIDToLocalIDmap[std::this_thread::get_id()] == 0) {
|
||||
|
||||
for (uint32_t i = 1; i < NUM_OF_THREADS; i++)
|
||||
threads[i].join();
|
||||
|
||||
cleanSchedulerSimulation();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
||||
456
runtime/command_queue/command_queue.cpp
Normal file
456
runtime/command_queue/command_queue.cpp
Normal file
@@ -0,0 +1,456 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/device_queue/device_queue.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/array_count.h"
|
||||
#include "runtime/helpers/get_info.h"
|
||||
#include "hw_info.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/helpers/surface_formats.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "CL/cl_ext.h"
|
||||
#include "runtime/utilities/api_intercept.h"
|
||||
#include "runtime/helpers/convert_color.h"
|
||||
#include "runtime/helpers/queue_helpers.h"
|
||||
#include <map>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Global table of create functions
|
||||
CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE] = {};
|
||||
|
||||
/**
 * Creates a command queue through the factory registered for the device's
 * render core family.
 *
 * @param retVal always set to CL_SUCCESS by this function.
 * @return whatever the family-specific create function returns.
 */
CommandQueue *CommandQueue::create(Context *context,
                                   Device *device,
                                   const cl_queue_properties *properties,
                                   cl_int &retVal) {
    retVal = CL_SUCCESS;

    const auto coreFamily = device->getRenderCoreFamily();
    auto funcCreate = commandQueueFactory[coreFamily];
    // Only checked in debug builds; a missing entry is not handled otherwise.
    DEBUG_BREAK_IF(nullptr == funcCreate);

    return funcCreate(context, device, properties);
}
|
||||
|
||||
// Default construction delegates to the main constructor with no context,
// no device and no properties.
CommandQueue::CommandQueue() : CommandQueue(nullptr, nullptr, nullptr) {
}
|
||||
|
||||
/**
 * Builds a queue bound to `context` and `deviceId`.
 *
 * Takes an internal reference on the context (when one is given), starts with
 * no command stream and no indirect heaps, derives the queue properties from
 * `properties` and creates a fresh flush stamp tracker.
 */
CommandQueue::CommandQueue(Context *context,
                           Device *deviceId,
                           const cl_queue_properties *properties)
    : low_priority(false),
      taskCount(0),
      taskLevel(0),
      virtualEvent(nullptr),
      context(context),
      device(deviceId),
      perfCountersEnabled(false),
      perfCountersConfig(UINT32_MAX),
      perfCountersUserRegistersNumber(0),
      perfConfigurationData(nullptr),
      perfCountersRegsCfgHandle(0),
      perfCountersRegsCfgPending(false),
      commandStream(nullptr) {
    if (context) {
        context->incRefInternal();
    }

    // Heaps are created lazily by getIndirectHeap().
    for (int heapIndex = 0; heapIndex < NUM_HEAPS; ++heapIndex) {
        indirectHeap[heapIndex] = nullptr;
    }

    commandQueueProperties = getCmdQueueProperties<cl_command_queue_properties>(properties);
    flushStamp.reset(new FlushStampTracker(true));
}
|
||||
|
||||
/**
 * Releases everything the queue holds: its reference on the virtual event,
 * the command stream and indirect heaps (their graphics allocations are
 * handed to the memory manager for reuse), the perf counter configuration,
 * and finally the internal context reference (unless this queue is the
 * context's special queue).
 */
CommandQueue::~CommandQueue() {
    if (virtualEvent) {
        UNRECOVERABLE_IF(this->virtualEvent->getCommandQueue() != this && this->virtualEvent->getCommandQueue() != nullptr);
        virtualEvent->setCurrentCmdQVirtualEvent(false);
        virtualEvent->decRefInternal();
    }

    if (device) {
        auto memoryManager = device->getMemoryManager();
        DEBUG_BREAK_IF(nullptr == memoryManager);

        if (commandStream && commandStream->getGraphicsAllocation()) {
            memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(commandStream->getGraphicsAllocation()), REUSABLE_ALLOCATION);
            // The allocation now belongs to the memory manager; detach it from the stream.
            commandStream->replaceGraphicsAllocation(nullptr);
        }
        delete commandStream;

        for (int heapIndex = 0; heapIndex < NUM_HEAPS; ++heapIndex) {
            auto heap = indirectHeap[heapIndex];
            if (heap == nullptr) {
                continue;
            }
            if (auto allocation = heap->getGraphicsAllocation()) {
                memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), REUSABLE_ALLOCATION);
            }
            delete heap;
        }

        // delete on a null pointer is a no-op, so no guard is needed.
        delete perfConfigurationData;

        if (this->perfCountersEnabled) {
            device->getPerformanceCounters()->shutdown();
        }
    }

    if (context && !context->isSpecialQueue(this)) {
        context->decRefInternal();
    }
}
|
||||
|
||||
uint32_t CommandQueue::getHwTag() const {
|
||||
uint32_t tag = *getHwTagAddress();
|
||||
return tag;
|
||||
}
|
||||
|
||||
volatile uint32_t *CommandQueue::getHwTagAddress() const {
|
||||
DEBUG_BREAK_IF(!this->device);
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
auto tag_address = commandStreamReceiver.getTagAddress();
|
||||
commandStreamReceiver.makeCoherent((void *)tag_address, sizeof(tag_address));
|
||||
return tag_address;
|
||||
}
|
||||
|
||||
bool CommandQueue::isCompleted(uint32_t taskCount) const {
|
||||
uint32_t tag = getHwTag();
|
||||
DEBUG_BREAK_IF(tag == Event::eventNotReady);
|
||||
return tag >= taskCount;
|
||||
}
|
||||
|
||||
/**
 * Blocks until the command stream receiver reports that `taskCountToWait`
 * has been reached, then remembers it as the latest task count waited for.
 */
void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait) {
    WAIT_ENTER()

    DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait);
    DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());

    auto &commandStreamReceiver = device->getCommandStreamReceiver();
    commandStreamReceiver.waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait);

    // After the wait the hardware tag must not be behind the requested count.
    DEBUG_BREAK_IF(getHwTag() < taskCountToWait);
    latestTaskCountWaited = taskCountToWait;

    WAIT_LEAVE()
}
|
||||
|
||||
bool CommandQueue::isQueueBlocked() {
|
||||
TakeOwnershipWrapper<CommandQueue> takeOwnershipWrapper(*this);
|
||||
//check if we have user event and if so, if it is in blocked state.
|
||||
if (this->virtualEvent) {
|
||||
if (this->virtualEvent->peekIsCompleted()) {
|
||||
UNRECOVERABLE_IF(this->virtualEvent == nullptr);
|
||||
|
||||
if (this->virtualEvent->peekIsCompletedByTermination() == false) {
|
||||
taskCount = this->virtualEvent->peekTaskCount();
|
||||
flushStamp->setStamp(this->virtualEvent->flushStamp->peekStamp());
|
||||
taskLevel = this->virtualEvent->taskLevel;
|
||||
// If this isn't an OOQ, update the taskLevel for the queue
|
||||
if (!isOOQEnabled()) {
|
||||
taskLevel++;
|
||||
}
|
||||
} else {
|
||||
//at this point we may reset queue TaskCount, since all command previous to this were aborted
|
||||
taskCount = 0;
|
||||
flushStamp->setStamp(0);
|
||||
taskLevel = getDevice().getCommandStreamReceiver().peekTaskLevel();
|
||||
}
|
||||
|
||||
DebugManager.log(DebugManager.flags.EventsDebugEnable.get(), "isQueueBlocked taskLevel change from", taskLevel, "to new from virtualEvent", this->virtualEvent, "new tasklevel", this->virtualEvent->taskLevel.load());
|
||||
|
||||
//close the access to virtual event, driver added only 1 ref count.
|
||||
this->virtualEvent->decRefInternal();
|
||||
this->virtualEvent = nullptr;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Answers a queue info query by forwarding to the shared getQueueInfo helper.
cl_int CommandQueue::getCommandQueueInfo(cl_command_queue_info paramName,
                                         size_t paramValueSize,
                                         void *paramValue,
                                         size_t *paramValueSizeRet) {
    return getQueueInfo<CommandQueue>(this,
                                      paramName,
                                      paramValueSize,
                                      paramValue,
                                      paramValueSizeRet);
}
|
||||
|
||||
uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList) {
|
||||
for (auto iEvent = 0u; iEvent < numEventsInWaitList; ++iEvent) {
|
||||
auto pEvent = (Event *)(eventWaitList[iEvent]);
|
||||
uint32_t eventTaskLevel = pEvent->taskLevel;
|
||||
taskLevel = std::max(taskLevel, eventTaskLevel);
|
||||
}
|
||||
return taskLevel;
|
||||
}
|
||||
|
||||
/**
 * Returns the indirect heap of the given type, making sure it is backed by
 * memory with at least `minRequiredSize` bytes available.
 *
 * If the current backing allocation is too small it is handed to the memory
 * manager for reuse and replaced; a reusable allocation is preferred over a
 * fresh one. The heap object itself is created on first use.
 */
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
                                            size_t minRequiredSize) {
    DEBUG_BREAK_IF(static_cast<uint32_t>(heapType) >= ARRAY_COUNT(indirectHeap));
    auto &heap = indirectHeap[heapType];

    DEBUG_BREAK_IF(nullptr == device);
    auto memoryManager = device->getMemoryManager();
    DEBUG_BREAK_IF(nullptr == memoryManager);

    GraphicsAllocation *heapMemory = heap ? heap->getGraphicsAllocation() : nullptr;

    // Retire the current allocation when it cannot satisfy the request.
    const bool heapTooSmall = heap && heap->getAvailableSpace() < minRequiredSize;
    if (heapTooSmall && heapMemory) {
        memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(heapMemory), REUSABLE_ALLOCATION);
        heapMemory = nullptr;
    }

    // A usable allocation implies an existing heap; nothing more to do.
    if (heapMemory) {
        return *heap;
    }

    // Heap should be at least minHeapSize unless we're requesting an empty heap
    size_t minHeapSize = 64 * KB;
    if (IndirectHeap::SURFACE_STATE == heapType) {
        minHeapSize -= MemoryConstants::pageSize;
    }

    if (minRequiredSize != 0) {
        minRequiredSize = alignUp(std::max(minRequiredSize, minHeapSize), MemoryConstants::cacheLineSize);
    }

    const size_t heapAlignment = MemoryConstants::pageSize;
    heapMemory = memoryManager->obtainReusableAllocation(minRequiredSize).release();
    if (!heapMemory) {
        heapMemory = memoryManager->allocateGraphicsMemory(minRequiredSize, heapAlignment);
    }

    if (heap) {
        heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), minRequiredSize);
        heap->replaceGraphicsAllocation(heapMemory);
    } else {
        heap = new IndirectHeap(heapMemory);
        heap->overrideMaxSize(minRequiredSize);
    }

    return *heap;
}
|
||||
|
||||
/**
 * Detaches the backing memory from the indirect heap of the given type and
 * hands it to the memory manager for reuse. The heap object itself is kept.
 */
void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) {
    DEBUG_BREAK_IF(static_cast<uint32_t>(heapType) >= ARRAY_COUNT(indirectHeap));
    auto &heap = indirectHeap[heapType];

    DEBUG_BREAK_IF(nullptr == device);
    auto memoryManager = device->getMemoryManager();
    DEBUG_BREAK_IF(nullptr == memoryManager);

    if (!heap) {
        return;
    }

    if (auto heapMemory = heap->getGraphicsAllocation()) {
        memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(heapMemory), REUSABLE_ALLOCATION);
    }
    // Leave the heap empty and without an allocation.
    heap->replaceBuffer(nullptr, 0);
    heap->replaceGraphicsAllocation(nullptr);
}
|
||||
|
||||
/**
 * Returns the queue's command stream with room for at least
 * `minRequiredSize` bytes plus the space reserved for CSR additions.
 *
 * The stream is created on first use. When it is too small, a page-aligned
 * replacement buffer is taken from the reusable pool (or newly allocated) and
 * the old allocation is handed back for reuse.
 */
LinearStream &CommandQueue::getCS(size_t minRequiredSize) {
    DEBUG_BREAK_IF(nullptr == device);
    auto &commandStreamReceiver = device->getCommandStreamReceiver();
    auto memoryManager = commandStreamReceiver.getMemoryManager();
    DEBUG_BREAK_IF(nullptr == memoryManager);

    if (!commandStream) {
        commandStream = new LinearStream(nullptr);
    }

    // Make sure we have enough room for any CSR additions
    minRequiredSize += CSRequirements::minCommandQueueCommandStreamSize;

    if (commandStream->getAvailableSpace() >= minRequiredSize) {
        return *commandStream;
    }

    // Not enough room: switch to a new block made of full pages.
    minRequiredSize = alignUp(minRequiredSize, MemoryConstants::pageSize);
    auto requiredSize = minRequiredSize + CSRequirements::csOverfetchSize;

    GraphicsAllocation *allocation = memoryManager->obtainReusableAllocation(requiredSize).release();
    if (!allocation) {
        allocation = memoryManager->allocateGraphicsMemory(requiredSize, MemoryConstants::pageSize);
    }

    // Hand the previous block (if any) back for reuse.
    if (auto oldAllocation = commandStream->getGraphicsAllocation()) {
        memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(oldAllocation), REUSABLE_ALLOCATION);
    }

    // The size exposed to the stream excludes the space reserved for the CSR.
    commandStream->replaceBuffer(allocation->getUnderlyingBuffer(), minRequiredSize - CSRequirements::minCommandQueueCommandStreamSize);
    commandStream->replaceGraphicsAllocation(allocation);

    return *commandStream;
}
|
||||
|
||||
/**
 * Acquires every listed shared memory object through its sharing handler,
 * bumps its acquire count, then enqueues a marker that waits on the given
 * events. When an output event is requested, its command type is set to
 * `cmdType`.
 *
 * @return the status of the marker enqueue.
 */
cl_int CommandQueue::enqueueAcquireSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType) {
    for (unsigned int objectIndex = 0; objectIndex < numObjects; objectIndex++) {
        auto memObject = castToObjectOrAbort<MemObj>(memObjects[objectIndex]);
        memObject->peekSharingHandler()->acquire(memObject);
        memObject->acquireCount++;
    }

    auto status = enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, oclEvent);

    if (oclEvent) {
        castToObjectOrAbort<Event>(*oclEvent)->setCmdType(cmdType);
    }

    return status;
}
|
||||
|
||||
/**
 * Releases every listed shared memory object through its sharing handler,
 * lowers its acquire count, then enqueues a marker that waits on the given
 * events. When an output event is requested, its command type is set to
 * `cmdType`.
 *
 * @return the status of the marker enqueue.
 */
cl_int CommandQueue::enqueueReleaseSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType) {
    for (unsigned int objectIndex = 0; objectIndex < numObjects; objectIndex++) {
        auto memObject = castToObjectOrAbort<MemObj>(memObjects[objectIndex]);
        memObject->peekSharingHandler()->release(memObject);
        // Releasing an object that was never acquired is only flagged in debug builds.
        DEBUG_BREAK_IF(memObject->acquireCount <= 0);
        memObject->acquireCount--;
    }

    auto status = enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, oclEvent);

    if (oclEvent) {
        castToObjectOrAbort<Event>(*oclEvent)->setCmdType(cmdType);
    }

    return status;
}
|
||||
|
||||
// Copies task count, flush stamp and task level from a completion stamp into the queue.
void CommandQueue::updateFromCompletionStamp(const CompletionStamp &completionStamp) {
    this->taskCount = completionStamp.taskCount;
    this->flushStamp->setStamp(completionStamp.flushStamp);
    this->taskLevel = completionStamp.taskLevel;
}
|
||||
|
||||
/**
 * Makes sure work the wait-list events depend on has been submitted.
 *
 * For in-order queues this first spins until the queue is no longer blocked.
 * It then flushes batched submissions and, if any event in the wait list has
 * a task count newer than the latest one sent to the command stream receiver,
 * issues an implicit-flush task one level above the highest level in the list.
 */
void CommandQueue::flushWaitList(
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    bool ndRangeKernel) {

    bool isQBlocked = false;

    // As long as the queue is blocked we need to stall.
    if (!isOOQEnabled()) {
        do {
            isQBlocked = isQueueBlocked();
        } while (isQBlocked);
    }

    TakeOwnershipWrapper<Device> deviceOwnership(*device);
    device->getCommandStreamReceiver().flushBatchedSubmissions();

    if (isQBlocked) {
        return;
    }

    auto taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList);
    auto &commandStream = getCS();
    auto &commandStreamReceiver = device->getCommandStreamReceiver();

    // A flush is needed only if some event refers to work not yet sent.
    bool flushTask = false;
    for (auto eventIndex = 0u; eventIndex < numEventsInWaitList; eventIndex++) {
        Event *waitedEvent = (Event *)eventWaitList[eventIndex];
        if (waitedEvent->peekTaskCount() > commandStreamReceiver.peekLatestSentTaskCount()) {
            flushTask = true;
            break;
        }
    }

    if (!flushTask) {
        return;
    }

    DispatchFlags dispatchFlags;
    dispatchFlags.GSBA32BitRequired = ndRangeKernel;
    dispatchFlags.low_priority = low_priority;
    dispatchFlags.implicitFlush = true;
    dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, nullptr);

    DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

    commandStreamReceiver.flushTask(
        commandStream,
        commandStream.getUsed(),
        getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0),
        getIndirectHeap(IndirectHeap::INSTRUCTION, 0),
        getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0),
        getIndirectHeap(IndirectHeap::SURFACE_STATE, 0),
        taskLevel + 1,
        dispatchFlags);
}
|
||||
|
||||
bool CommandQueue::setPerfCountersEnabled(bool perfCountersEnabled, cl_uint configuration) {
|
||||
DEBUG_BREAK_IF(device == nullptr);
|
||||
if (perfCountersEnabled == this->perfCountersEnabled) {
|
||||
return true;
|
||||
}
|
||||
auto perfCounters = device->getPerformanceCounters();
|
||||
if (perfCountersEnabled) {
|
||||
perfCounters->enable();
|
||||
if (!perfCounters->isAvailable()) {
|
||||
perfCounters->shutdown();
|
||||
return false;
|
||||
}
|
||||
perfConfigurationData = perfCounters->getPmRegsCfg(configuration);
|
||||
if (perfConfigurationData == nullptr) {
|
||||
perfCounters->shutdown();
|
||||
return false;
|
||||
}
|
||||
InstrReadRegsCfg *pUserCounters = &perfConfigurationData->readRegs;
|
||||
for (uint32_t i = 0; i < pUserCounters->regsCount; ++i) {
|
||||
perfCountersUserRegistersNumber++;
|
||||
if (pUserCounters->reg[i].bitSize > 32) {
|
||||
perfCountersUserRegistersNumber++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (perfCounters->isAvailable()) {
|
||||
perfCounters->shutdown();
|
||||
}
|
||||
}
|
||||
this->perfCountersConfig = configuration;
|
||||
this->perfCountersEnabled = perfCountersEnabled;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// The performance counters belong to the device; the queue only exposes them.
PerformanceCounters *CommandQueue::getPerfCounters() {
    auto perfCounters = device->getPerformanceCounters();
    return perfCounters;
}
|
||||
|
||||
bool CommandQueue::sendPerfCountersConfig() {
|
||||
return getPerfCounters()->sendPmRegsCfgCommands(perfConfigurationData, &perfCountersRegsCfgHandle, &perfCountersRegsCfgPending);
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
422
runtime/command_queue/command_queue.h
Normal file
422
runtime/command_queue/command_queue.h
Normal file
@@ -0,0 +1,422 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/api/cl_types.h"
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/completion_stamp.h"
|
||||
#include "runtime/helpers/flush_stamp.h"
|
||||
#include "runtime/event/user_event.h"
|
||||
#include "runtime/os_interface/performance_counters.h"
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
|
||||
namespace OCLRT {
|
||||
class Buffer;
|
||||
class LinearStream;
|
||||
class Context;
|
||||
class Device;
|
||||
class Image;
|
||||
class IndirectHeap;
|
||||
class Kernel;
|
||||
class MemObj;
|
||||
|
||||
template <>
|
||||
struct OpenCLObjectMapper<_cl_command_queue> {
|
||||
typedef class CommandQueue DerivedType;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// CommandQueue - Core implementation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
public:
|
||||
static const cl_ulong objectMagic = 0x1234567890987654LL;
|
||||
enum { NUM_HEAPS = IndirectHeap::NUM_TYPES };
|
||||
|
||||
bool low_priority;
|
||||
|
||||
static CommandQueue *create(Context *context, Device *device,
|
||||
const cl_queue_properties *properties,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
CommandQueue();
|
||||
|
||||
CommandQueue(Context *context, Device *device,
|
||||
const cl_queue_properties *properties);
|
||||
|
||||
CommandQueue &operator=(const CommandQueue &) = delete;
|
||||
CommandQueue(const CommandQueue &) = delete;
|
||||
|
||||
~CommandQueue() override;
|
||||
|
||||
// API entry points
|
||||
virtual cl_int
|
||||
enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t srcOrigin[3],
|
||||
const size_t dstOrigin[3], const size_t region[3],
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueFillImage(Image *image, const void *fillColor,
|
||||
const size_t *origin, const size_t *region,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueFillBuffer(Buffer *buffer, const void *pattern,
|
||||
size_t patternSize, size_t offset,
|
||||
size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueKernel(cl_kernel kernel, cl_uint workDim,
|
||||
const size_t *globalWorkOffset,
|
||||
const size_t *globalWorkSize,
|
||||
const size_t *localWorkSize,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap,
|
||||
cl_map_flags mapFlags, size_t offset,
|
||||
size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event,
|
||||
cl_int &errcodeRet) {
|
||||
errcodeRet = CL_SUCCESS;
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual void *enqueueMapImage(cl_mem image, cl_bool blockingMap,
|
||||
cl_map_flags mapFlags, const size_t *origin,
|
||||
const size_t *region, size_t *imageRowPitch,
|
||||
size_t *imageSlicePitch,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event,
|
||||
cl_int &errcodeRet) {
|
||||
errcodeRet = CL_SUCCESS;
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags,
|
||||
void *svmPtr, size_t size,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueSVMUnmap(void *svmPtr,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueSVMFree(cl_uint numSvmPointers,
|
||||
void *svmPointers[],
|
||||
void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue,
|
||||
cl_uint numSvmPointers,
|
||||
void *svmPointers[],
|
||||
void *userData),
|
||||
void *userData,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
void *dstPtr,
|
||||
const void *srcPtr,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueSVMMemFill(void *svmPtr,
|
||||
const void *pattern,
|
||||
size_t patternSize,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueMigrateMemObjects(cl_uint numMemObjects,
|
||||
const cl_mem *memObjects,
|
||||
cl_mem_migration_flags flags,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers,
|
||||
const void **svmPointers,
|
||||
const size_t *sizes,
|
||||
const cl_mem_migration_flags flags,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueCopyBuffer(Buffer *srcBuffer, Buffer *dstBuffer,
|
||||
size_t srcOffset, size_t dstOffset,
|
||||
size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
virtual cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead,
|
||||
size_t offset, size_t size, void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Base-class default for image read: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueReadImage(Image *srcImage,
                                cl_bool blockingRead,
                                const size_t *origin,
                                const size_t *region,
                                size_t rowPitch,
                                size_t slicePitch,
                                void *ptr,
                                cl_uint numEventsInWaitList,
                                const cl_event *eventWaitList,
                                cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Base-class default for unmap: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueUnmapMemObject(MemObj *memObj,
                                     void *mappedPtr,
                                     cl_uint numEventsInWaitList,
                                     const cl_event *eventWaitList,
                                     cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Base-class default for buffer write: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueWriteBuffer(Buffer *buffer,
                                  cl_bool blockingWrite,
                                  size_t offset,
                                  size_t cb,
                                  const void *ptr,
                                  cl_uint numEventsInWaitList,
                                  const cl_event *eventWaitList,
                                  cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Base-class default for image write: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueWriteImage(Image *dstImage,
                                 cl_bool blockingWrite,
                                 const size_t *origin,
                                 const size_t *region,
                                 size_t inputRowPitch,
                                 size_t inputSlicePitch,
                                 const void *ptr,
                                 cl_uint numEventsInWaitList,
                                 const cl_event *eventWaitList,
                                 cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Base-class default for rectangular buffer copy: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueCopyBufferRect(Buffer *srcBuffer,
                                     Buffer *dstBuffer,
                                     const size_t *srcOrigin,
                                     const size_t *dstOrigin,
                                     const size_t *region,
                                     size_t srcRowPitch,
                                     size_t srcSlicePitch,
                                     size_t dstRowPitch,
                                     size_t dstSlicePitch,
                                     cl_uint numEventsInWaitList,
                                     const cl_event *eventWaitList,
                                     cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Base-class default for rectangular buffer write: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueWriteBufferRect(Buffer *buffer,
                                      cl_bool blockingWrite,
                                      const size_t *bufferOrigin,
                                      const size_t *hostOrigin,
                                      const size_t *region,
                                      size_t bufferRowPitch,
                                      size_t bufferSlicePitch,
                                      size_t hostRowPitch,
                                      size_t hostSlicePitch,
                                      const void *ptr,
                                      cl_uint numEventsInWaitList,
                                      const cl_event *eventWaitList,
                                      cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Base-class default for rectangular buffer read: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueReadBufferRect(Buffer *buffer,
                                     cl_bool blockingRead,
                                     const size_t *bufferOrigin,
                                     const size_t *hostOrigin,
                                     const size_t *region,
                                     size_t bufferRowPitch,
                                     size_t bufferSlicePitch,
                                     size_t hostRowPitch,
                                     size_t hostSlicePitch,
                                     void *ptr,
                                     cl_uint numEventsInWaitList,
                                     const cl_event *eventWaitList,
                                     cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Base-class default for buffer-to-image copy: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueCopyBufferToImage(Buffer *srcBuffer,
                                        Image *dstImage,
                                        size_t srcOffset,
                                        const size_t *dstOrigin,
                                        const size_t *region,
                                        cl_uint numEventsInWaitList,
                                        const cl_event *eventWaitList,
                                        cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Base-class default for image-to-buffer copy: does no work and reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int enqueueCopyImageToBuffer(Image *srcImage,
                                        Buffer *dstBuffer,
                                        const size_t *srcOrigin,
                                        const size_t *region,
                                        size_t dstOffset,
                                        cl_uint numEventsInWaitList,
                                        const cl_event *eventWaitList,
                                        cl_event *event) { return CL_SUCCESS; }
|
||||
|
||||
// Non-virtual; declared here and defined out of line.
// NOTE(review): cmdType presumably carries the CL command id reported on the event - confirm in the definition.
cl_int enqueueAcquireSharedObjects(cl_uint numObjects, const cl_mem *memObjects,
                                   cl_uint numEventsInWaitList, const cl_event *eventWaitList,
                                   cl_event *oclEvent, cl_uint cmdType);
|
||||
|
||||
// Non-virtual; declared here and defined out of line. Same parameter list as
// enqueueAcquireSharedObjects.
cl_int enqueueReleaseSharedObjects(cl_uint numObjects, const cl_mem *memObjects,
                                   cl_uint numEventsInWaitList, const cl_event *eventWaitList,
                                   cl_event *oclEvent, cl_uint cmdType);
|
||||
|
||||
// Base-class default: nothing to wait for, reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int finish(bool dcFlush) {
    return CL_SUCCESS;
}

// Base-class default: nothing to submit, reports CL_SUCCESS.
// CommandQueueHw overrides this method.
virtual cl_int flush() {
    return CL_SUCCESS;
}
|
||||
|
||||
void updateFromCompletionStamp(const CompletionStamp &completionStamp);
|
||||
|
||||
cl_int getCommandQueueInfo(cl_command_queue_info paramName,
|
||||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
uint32_t getHwTag() const;
|
||||
|
||||
volatile uint32_t *getHwTagAddress() const;
|
||||
|
||||
bool isCompleted(uint32_t taskCount) const;
|
||||
|
||||
MOCKABLE_VIRTUAL bool isQueueBlocked();
|
||||
|
||||
void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait);
|
||||
|
||||
void flushWaitList(cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
bool ndRangeKernel);
|
||||
|
||||
static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList);
|
||||
|
||||
Device &getDevice() { return *device; }
|
||||
Context &getContext() { return *context; }
|
||||
Context *getContextPtr() { return context; }
|
||||
|
||||
LinearStream &getCS(size_t minRequiredSize = 1024u);
|
||||
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
|
||||
size_t minRequiredSize = 0u);
|
||||
|
||||
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
|
||||
|
||||
// Returns the stored cl_command_queue_properties bitfield.
cl_command_queue_properties getCommandQueueProperties() const { return commandQueueProperties; }
|
||||
|
||||
bool isProfilingEnabled() {
|
||||
return !!(this->getCommandQueueProperties() & CL_QUEUE_PROFILING_ENABLE);
|
||||
}
|
||||
|
||||
bool isOOQEnabled() {
|
||||
return !!(this->getCommandQueueProperties() & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
|
||||
}
|
||||
|
||||
bool isPerfCountersEnabled() {
|
||||
return perfCountersEnabled;
|
||||
}
|
||||
|
||||
// Returns the stored perf-counter register configuration pointer as-is (may be null).
InstrPmRegsCfg *getPerfCountersConfigData() { return perfConfigurationData; }
|
||||
|
||||
PerformanceCounters *getPerfCounters();
|
||||
|
||||
bool sendPerfCountersConfig();
|
||||
|
||||
bool setPerfCountersEnabled(bool perfCountersEnabled, cl_uint configuration);
|
||||
|
||||
// Returns the stored user-register count narrowed to 16 bits.
// NOTE(review): the backing member is declared uint32_t, so values above 0xFFFF
// are truncated by this return type - confirm that is intended.
uint16_t getPerfCountersUserRegistersNumber() {
    return static_cast<uint16_t>(perfCountersUserRegistersNumber);
}
|
||||
|
||||
// taskCount of last task
|
||||
uint32_t taskCount;
|
||||
|
||||
// current taskLevel. Used for determining if a PIPE_CONTROL is needed.
|
||||
uint32_t taskLevel;
|
||||
|
||||
std::unique_ptr<FlushStampTracker> flushStamp;
|
||||
|
||||
std::atomic<uint32_t> latestTaskCountWaited{(uint32_t)-1};
|
||||
|
||||
// virtual event that holds last Enqueue information
|
||||
Event *virtualEvent;
|
||||
|
||||
protected:
|
||||
Context *context;
|
||||
Device *device;
|
||||
|
||||
cl_command_queue_properties commandQueueProperties;
|
||||
|
||||
bool perfCountersEnabled;
|
||||
cl_uint perfCountersConfig;
|
||||
uint32_t perfCountersUserRegistersNumber;
|
||||
InstrPmRegsCfg *perfConfigurationData;
|
||||
uint32_t perfCountersRegsCfgHandle;
|
||||
bool perfCountersRegsCfgPending;
|
||||
|
||||
LinearStream *commandStream;
|
||||
IndirectHeap *indirectHeap[NUM_HEAPS];
|
||||
|
||||
bool mapDcFlushRequired = false;
|
||||
};
|
||||
|
||||
// Pointer to a factory function taking (context, device, properties) and
// returning a CommandQueue pointer; CommandQueueHw<GfxFamily>::create has this shape.
using CommandQueueCreateFunc = CommandQueue *(*)(Context *context, Device *device, const cl_queue_properties *properties);
|
||||
|
||||
template <typename GfxFamily, unsigned int eventType>
|
||||
LinearStream &getCommandStream(CommandQueue &commandQueue,
|
||||
bool reserveProfilingCmdsSpace,
|
||||
bool reservePerfCounterCmdsSpace,
|
||||
const Kernel *pKernel);
|
||||
|
||||
template <typename GfxFamily, IndirectHeap::Type heapType>
|
||||
IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const Kernel &kernel);
|
||||
} // namespace OCLRT
|
||||
391
runtime/command_queue/command_queue_hw.h
Normal file
391
runtime/command_queue/command_queue_hw.h
Normal file
@@ -0,0 +1,391 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "runtime/mem_obj/mem_obj.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/program/printf_handler.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include "runtime/command_stream/preemption.h"
|
||||
#include "runtime/helpers/queue_helpers.h"
|
||||
#include <memory>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
class EventBuilder;
|
||||
|
||||
template <typename GfxFamily>
|
||||
class CommandQueueHw : public CommandQueue {
|
||||
typedef CommandQueue BaseClass;
|
||||
|
||||
public:
|
||||
CommandQueueHw(Context *context,
|
||||
Device *device,
|
||||
const cl_queue_properties *properties) : BaseClass(context, device, properties) {
|
||||
if (getCmdQueueProperties<cl_queue_priority_khr>(properties, CL_QUEUE_PRIORITY_KHR) & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) {
|
||||
low_priority = true;
|
||||
}
|
||||
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
|
||||
device->getCommandStreamReceiver().overrideDispatchPolicy(CommandStreamReceiver::BatchedDispatch);
|
||||
}
|
||||
}
|
||||
|
||||
static CommandQueue *create(Context *context,
|
||||
Device *device,
|
||||
const cl_queue_properties *properties) {
|
||||
return new CommandQueueHw<GfxFamily>(context, device, properties);
|
||||
}
|
||||
|
||||
cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueCopyBuffer(Buffer *srcBuffer,
|
||||
Buffer *dstBuffer,
|
||||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueCopyBufferRect(Buffer *srcBuffer,
|
||||
Buffer *dstBuffer,
|
||||
const size_t *srcOrigin,
|
||||
const size_t *dstOrigin,
|
||||
const size_t *region,
|
||||
size_t srcRowPitch,
|
||||
size_t srcSlicePitch,
|
||||
size_t dstRowPitch,
|
||||
size_t dstSlicePitch,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueCopyImage(Image *srcImage,
|
||||
Image *dstImage,
|
||||
const size_t srcOrigin[3],
|
||||
const size_t dstOrigin[3],
|
||||
const size_t region[3],
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueFillBuffer(Buffer *buffer,
|
||||
const void *pattern,
|
||||
size_t patternSize,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueFillImage(Image *image,
|
||||
const void *fillColor,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueKernel(cl_kernel kernel,
|
||||
cl_uint workDim,
|
||||
const size_t *globalWorkOffset,
|
||||
const size_t *globalWorkSize,
|
||||
const size_t *localWorkSize,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
|
||||
size_t offset, size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) override;
|
||||
|
||||
void *enqueueMapSharedBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
|
||||
size_t offset, size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet);
|
||||
|
||||
void *enqueueMapImage(cl_mem image,
|
||||
cl_bool blockingMap,
|
||||
cl_map_flags mapFlags,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t *imageRowPitch,
|
||||
size_t *imageSlicePitch,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event,
|
||||
cl_int &errcodeRet) override;
|
||||
|
||||
cl_int enqueueSVMMap(cl_bool blockingMap,
|
||||
cl_map_flags mapFlags,
|
||||
void *svmPtr,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueSVMUnmap(void *svmPtr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueSVMFree(cl_uint numSvmPointers,
|
||||
void *svmPointers[],
|
||||
void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue,
|
||||
cl_uint numSvmPointers,
|
||||
void *svmPointers[],
|
||||
void *userData),
|
||||
void *userData,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
void *dstPtr,
|
||||
const void *srcPtr,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueSVMMemFill(void *svmPtr,
|
||||
const void *pattern,
|
||||
size_t patternSize,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueMigrateMemObjects(cl_uint numMemObjects,
|
||||
const cl_mem *memObjects,
|
||||
cl_mem_migration_flags flags,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers,
|
||||
const void **svmPointers,
|
||||
const size_t *sizes,
|
||||
const cl_mem_migration_flags flags,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueReadBuffer(Buffer *buffer,
|
||||
cl_bool blockingRead,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueReadBufferRect(Buffer *buffer,
|
||||
cl_bool blockingRead,
|
||||
const size_t *bufferOrigin,
|
||||
const size_t *hostOrigin,
|
||||
const size_t *region,
|
||||
size_t bufferRowPitch,
|
||||
size_t bufferSlicePitch,
|
||||
size_t hostRowPitch,
|
||||
size_t hostSlicePitch,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueReadImage(Image *srcImage,
|
||||
cl_bool blockingRead,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t rowPitch,
|
||||
size_t slicePitch,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueUnmapMemObject(MemObj *memObj,
|
||||
void *mappedPtr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override {
|
||||
cl_int retVal;
|
||||
if (memObj->allowTiling() || memObj->peekSharingHandler()) {
|
||||
retVal = memObj->unmapObj(this, mappedPtr, numEventsInWaitList, eventWaitList, event);
|
||||
} else {
|
||||
cpuDataTransferHandler(memObj,
|
||||
CL_COMMAND_UNMAP_MEM_OBJECT,
|
||||
CL_FALSE,
|
||||
0,
|
||||
0,
|
||||
mappedPtr,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event,
|
||||
retVal);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
cl_int enqueueWriteBuffer(Buffer *buffer,
|
||||
cl_bool blockingWrite,
|
||||
size_t offset,
|
||||
size_t cb,
|
||||
const void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueWriteBufferRect(Buffer *buffer,
|
||||
cl_bool blockingWrite,
|
||||
const size_t *bufferOrigin,
|
||||
const size_t *hostOrigin,
|
||||
const size_t *region,
|
||||
size_t bufferRowPitch,
|
||||
size_t bufferSlicePitch,
|
||||
size_t hostRowPitch,
|
||||
size_t hostSlicePitch,
|
||||
const void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueWriteImage(Image *dstImage,
|
||||
cl_bool blockingWrite,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t inputRowPitch,
|
||||
size_t inputSlicePitch,
|
||||
const void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueCopyBufferToImage(Buffer *srcBuffer,
|
||||
Image *dstImage,
|
||||
size_t srcOffset,
|
||||
const size_t *dstOrigin,
|
||||
const size_t *region,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueCopyImageToBuffer(Image *srcImage,
|
||||
Buffer *dstBuffer,
|
||||
const size_t *srcOrigin,
|
||||
const size_t *region,
|
||||
size_t dstOffset,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
cl_int finish(bool dcFlush) override;
|
||||
cl_int flush() override;
|
||||
|
||||
template <unsigned int enqueueType>
|
||||
void enqueueHandler(Surface **surfacesForResidency,
|
||||
size_t numSurfaceForResidency,
|
||||
bool blocking,
|
||||
const MultiDispatchInfo &dispatchInfo,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
template <unsigned int enqueueType, size_t size>
|
||||
void enqueueHandler(Surface *(&surfacesForResidency)[size],
|
||||
bool blocking,
|
||||
const MultiDispatchInfo &dispatchInfo,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
enqueueHandler<enqueueType>(surfacesForResidency, size, blocking, dispatchInfo, numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
template <unsigned int enqueueType, size_t size>
|
||||
void enqueueHandler(Surface *(&surfacesForResidency)[size],
|
||||
bool blocking,
|
||||
Kernel *kernel,
|
||||
cl_uint workDim,
|
||||
const size_t globalOffsets[3],
|
||||
const size_t workItems[3],
|
||||
const size_t *localWorkSizesIn,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event);
|
||||
|
||||
template <unsigned int commandType>
|
||||
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
|
||||
size_t surfaceCount,
|
||||
LinearStream &commandStream,
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
EventBuilder &eventBuilder,
|
||||
uint32_t taskLevel,
|
||||
bool slmUsed,
|
||||
PrintfHandler *printfHandler);
|
||||
|
||||
template <unsigned int commandType>
|
||||
void enqueueBlocked(Surface **surfacesForResidency,
|
||||
size_t surfacesCount,
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
KernelOperation *blockedCommandsData,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
bool slmUsed,
|
||||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> printfHandler);
|
||||
|
||||
void addMapUnmapToWaitlistEventsDependencies(const cl_event *eventWaitList,
|
||||
size_t numEventsInWaitlist,
|
||||
MapOperationType opType,
|
||||
MemObj *memObj,
|
||||
EventBuilder &externalEventBuilder);
|
||||
|
||||
void *cpuDataTransferHandler(MemObj *memObj,
|
||||
cl_command_type cmdType,
|
||||
cl_bool blocking,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event,
|
||||
cl_int &retVal);
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo);
|
||||
|
||||
private:
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
|
||||
void forceDispatchScheduler(OCLRT::MultiDispatchInfo &multiDispatchInfo);
|
||||
};
|
||||
} // namespace OCLRT
|
||||
46
runtime/command_queue/command_queue_hw.inl
Normal file
46
runtime/command_queue/command_queue_hw.inl
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/command_queue/dispatch_walker.h"
|
||||
#include "runtime/command_queue/enqueue_barrier.h"
|
||||
#include "runtime/command_queue/enqueue_copy_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_copy_buffer_rect.h"
|
||||
#include "runtime/command_queue/enqueue_copy_buffer_to_image.h"
|
||||
#include "runtime/command_queue/enqueue_copy_image_to_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_copy_image.h"
|
||||
#include "runtime/command_queue/enqueue_fill_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_fill_image.h"
|
||||
#include "runtime/command_queue/enqueue_kernel.h"
|
||||
#include "runtime/command_queue/enqueue_map_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_map_image.h"
|
||||
#include "runtime/command_queue/enqueue_svm.h"
|
||||
#include "runtime/command_queue/enqueue_marker.h"
|
||||
#include "runtime/command_queue/enqueue_migrate_mem_objects.h"
|
||||
#include "runtime/command_queue/enqueue_read_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_read_buffer_rect.h"
|
||||
#include "runtime/command_queue/enqueue_read_image.h"
|
||||
#include "runtime/command_queue/enqueue_write_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_write_buffer_rect.h"
|
||||
#include "runtime/command_queue/enqueue_write_image.h"
|
||||
#include "runtime/command_queue/cpu_data_transfer_handler.h"
|
||||
#include "runtime/command_queue/finish.h"
|
||||
#include "runtime/command_queue/flush.h"
|
||||
171
runtime/command_queue/cpu_data_transfer_handler.h
Normal file
171
runtime/command_queue/cpu_data_transfer_handler.h
Normal file
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/event/event_builder.h"
|
||||
|
||||
namespace OCLRT {
|
||||
template <typename GfxFamily>
|
||||
void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
|
||||
cl_command_type cmdType,
|
||||
cl_bool blocking,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event,
|
||||
cl_int &retVal) {
|
||||
EventBuilder eventBuilder;
|
||||
bool blockQueue = false;
|
||||
bool eventCompleted = false;
|
||||
ErrorCodeHelper err(&retVal, CL_SUCCESS);
|
||||
|
||||
if (event) {
|
||||
eventBuilder.create<Event>(this, cmdType, Event::eventNotReady, Event::eventNotReady);
|
||||
eventBuilder.getEvent()->setQueueTimeStamp();
|
||||
eventBuilder.getEvent()->setCPUProfilingPath(true);
|
||||
*event = eventBuilder.getEvent();
|
||||
}
|
||||
|
||||
TakeOwnershipWrapper<Device> deviceOwnership(*device);
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
|
||||
auto taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList);
|
||||
auto updateTaskLevel = isTaskLevelUpdateRequired(taskLevel, eventWaitList, numEventsInWaitList, cmdType);
|
||||
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel);
|
||||
|
||||
if (updateTaskLevel) {
|
||||
taskLevel++;
|
||||
this->taskLevel = taskLevel;
|
||||
}
|
||||
|
||||
if (event) {
|
||||
eventBuilder.getEvent()->taskLevel = taskLevel;
|
||||
}
|
||||
|
||||
blockQueue = ((taskLevel == Event::eventNotReady) || isQueueBlocked());
|
||||
|
||||
if (blockQueue &&
|
||||
(cmdType == CL_COMMAND_MAP_BUFFER || cmdType == CL_COMMAND_UNMAP_MEM_OBJECT)) {
|
||||
|
||||
addMapUnmapToWaitlistEventsDependencies(eventWaitList,
|
||||
static_cast<size_t>(numEventsInWaitList),
|
||||
cmdType == CL_COMMAND_MAP_BUFFER ? MAP : UNMAP,
|
||||
memObj,
|
||||
eventBuilder);
|
||||
}
|
||||
|
||||
queueOwnership.unlock();
|
||||
deviceOwnership.unlock();
|
||||
|
||||
// read/write buffers are always blocking
|
||||
if (!blockQueue || blocking) {
|
||||
err.set(Event::waitForEvents(numEventsInWaitList, eventWaitList));
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp();
|
||||
}
|
||||
//wait for the completness of previous commands
|
||||
if (cmdType != CL_COMMAND_UNMAP_MEM_OBJECT) {
|
||||
if (!memObj->isMemObjZeroCopy() || blocking) {
|
||||
finish(true);
|
||||
eventCompleted = true;
|
||||
}
|
||||
}
|
||||
|
||||
auto bufferStorage = ptrOffset(memObj->getCpuAddressForMemoryTransfer(), offset);
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setStartTimeStamp();
|
||||
}
|
||||
|
||||
switch (cmdType) {
|
||||
case CL_COMMAND_MAP_BUFFER:
|
||||
if (!memObj->isMemObjZeroCopy()) {
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj));
|
||||
}
|
||||
memObj->transferDataToHostPtr();
|
||||
eventCompleted = true;
|
||||
} else {
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(memObj));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CL_COMMAND_UNMAP_MEM_OBJECT:
|
||||
if (!memObj->isMemObjZeroCopy()) {
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, ptr, static_cast<cl_mem>(memObj));
|
||||
}
|
||||
memObj->transferDataFromHostPtrToMemoryStorage();
|
||||
eventCompleted = true;
|
||||
} else {
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, ptr);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CL_COMMAND_READ_BUFFER:
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), ptr);
|
||||
}
|
||||
memcpy_s(ptr, size, bufferStorage, size);
|
||||
eventCompleted = true;
|
||||
break;
|
||||
case CL_COMMAND_WRITE_BUFFER:
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), ptr);
|
||||
}
|
||||
memcpy_s(bufferStorage, size, ptr, size);
|
||||
eventCompleted = true;
|
||||
break;
|
||||
default:
|
||||
err.set(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setEndTimeStamp();
|
||||
eventBuilder.getEvent()->updateTaskCount(this->taskCount);
|
||||
if (eventCompleted) {
|
||||
eventBuilder.getEvent()->setStatus(CL_COMPLETE);
|
||||
} else {
|
||||
eventBuilder.getEvent()->updateExecutionStatus();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cmdType == CL_COMMAND_MAP_BUFFER) {
|
||||
return memObj->setAndReturnMappedPtr(offset);
|
||||
}
|
||||
|
||||
if (cmdType == CL_COMMAND_UNMAP_MEM_OBJECT) {
|
||||
err.set(ptr == memObj->getMappedPtr() ? CL_SUCCESS : CL_INVALID_VALUE);
|
||||
}
|
||||
|
||||
return nullptr; // only map returns pointer
|
||||
}
|
||||
} // namespace OCLRT
|
||||
937
runtime/command_queue/dispatch_walker.h
Normal file
937
runtime/command_queue/dispatch_walker.h
Normal file
@@ -0,0 +1,937 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/gen9/gen9_cmd_def.h"
|
||||
#include "runtime/command_queue/local_id_gen.h"
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "runtime/command_queue/dispatch_walker_helper.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/device/device_info.h"
|
||||
#include "runtime/device_queue/device_queue_hw.h"
|
||||
#include "runtime/event/perf_counter.h"
|
||||
#include "runtime/event/user_event.h"
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/task_information.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/mem_obj/mem_obj.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
void computeWorkgroupSize1D(
|
||||
uint32_t maxWorkGroupSize,
|
||||
size_t workGroupSize[3],
|
||||
const size_t workItems[3],
|
||||
size_t simdSize);
|
||||
|
||||
void computeWorkgroupSizeND(
|
||||
WorkSizeInfo wsInfo,
|
||||
size_t workGroupSize[3],
|
||||
const size_t workItems[3],
|
||||
const uint32_t workDim);
|
||||
|
||||
void computeWorkgroupSize2D(
|
||||
uint32_t maxWorkGroupSize,
|
||||
size_t workGroupSize[3],
|
||||
const size_t workItems[3],
|
||||
size_t simdSize);
|
||||
|
||||
void computeWorkgroupSizeSquared(
|
||||
uint32_t maxWorkGroupSize,
|
||||
size_t workGroupSize[3],
|
||||
const size_t workItems[3],
|
||||
size_t simdSize,
|
||||
const uint32_t workDim);
|
||||
|
||||
Vec3<size_t> computeWorkgroupSize(
|
||||
const DispatchInfo &dispatchInfo);
|
||||
|
||||
Vec3<size_t> generateWorkgroupSize(
|
||||
const DispatchInfo &dispatchInfo);
|
||||
|
||||
Vec3<size_t> computeWorkgroupsNumber(
|
||||
const Vec3<size_t> gws,
|
||||
const Vec3<size_t> lws);
|
||||
|
||||
Vec3<size_t> generateWorkgroupsNumber(
|
||||
const Vec3<size_t> gws,
|
||||
const Vec3<size_t> lws);
|
||||
|
||||
Vec3<size_t> generateWorkgroupsNumber(
|
||||
const DispatchInfo &dispatchInfo);
|
||||
|
||||
Vec3<size_t> canonizeWorkgroup(
|
||||
Vec3<size_t> workgroup);
|
||||
|
||||
// Dimensionality required to describe a dispatch: the larger of the simplified
// dimensions of its size and of its offset, and never less than 1.
inline uint32_t calculateDispatchDim(Vec3<size_t> dispatchSize, Vec3<size_t> dispatchOffset) {
    const auto sizeDim = dispatchSize.getSimplifiedDim();
    const auto offsetDim = dispatchOffset.getSimplifiedDim();
    const auto widestDim = std::max(sizeDim, offsetDim);
    return std::max(1U, widestDim);
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline size_t setGpgpuWalkerThreadData(
|
||||
typename GfxFamily::GPGPU_WALKER *pCmd,
|
||||
const size_t globalOffsets[3],
|
||||
const size_t startWorkGroups[3],
|
||||
const size_t numWorkGroups[3],
|
||||
const size_t localWorkSizesIn[3],
|
||||
uint32_t simd) {
|
||||
typedef typename GfxFamily::GPGPU_WALKER GPGPU_WALKER;
|
||||
|
||||
auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2];
|
||||
|
||||
auto threadsPerWorkGroup = getThreadsPerWG(simd, localWorkSize);
|
||||
pCmd->setThreadWidthCounterMaximum((uint32_t)threadsPerWorkGroup);
|
||||
|
||||
pCmd->setThreadGroupIdXDimension((uint32_t)numWorkGroups[0]);
|
||||
pCmd->setThreadGroupIdYDimension((uint32_t)numWorkGroups[1]);
|
||||
pCmd->setThreadGroupIdZDimension((uint32_t)numWorkGroups[2]);
|
||||
|
||||
// compute RightExecutionMask
|
||||
auto remainderSimdLanes = localWorkSize & (simd - 1);
|
||||
uint64_t executionMask = (1ull << remainderSimdLanes) - 1;
|
||||
if (!executionMask)
|
||||
executionMask = ~executionMask;
|
||||
|
||||
pCmd->setRightExecutionMask((uint32_t)executionMask);
|
||||
|
||||
pCmd->setBottomExecutionMask((uint32_t)0xffffffff);
|
||||
pCmd->setSimdSize((typename GPGPU_WALKER::SIMD_SIZE)(simd >> 4));
|
||||
|
||||
pCmd->setThreadGroupIdStartingX((uint32_t)startWorkGroups[0]);
|
||||
pCmd->setThreadGroupIdStartingY((uint32_t)startWorkGroups[1]);
|
||||
pCmd->setThreadGroupIdStartingResumeZ((uint32_t)startWorkGroups[2]);
|
||||
|
||||
return localWorkSize;
|
||||
}
|
||||
|
||||
// Number of work dimensions actually in use: 3 when the Z extent exceeds one,
// otherwise 2 when the Y extent exceeds one, otherwise 1.
inline cl_uint computeDimensions(const size_t workItems[3]) {
    if (workItems[2] > 1) {
        return 3;
    }
    if (workItems[1] > 1) {
        return 2;
    }
    return 1;
}
|
||||
|
||||
// Declaration only; the definition is not part of this header.
// dispatchWalker() calls it once per dispatch, and only when the command type
// is CL_COMMAND_NDRANGE_KERNEL. The DispatchInfo is taken by value.
// NOTE(review): judging by the name it reports local-work-group-size hints for
// the dispatch - confirm against the definition.
void provideLocalWorkGroupSizeHints(Context *context, uint32_t maxWorkGroupSize, DispatchInfo dispatchInfo);
|
||||
|
||||
template <typename SizeAndAllocCalcT, typename... CalcArgsT>
|
||||
IndirectHeap *allocateIndirectHeap(SizeAndAllocCalcT &&calc, CalcArgsT &&... args) {
|
||||
size_t alignment = MemoryConstants::pageSize;
|
||||
size_t size = calc(std::forward<CalcArgsT>(args)...);
|
||||
return new IndirectHeap(alignedMalloc(size, alignment), size);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchProfilingCommandsStart(
|
||||
HwTimeStamps &hwTimeStamps,
|
||||
OCLRT::LinearStream *commandStream) {
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
// PIPE_CONTROL for global timestamp
|
||||
uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.GlobalStartTS));
|
||||
|
||||
auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
||||
*pPipeControlCmd = PIPE_CONTROL::sInit();
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
pPipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
|
||||
pPipeControlCmd->setAddress(static_cast<uint32_t>(TimeStampAddress & 0x0000FFFFFFFFULL));
|
||||
pPipeControlCmd->setAddressHigh(static_cast<uint32_t>(TimeStampAddress >> 32));
|
||||
|
||||
//MI_STORE_REGISTER_MEM for context local timestamp
|
||||
TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextStartTS));
|
||||
|
||||
//low part
|
||||
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pMICmdLow = MI_STORE_REGISTER_MEM::sInit();
|
||||
pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
pMICmdLow->setMemoryAddress(TimeStampAddress);
|
||||
|
||||
//hi part
|
||||
TimeStampAddress += sizeof(uint32_t);
|
||||
auto pMICmdHigh = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pMICmdHigh = MI_STORE_REGISTER_MEM::sInit();
|
||||
pMICmdHigh->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_HIGH);
|
||||
pMICmdHigh->setMemoryAddress(TimeStampAddress);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchProfilingCommandsEnd(
|
||||
HwTimeStamps &hwTimeStamps,
|
||||
OCLRT::LinearStream *commandStream) {
|
||||
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
// PIPE_CONTROL for global timestamp
|
||||
auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
||||
*pPipeControlCmd = PIPE_CONTROL::sInit();
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
|
||||
//MI_STORE_REGISTER_MEM for context local timestamp
|
||||
uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextEndTS));
|
||||
|
||||
//low part
|
||||
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pMICmdLow = MI_STORE_REGISTER_MEM::sInit();
|
||||
pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
pMICmdLow->setMemoryAddress(TimeStampAddress);
|
||||
|
||||
//hi part
|
||||
TimeStampAddress += sizeof(uint32_t);
|
||||
auto pMICmdHi = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pMICmdHi = MI_STORE_REGISTER_MEM::sInit();
|
||||
pMICmdHi->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_HIGH);
|
||||
pMICmdHi->setMemoryAddress(TimeStampAddress);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchPerfCountersNoopidRegisterCommands(
|
||||
CommandQueue &commandQueue,
|
||||
OCLRT::HwPerfCounter &hwPerfCounter,
|
||||
OCLRT::LinearStream *commandStream,
|
||||
bool start) {
|
||||
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
|
||||
uint64_t address = start ? reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.dmaFenceIdBegin))
|
||||
: reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.dmaFenceIdEnd));
|
||||
|
||||
auto pNoopIdRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pNoopIdRegister = MI_STORE_REGISTER_MEM::sInit();
|
||||
pNoopIdRegister->setRegisterAddress(OCLRT::INSTR_MMIO_NOOPID);
|
||||
pNoopIdRegister->setMemoryAddress(address);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchPerfCountersReadFreqRegisterCommands(
|
||||
CommandQueue &commandQueue,
|
||||
OCLRT::HwPerfCounter &hwPerfCounter,
|
||||
OCLRT::LinearStream *commandStream,
|
||||
bool start) {
|
||||
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
|
||||
uint64_t address = start ? reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.coreFreqBegin))
|
||||
: reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.coreFreqEnd));
|
||||
|
||||
auto pCoreFreqRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pCoreFreqRegister = MI_STORE_REGISTER_MEM::sInit();
|
||||
pCoreFreqRegister->setRegisterAddress(OCLRT::INSTR_MMIO_RPSTAT1);
|
||||
pCoreFreqRegister->setMemoryAddress(address);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchPerfCountersGeneralPurposeCounterCommands(
|
||||
CommandQueue &commandQueue,
|
||||
OCLRT::HwPerfCounter &hwPerfCounter,
|
||||
OCLRT::LinearStream *commandStream,
|
||||
bool start) {
|
||||
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
uint64_t address = 0;
|
||||
const uint64_t baseAddress = start ? reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.reportBegin.gp))
|
||||
: reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.reportEnd.gp));
|
||||
|
||||
// Read General Purpose counters
|
||||
for (uint16_t i = 0; i < OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT; i++) {
|
||||
auto pGeneralPurposeRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pGeneralPurposeRegister = MI_STORE_REGISTER_MEM::sInit();
|
||||
uint32_t regAddr = INSTR_GFX_OFFSETS::INSTR_PERF_CNT_1_DW0 + i * sizeof(cl_uint);
|
||||
pGeneralPurposeRegister->setRegisterAddress(regAddr);
|
||||
//Gp field is 2*uint64 wide so it can hold 4 uint32
|
||||
address = baseAddress + i * sizeof(cl_uint);
|
||||
pGeneralPurposeRegister->setMemoryAddress(address);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchPerfCountersUserCounterCommands(
|
||||
CommandQueue &commandQueue,
|
||||
OCLRT::HwPerfCounter &hwPerfCounter,
|
||||
OCLRT::LinearStream *commandStream,
|
||||
bool start) {
|
||||
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
|
||||
uint64_t address = 0;
|
||||
const uint64_t baseAddr = start ? reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.reportBegin.user))
|
||||
: reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.reportEnd.user));
|
||||
uint32_t cmdNum = 0;
|
||||
uint32_t regAddr = 0;
|
||||
auto configData = commandQueue.getPerfCountersConfigData();
|
||||
auto userRegs = &configData->readRegs;
|
||||
|
||||
for (uint32_t i = 0; i < userRegs->regsCount; i++) {
|
||||
auto pRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pRegister = MI_STORE_REGISTER_MEM::sInit();
|
||||
|
||||
regAddr = userRegs->reg[i].offset;
|
||||
pRegister->setRegisterAddress(regAddr);
|
||||
//offset between base (low) registers is cl_ulong wide
|
||||
address = baseAddr + i * sizeof(cl_ulong);
|
||||
pRegister->setMemoryAddress(address);
|
||||
cmdNum++;
|
||||
|
||||
if (userRegs->reg[i].bitSize > 32) {
|
||||
pRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pRegister = MI_STORE_REGISTER_MEM::sInit();
|
||||
|
||||
regAddr += sizeof(cl_uint);
|
||||
pRegister->setRegisterAddress(regAddr);
|
||||
address += sizeof(cl_uint);
|
||||
pRegister->setMemoryAddress(address);
|
||||
cmdNum++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchPerfCountersOABufferStateCommands(
|
||||
CommandQueue &commandQueue,
|
||||
OCLRT::HwPerfCounter &hwPerfCounter,
|
||||
OCLRT::LinearStream *commandStream) {
|
||||
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
|
||||
uint64_t address = 0;
|
||||
//OA Status
|
||||
auto pOaRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pOaRegister = MI_STORE_REGISTER_MEM::sInit();
|
||||
pOaRegister->setRegisterAddress(INSTR_GFX_OFFSETS::INSTR_OA_STATUS);
|
||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.oaStatus));
|
||||
pOaRegister->setMemoryAddress(address);
|
||||
|
||||
//OA Head
|
||||
pOaRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pOaRegister = MI_STORE_REGISTER_MEM::sInit();
|
||||
pOaRegister->setRegisterAddress(INSTR_GFX_OFFSETS::INSTR_OA_HEAD_PTR);
|
||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.oaHead));
|
||||
pOaRegister->setMemoryAddress(address);
|
||||
|
||||
//OA Tail
|
||||
pOaRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*pOaRegister = MI_STORE_REGISTER_MEM::sInit();
|
||||
pOaRegister->setRegisterAddress(INSTR_GFX_OFFSETS::INSTR_OA_TAIL_PTR);
|
||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.oaTail));
|
||||
pOaRegister->setMemoryAddress(address);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchPerfCountersCommandsStart(
|
||||
CommandQueue &commandQueue,
|
||||
OCLRT::HwPerfCounter &hwPerfCounter,
|
||||
OCLRT::LinearStream *commandStream) {
|
||||
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
|
||||
|
||||
auto perfCounters = commandQueue.getPerfCounters();
|
||||
|
||||
uint32_t currentReportId = perfCounters->getCurrentReportId();
|
||||
uint64_t address = 0;
|
||||
//flush command streamer
|
||||
auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
||||
*pPipeControlCmd = PIPE_CONTROL::sInit();
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
|
||||
//Store value of NOOPID register
|
||||
dispatchPerfCountersNoopidRegisterCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream, true);
|
||||
|
||||
//Read Core Frequency
|
||||
dispatchPerfCountersReadFreqRegisterCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream, true);
|
||||
|
||||
dispatchPerfCountersGeneralPurposeCounterCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream, true);
|
||||
|
||||
auto pReportPerfCount = (MI_REPORT_PERF_COUNT *)commandStream->getSpace(sizeof(MI_REPORT_PERF_COUNT));
|
||||
*pReportPerfCount = MI_REPORT_PERF_COUNT::sInit();
|
||||
pReportPerfCount->setReportId(currentReportId);
|
||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.reportBegin.oa));
|
||||
pReportPerfCount->setMemoryAddress(address);
|
||||
|
||||
//Timestamp: Global Start
|
||||
pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
||||
*pPipeControlCmd = PIPE_CONTROL::sInit();
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
pPipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
|
||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWTimeStamp.GlobalStartTS));
|
||||
pPipeControlCmd->setAddress(static_cast<uint32_t>(address & ((uint64_t)UINT32_MAX)));
|
||||
pPipeControlCmd->setAddressHigh(static_cast<uint32_t>(address >> 32));
|
||||
|
||||
dispatchPerfCountersUserCounterCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream, true);
|
||||
|
||||
commandQueue.sendPerfCountersConfig();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchPerfCountersCommandsEnd(
|
||||
CommandQueue &commandQueue,
|
||||
OCLRT::HwPerfCounter &hwPerfCounter,
|
||||
OCLRT::LinearStream *commandStream) {
|
||||
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
|
||||
|
||||
auto perfCounters = commandQueue.getPerfCounters();
|
||||
|
||||
uint32_t currentReportId = perfCounters->getCurrentReportId();
|
||||
uint64_t address = 0;
|
||||
|
||||
//flush command streamer
|
||||
auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
||||
*pPipeControlCmd = PIPE_CONTROL::sInit();
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
|
||||
dispatchPerfCountersOABufferStateCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream);
|
||||
|
||||
//Timestamp: Global End
|
||||
pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
||||
*pPipeControlCmd = PIPE_CONTROL::sInit();
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
pPipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
|
||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWTimeStamp.GlobalEndTS));
|
||||
pPipeControlCmd->setAddress(static_cast<uint32_t>(address & ((uint64_t)UINT32_MAX)));
|
||||
pPipeControlCmd->setAddressHigh(static_cast<uint32_t>(address >> 32));
|
||||
|
||||
auto pReportPerfCount = (MI_REPORT_PERF_COUNT *)commandStream->getSpace(sizeof(MI_REPORT_PERF_COUNT));
|
||||
*pReportPerfCount = MI_REPORT_PERF_COUNT::sInit();
|
||||
pReportPerfCount->setReportId(currentReportId);
|
||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.reportEnd.oa));
|
||||
pReportPerfCount->setMemoryAddress(address);
|
||||
|
||||
dispatchPerfCountersGeneralPurposeCounterCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream, false);
|
||||
|
||||
//Store value of NOOPID register
|
||||
dispatchPerfCountersNoopidRegisterCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream, false);
|
||||
|
||||
//Read Core Frequency
|
||||
dispatchPerfCountersReadFreqRegisterCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream, false);
|
||||
|
||||
dispatchPerfCountersUserCounterCommands<GfxFamily>(commandQueue, hwPerfCounter, commandStream, false);
|
||||
|
||||
perfCounters->setCpuTimestamp();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchWalker(
|
||||
CommandQueue &commandQueue,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
KernelOperation **blockedCommandsData,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
OCLRT::HwPerfCounter *hwPerfCounter,
|
||||
bool blockQueue = false,
|
||||
unsigned int commandType = 0) {
|
||||
|
||||
OCLRT::LinearStream *commandStream = nullptr;
|
||||
OCLRT::IndirectHeap *dsh = nullptr, *ish = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
bool executionModelKernel = multiDispatchInfo.begin()->getKernel()->isParentKernel;
|
||||
|
||||
// Allocate command stream and indirect heaps
|
||||
if (blockQueue) {
|
||||
using KCH = KernelCommandsHelper<GfxFamily>;
|
||||
commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
|
||||
if (executionModelKernel) {
|
||||
uint32_t offsetDsh = commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset();
|
||||
uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize;
|
||||
|
||||
dsh = allocateIndirectHeap([&multiDispatchInfo, offsetDsh] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo) + KCH::getTotalSizeRequiredIOH(multiDispatchInfo) + offsetDsh; });
|
||||
dsh->getSpace(colorCalcSize);
|
||||
ioh = dsh;
|
||||
} else {
|
||||
dsh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredDSH(multiDispatchInfo); });
|
||||
ioh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredIOH(multiDispatchInfo); });
|
||||
}
|
||||
ish = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredIH(multiDispatchInfo); });
|
||||
ssh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredSSH(multiDispatchInfo); });
|
||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh),
|
||||
UniqueIH(ish), UniqueIH(ioh), UniqueIH(ssh));
|
||||
if (executionModelKernel)
|
||||
(*blockedCommandsData)->doNotFreeISH = true;
|
||||
} else {
|
||||
commandStream = &commandQueue.getCS(0);
|
||||
if (executionModelKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) {
|
||||
commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE);
|
||||
}
|
||||
dsh = &getIndirectHeap<GfxFamily, IndirectHeap::DYNAMIC_STATE>(commandQueue, multiDispatchInfo);
|
||||
ish = &getIndirectHeap<GfxFamily, IndirectHeap::INSTRUCTION>(commandQueue, multiDispatchInfo);
|
||||
ioh = &getIndirectHeap<GfxFamily, IndirectHeap::INDIRECT_OBJECT>(commandQueue, multiDispatchInfo);
|
||||
ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo);
|
||||
}
|
||||
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
dsh->align(KernelCommandsHelper<GfxFamily>::alignInterfaceDescriptorData);
|
||||
|
||||
const size_t offsetInterfaceDescriptorTable = dsh->getUsed();
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
size_t numDispatches = multiDispatchInfo.size();
|
||||
totalInterfaceDescriptorTableSize *= numDispatches;
|
||||
|
||||
if (!executionModelKernel) {
|
||||
dsh->getSpace(totalInterfaceDescriptorTableSize);
|
||||
} else {
|
||||
dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed());
|
||||
}
|
||||
|
||||
// Program media interface descriptor load
|
||||
KernelCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
|
||||
*commandStream,
|
||||
offsetInterfaceDescriptorTable,
|
||||
totalInterfaceDescriptorTableSize);
|
||||
|
||||
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
auto &kernel = *dispatchInfo.getKernel();
|
||||
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3));
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3));
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().y == 1 || dispatchInfo.getDim() >= 2));
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getOffset().z == 0 || dispatchInfo.getDim() == 3));
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getOffset().y == 0 || dispatchInfo.getDim() >= 2));
|
||||
|
||||
// Determine SIMD size
|
||||
uint32_t simd = kernel.getKernelInfo().getMaxSimdSize();
|
||||
|
||||
// If we don't have a required WGS, compute one opportunistically
|
||||
auto maxWorkGroupSize = static_cast<uint32_t>(commandQueue.getDevice().getDeviceInfo().maxWorkGroupSize);
|
||||
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
||||
provideLocalWorkGroupSizeHints(commandQueue.getContextPtr(), maxWorkGroupSize, dispatchInfo);
|
||||
}
|
||||
|
||||
//Get dispatch geometry
|
||||
uint32_t dim = dispatchInfo.getDim();
|
||||
Vec3<size_t> gws = dispatchInfo.getGWS();
|
||||
Vec3<size_t> offset = dispatchInfo.getOffset();
|
||||
Vec3<size_t> swgs = dispatchInfo.getStartOfWorkgroups();
|
||||
|
||||
// Compute local workgroup sizes
|
||||
Vec3<size_t> lws = (dispatchInfo.getLocalWorkgroupSize().x > 0) ? dispatchInfo.getLocalWorkgroupSize() : generateWorkgroupSize(dispatchInfo);
|
||||
Vec3<size_t> elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws;
|
||||
|
||||
// Compute number of work groups
|
||||
Vec3<size_t> twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() : generateWorkgroupsNumber(gws, lws);
|
||||
Vec3<size_t> nwgs = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : twgs;
|
||||
|
||||
// Patch our kernel constants
|
||||
*kernel.globalWorkOffsetX = static_cast<uint32_t>(offset.x);
|
||||
*kernel.globalWorkOffsetY = static_cast<uint32_t>(offset.y);
|
||||
*kernel.globalWorkOffsetZ = static_cast<uint32_t>(offset.z);
|
||||
|
||||
*kernel.globalWorkSizeX = static_cast<uint32_t>(gws.x);
|
||||
*kernel.globalWorkSizeY = static_cast<uint32_t>(gws.y);
|
||||
*kernel.globalWorkSizeZ = static_cast<uint32_t>(gws.z);
|
||||
|
||||
if ((&dispatchInfo == &*multiDispatchInfo.begin()) || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) {
|
||||
*kernel.localWorkSizeX = static_cast<uint32_t>(lws.x);
|
||||
*kernel.localWorkSizeY = static_cast<uint32_t>(lws.y);
|
||||
*kernel.localWorkSizeZ = static_cast<uint32_t>(lws.z);
|
||||
}
|
||||
|
||||
*kernel.localWorkSizeX2 = static_cast<uint32_t>(lws.x);
|
||||
*kernel.localWorkSizeY2 = static_cast<uint32_t>(lws.y);
|
||||
*kernel.localWorkSizeZ2 = static_cast<uint32_t>(lws.z);
|
||||
|
||||
*kernel.enqueuedLocalWorkSizeX = static_cast<uint32_t>(elws.x);
|
||||
*kernel.enqueuedLocalWorkSizeY = static_cast<uint32_t>(elws.y);
|
||||
*kernel.enqueuedLocalWorkSizeZ = static_cast<uint32_t>(elws.z);
|
||||
|
||||
if (&dispatchInfo == &*multiDispatchInfo.begin()) {
|
||||
*kernel.numWorkGroupsX = static_cast<uint32_t>(twgs.x);
|
||||
*kernel.numWorkGroupsY = static_cast<uint32_t>(twgs.y);
|
||||
*kernel.numWorkGroupsZ = static_cast<uint32_t>(twgs.z);
|
||||
}
|
||||
|
||||
*kernel.workDim = dim;
|
||||
|
||||
// Send our indirect object data
|
||||
size_t localWorkSizes[3] = {lws.x, lws.y, lws.z};
|
||||
|
||||
auto offsetCrossThreadData = KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
*commandStream,
|
||||
*dsh,
|
||||
*ish,
|
||||
*ioh,
|
||||
*ssh,
|
||||
kernel,
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
interfaceDescriptorIndex);
|
||||
|
||||
if (&dispatchInfo == &*multiDispatchInfo.begin()) {
|
||||
// If hwTimeStampAlloc is passed (not nullptr), then we know that profiling is enabled
|
||||
if (hwTimeStamps != nullptr) {
|
||||
dispatchProfilingCommandsStart<GfxFamily>(*hwTimeStamps, commandStream);
|
||||
}
|
||||
if (hwPerfCounter != nullptr) {
|
||||
dispatchPerfCountersCommandsStart<GfxFamily>(commandQueue, *hwPerfCounter, commandStream);
|
||||
}
|
||||
}
|
||||
|
||||
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
|
||||
applyWADisableLSQCROPERFforOCL<GfxFamily>(commandStream, kernel, true);
|
||||
|
||||
// Program the walker. Invokes execution so all state should already be programmed
|
||||
typedef typename GfxFamily::GPGPU_WALKER GPGPU_WALKER;
|
||||
auto pGpGpuWalkerCmd = (GPGPU_WALKER *)commandStream->getSpace(sizeof(GPGPU_WALKER));
|
||||
*pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||
|
||||
size_t globalOffsets[3] = {offset.x, offset.y, offset.z};
|
||||
size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z};
|
||||
size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z};
|
||||
auto localWorkSize = setGpgpuWalkerThreadData<GfxFamily>(pGpGpuWalkerCmd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd);
|
||||
|
||||
pGpGpuWalkerCmd->setIndirectDataStartAddress((uint32_t)offsetCrossThreadData);
|
||||
DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0);
|
||||
pGpGpuWalkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++);
|
||||
|
||||
auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload;
|
||||
DEBUG_BREAK_IF(nullptr == threadPayload);
|
||||
|
||||
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
|
||||
auto localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, numChannels);
|
||||
localIdSizePerThread = std::max(localIdSizePerThread, sizeof(GRF));
|
||||
|
||||
auto sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkSize) * localIdSizePerThread;
|
||||
DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group
|
||||
|
||||
auto sizeCrossThreadData = kernel.getCrossThreadDataSize();
|
||||
auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
pGpGpuWalkerCmd->setIndirectDataLength(IndirectDataLength);
|
||||
|
||||
// Implement disabling special WA DisableLSQCROPERFforOCL if needed
|
||||
applyWADisableLSQCROPERFforOCL<GfxFamily>(commandStream, kernel, false);
|
||||
}
|
||||
|
||||
// If hwTimeStamps is passed (not nullptr), then we know that profiling is enabled
|
||||
if (hwTimeStamps != nullptr) {
|
||||
dispatchProfilingCommandsEnd<GfxFamily>(*hwTimeStamps, commandStream);
|
||||
}
|
||||
if (hwPerfCounter != nullptr) {
|
||||
dispatchPerfCountersCommandsEnd<GfxFamily>(commandQueue, *hwPerfCounter, commandStream);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchWalker(
|
||||
CommandQueue &commandQueue,
|
||||
const Kernel &kernel,
|
||||
cl_uint workDim,
|
||||
const size_t globalOffsets[3],
|
||||
const size_t workItems[3],
|
||||
const size_t *localWorkSizesIn,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
KernelOperation **blockedCommandsData,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
HwPerfCounter *hwPerfCounter,
|
||||
bool blockQueue = false) {
|
||||
|
||||
DispatchInfo dispatchInfo(const_cast<Kernel *>(&kernel), workDim, workItems, localWorkSizesIn, globalOffsets);
|
||||
dispatchWalker<GfxFamily>(commandQueue, dispatchInfo, numEventsInWaitList, eventWaitList,
|
||||
blockedCommandsData, hwTimeStamps, hwPerfCounter, blockQueue);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchScheduler(
|
||||
CommandQueue &commandQueue,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
SchedulerKernel &scheduler) {
|
||||
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
|
||||
OCLRT::LinearStream *commandStream = nullptr;
|
||||
OCLRT::IndirectHeap *dsh = nullptr, *ish = nullptr, *ioh = nullptr, *ssh = nullptr;
|
||||
|
||||
commandStream = &commandQueue.getCS(0);
|
||||
// note : below code assumes that caller to dispatchScheduler "preallocated" memory
|
||||
// required for execution model in below heap managers
|
||||
dsh = devQueueHw.getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ish = &commandQueue.getIndirectHeap(IndirectHeap::INSTRUCTION);
|
||||
ssh = &commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE);
|
||||
|
||||
bool dcFlush = false;
|
||||
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, dcFlush);
|
||||
|
||||
uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex;
|
||||
const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize;
|
||||
const size_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable;
|
||||
const size_t totalInterfaceDescriptorTableSize = devQueueHw.interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
|
||||
// Program media interface descriptor load
|
||||
KernelCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
|
||||
*commandStream,
|
||||
offsetInterfaceDescriptor,
|
||||
totalInterfaceDescriptorTableSize);
|
||||
|
||||
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
|
||||
|
||||
// Determine SIMD size
|
||||
uint32_t simd = scheduler.getKernelInfo().getMaxSimdSize();
|
||||
DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20);
|
||||
|
||||
// Patch our kernel constants
|
||||
*scheduler.globalWorkOffsetX = 0;
|
||||
*scheduler.globalWorkOffsetY = 0;
|
||||
*scheduler.globalWorkOffsetZ = 0;
|
||||
|
||||
*scheduler.globalWorkSizeX = (uint32_t)scheduler.getGws();
|
||||
*scheduler.globalWorkSizeY = 1;
|
||||
*scheduler.globalWorkSizeZ = 1;
|
||||
|
||||
*scheduler.localWorkSizeX = (uint32_t)scheduler.getLws();
|
||||
*scheduler.localWorkSizeY = 1;
|
||||
*scheduler.localWorkSizeZ = 1;
|
||||
|
||||
*scheduler.localWorkSizeX2 = (uint32_t)scheduler.getLws();
|
||||
*scheduler.localWorkSizeY2 = 1;
|
||||
*scheduler.localWorkSizeZ2 = 1;
|
||||
|
||||
*scheduler.enqueuedLocalWorkSizeX = (uint32_t)scheduler.getLws();
|
||||
*scheduler.enqueuedLocalWorkSizeY = 1;
|
||||
*scheduler.enqueuedLocalWorkSizeZ = 1;
|
||||
|
||||
*scheduler.numWorkGroupsX = (uint32_t)(scheduler.getGws() / scheduler.getLws());
|
||||
*scheduler.numWorkGroupsY = 0;
|
||||
*scheduler.numWorkGroupsZ = 0;
|
||||
|
||||
*scheduler.workDim = 1;
|
||||
|
||||
// Send our indirect object data
|
||||
size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1};
|
||||
|
||||
// Create indirectHeap for IOH that is located at the end of device enqueue DSH
|
||||
size_t curbeOffset = devQueueHw.setSchedulerCrossThreadData(scheduler);
|
||||
IndirectHeap indirectObjectHeap(dsh->getBase(), dsh->getMaxAvailableSpace());
|
||||
indirectObjectHeap.getSpace(curbeOffset);
|
||||
ioh = &indirectObjectHeap;
|
||||
|
||||
auto offsetCrossThreadData = KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
*commandStream,
|
||||
*dsh,
|
||||
*ish,
|
||||
*ioh,
|
||||
*ssh,
|
||||
scheduler,
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
interfaceDescriptorIndex);
|
||||
|
||||
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
|
||||
applyWADisableLSQCROPERFforOCL<GfxFamily>(commandStream, scheduler, true);
|
||||
|
||||
// Program the walker. Invokes execution so all state should already be programmed
|
||||
auto pGpGpuWalkerCmd = (GPGPU_WALKER *)commandStream->getSpace(sizeof(GPGPU_WALKER));
|
||||
*pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
|
||||
|
||||
size_t globalOffsets[3] = {0, 0, 0};
|
||||
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
|
||||
auto localWorkSize = setGpgpuWalkerThreadData<GfxFamily>(pGpGpuWalkerCmd, globalOffsets, globalOffsets, workGroups, localWorkSizes, simd);
|
||||
|
||||
pGpGpuWalkerCmd->setIndirectDataStartAddress((uint32_t)offsetCrossThreadData);
|
||||
DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0);
|
||||
pGpGpuWalkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex);
|
||||
|
||||
auto threadPayload = scheduler.getKernelInfo().patchInfo.threadPayload;
|
||||
DEBUG_BREAK_IF(nullptr == threadPayload);
|
||||
|
||||
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
|
||||
auto localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, numChannels);
|
||||
localIdSizePerThread = std::max(localIdSizePerThread, sizeof(GRF));
|
||||
|
||||
auto sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkSize) * localIdSizePerThread;
|
||||
DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group
|
||||
|
||||
auto sizeCrossThreadData = scheduler.getCrossThreadDataSize();
|
||||
auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
pGpGpuWalkerCmd->setIndirectDataLength(IndirectDataLength);
|
||||
|
||||
// Implement disabling special WA DisableLSQCROPERFforOCL if needed
|
||||
applyWADisableLSQCROPERFforOCL<GfxFamily>(commandStream, scheduler, false);
|
||||
|
||||
// Do not put BB_START only when returning in first Scheduler run
|
||||
if (devQueueHw.getSchedulerReturnInstance() != 1) {
|
||||
|
||||
commandQueue.getDevice().getCommandStreamReceiver().addPipeControl(*commandStream, true);
|
||||
|
||||
// Add BB Start Cmd to the SLB in the Primary Batch Buffer
|
||||
auto *bbStart = (MI_BATCH_BUFFER_START *)commandStream->getSpace(sizeof(MI_BATCH_BUFFER_START));
|
||||
*bbStart = MI_BATCH_BUFFER_START::sInit();
|
||||
bbStart->setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH);
|
||||
uint64_t slbAddress = devQueueHw.getSlbBuffer()->getGpuAddress();
|
||||
bbStart->setBatchBufferStartAddressGraphicsaddress472(slbAddress);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, unsigned int eventType>
|
||||
struct EnqueueOperation {
|
||||
static_assert(eventType != CL_COMMAND_NDRANGE_KERNEL, "for eventType CL_COMMAND_NDRANGE_KERNEL use specialization class");
|
||||
static_assert(eventType != CL_COMMAND_MARKER, "for eventType CL_COMMAND_MARKER use specialization class");
|
||||
static_assert(eventType != CL_COMMAND_MIGRATE_MEM_OBJECTS, "for eventType CL_COMMAND_MIGRATE_MEM_OBJECTS use specialization class");
|
||||
static size_t getTotalSizeRequiredCS(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
size_t size = KernelCommandsHelper<GfxFamily>::getSizeRequiredCS() +
|
||||
sizeof(typename GfxFamily::PIPE_CONTROL) * (KernelCommandsHelper<GfxFamily>::isPipeControlWArequired() ? 2 : 1);
|
||||
if (reserveProfilingCmdsSpace) {
|
||||
size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
}
|
||||
if (reservePerfCounters) {
|
||||
//start cmds
|
||||
//P_C: flush CS & TimeStamp BEGIN
|
||||
size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
//SRM NOOPID & Frequency
|
||||
size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//gp registers
|
||||
size += OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//report perf count
|
||||
size += sizeof(typename GfxFamily::MI_REPORT_PERF_COUNT);
|
||||
//user registers
|
||||
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
|
||||
//end cmds
|
||||
//P_C: flush CS & TimeStamp END;
|
||||
size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
//OA buffer (status head, tail)
|
||||
size += 3 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//report perf count
|
||||
size += sizeof(typename GfxFamily::MI_REPORT_PERF_COUNT);
|
||||
//gp registers
|
||||
size += OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//SRM NOOPID & Frequency
|
||||
size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//user registers
|
||||
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
}
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
auto &kernel = *dispatchInfo.getKernel();
|
||||
size += sizeof(typename GfxFamily::GPGPU_WALKER);
|
||||
size += getSizeForWADisableLSQCROPERFforOCL<GfxFamily>(&kernel);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static size_t getSizeRequiredCS(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
|
||||
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper<GfxFamily>::getSizeRequiredCS() +
|
||||
sizeof(typename GfxFamily::PIPE_CONTROL) * (KernelCommandsHelper<GfxFamily>::isPipeControlWArequired() ? 2 : 1);
|
||||
|
||||
if (reserveProfilingCmdsSpace) {
|
||||
size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
}
|
||||
if (reservePerfCounters) {
|
||||
//start cmds
|
||||
//P_C: flush CS & TimeStamp BEGIN
|
||||
size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
//SRM NOOPID & Frequency
|
||||
size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//gp registers
|
||||
size += OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//report perf count
|
||||
size += sizeof(typename GfxFamily::MI_REPORT_PERF_COUNT);
|
||||
//user registers
|
||||
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
|
||||
//end cmds
|
||||
//P_C: flush CS & TimeStamp END;
|
||||
size += 2 * sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
//OA buffer (status head, tail)
|
||||
size += 3 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//report perf count
|
||||
size += sizeof(typename GfxFamily::MI_REPORT_PERF_COUNT);
|
||||
//gp registers
|
||||
size += OCLRT::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//SRM NOOPID & Frequency
|
||||
size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//user registers
|
||||
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
}
|
||||
size += getSizeForWADisableLSQCROPERFforOCL<GfxFamily>(pKernel);
|
||||
|
||||
return size;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename GfxFamily, unsigned int eventType>
|
||||
LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const Kernel *pKernel) {
|
||||
auto expectedSizeCS = EnqueueOperation<GfxFamily, eventType>::getSizeRequiredCS(reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, pKernel);
|
||||
return commandQueue.getCS(expectedSizeCS);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, unsigned int eventType>
|
||||
LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
size_t expectedSizeCS = 0;
|
||||
Kernel *parentKernel = multiDispatchInfo.size() > 0 ? multiDispatchInfo.begin()->getKernel() : nullptr;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily, eventType>::getSizeRequiredCS(reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, dispatchInfo.getKernel());
|
||||
}
|
||||
if (parentKernel && parentKernel->isParentKernel) {
|
||||
SchedulerKernel &scheduler = BuiltIns::getInstance().getSchedulerKernel(parentKernel->getContext());
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily, eventType>::getSizeRequiredCS(reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, &scheduler);
|
||||
}
|
||||
return commandQueue.getCS(expectedSizeCS);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, IndirectHeap::Type heapType>
|
||||
IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
size_t expectedSize = 0;
|
||||
IndirectHeap *ih = nullptr;
|
||||
|
||||
// clang-format off
|
||||
switch(heapType) {
|
||||
case IndirectHeap::DYNAMIC_STATE: expectedSize = KernelCommandsHelper<GfxFamily>::getTotalSizeRequiredDSH(multiDispatchInfo); break;
|
||||
case IndirectHeap::INSTRUCTION: expectedSize = KernelCommandsHelper<GfxFamily>::getTotalSizeRequiredIH( multiDispatchInfo); break;
|
||||
case IndirectHeap::INDIRECT_OBJECT: expectedSize = KernelCommandsHelper<GfxFamily>::getTotalSizeRequiredIOH(multiDispatchInfo); break;
|
||||
case IndirectHeap::SURFACE_STATE: expectedSize = KernelCommandsHelper<GfxFamily>::getTotalSizeRequiredSSH(multiDispatchInfo); break;
|
||||
}
|
||||
// clang-format on
|
||||
|
||||
if (multiDispatchInfo.begin()->getKernel()->isParentKernel) {
|
||||
if (heapType == IndirectHeap::INSTRUCTION || heapType == IndirectHeap::SURFACE_STATE) {
|
||||
expectedSize += KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<heapType>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
|
||||
} else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
|
||||
{
|
||||
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());
|
||||
DEBUG_BREAK_IF(pDevQueue == nullptr);
|
||||
ih = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
}
|
||||
}
|
||||
|
||||
if (ih == nullptr)
|
||||
ih = &commandQueue.getIndirectHeap(heapType, expectedSize);
|
||||
|
||||
return *ih;
|
||||
}
|
||||
} // namespace OCLRT
|
||||
35
runtime/command_queue/dispatch_walker_helper.h
Normal file
35
runtime/command_queue/dispatch_walker_helper.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Declarations for the DisableLSQCROPERFforOCL workaround helpers used while dispatching walkers.
namespace OCLRT {
|
||||
|
||||
// Mask with only bit 27 set.
// NOTE(review): presumably the "LQSC RO perf disable" bit of L3SQC_REG4 - confirm against hardware docs.
#define L3SQC_BIT_LQSC_RO_PERF_DIS 0x08000000
|
||||
// NOTE(review): presumably the register offset of L3SQC_REG4 - confirm against hardware docs.
#define L3SQC_REG4 0xB118
|
||||
|
||||
// Declaration only; the definition is not part of this header.
// dispatchScheduler calls this with disablePerfMode == true before programming its
// GPGPU_WALKER and with disablePerfMode == false right after it.
template <typename GfxFamily>
|
||||
void applyWADisableLSQCROPERFforOCL(OCLRT::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode);
|
||||
|
||||
// Declaration only; the definition is not part of this header.
// The result is added to command-stream size estimates for the given kernel.
template <typename GfxFamily>
|
||||
size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel);
|
||||
}
|
||||
112
runtime/command_queue/dispatch_walker_helper.inl
Normal file
112
runtime/command_queue/dispatch_walker_helper.inl
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Register addresses used by addAluReadModifyWriteRegister as the scratch registers
// that receive the register value (R0) and the mask (R1).
// NOTE(review): presumably command-streamer general purpose registers - confirm against hardware docs.
#define CS_GPR_R0 0x2600
|
||||
#define CS_GPR_R1 0x2608
|
||||
|
||||
// Number of MI_MATH_ALU_INST_INLINE entries that follow the MI_MATH header in a read-modify-write sequence.
#define NUM_ALU_INST_FOR_READ_MODIFY_WRITE 4
|
||||
// Values written to the ALUOpcode field of an MI_MATH_ALU_INST_INLINE entry.
// NOTE(review): OR / AND are presumably what callers pass as the "operation" argument - verify against callers.
#define ALU_OPCODE_LOAD 0x080
|
||||
#define ALU_OPCODE_STORE 0x180
|
||||
#define ALU_OPCODE_OR 0x103
|
||||
#define ALU_OPCODE_AND 0x102
|
||||
|
||||
// Values written to the Operand1 / Operand2 fields of an MI_MATH_ALU_INST_INLINE entry.
#define ALU_REGISTER_R_0 0x0
|
||||
#define ALU_REGISTER_R_1 0x1
|
||||
#define ALU_REGISTER_R_SRCA 0x20
|
||||
#define ALU_REGISTER_R_SRCB 0x21
|
||||
#define ALU_REGISTER_R_ACCU 0x31
|
||||
|
||||
// Performs ReadModifyWrite operation on value of a register: Register = Register Operation Mask
|
||||
template <typename GfxFamily>
|
||||
void addAluReadModifyWriteRegister(
|
||||
OCLRT::LinearStream *pCommandStream,
|
||||
uint32_t aluRegister,
|
||||
uint32_t operation,
|
||||
uint32_t mask) {
|
||||
// Load "Register" value into CS_GPR_R0
|
||||
typedef typename GfxFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG;
|
||||
typedef typename GfxFamily::MI_MATH MI_MATH;
|
||||
typedef typename GfxFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
|
||||
auto pCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_REG)));
|
||||
*pCmd = MI_LOAD_REGISTER_REG::sInit();
|
||||
pCmd->setSourceRegisterAddress(aluRegister);
|
||||
pCmd->setDestinationRegisterAddress(CS_GPR_R0);
|
||||
|
||||
// Load "Mask" into CS_GPR_R1
|
||||
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||
auto pCmd2 = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)));
|
||||
*pCmd2 = MI_LOAD_REGISTER_IMM::sInit();
|
||||
pCmd2->setRegisterOffset(CS_GPR_R1);
|
||||
pCmd2->setDataDword(mask);
|
||||
|
||||
// Add instruction MI_MATH with 4 MI_MATH_ALU_INST_INLINE operands
|
||||
auto pCmd3 = reinterpret_cast<uint32_t *>(pCommandStream->getSpace(sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)));
|
||||
reinterpret_cast<MI_MATH *>(pCmd3)->DW0.Value = 0x0;
|
||||
reinterpret_cast<MI_MATH *>(pCmd3)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
|
||||
reinterpret_cast<MI_MATH *>(pCmd3)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
|
||||
// 0x3 - 5 Dwords length cmd (-2): 1 for MI_MATH, 4 for MI_MATH_ALU_INST_INLINE
|
||||
reinterpret_cast<MI_MATH *>(pCmd3)->DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1;
|
||||
pCmd3++;
|
||||
MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(pCmd3);
|
||||
|
||||
// Setup first operand of MI_MATH - load CS_GPR_R0 into register A
|
||||
pAluParam->DW0.BitField.ALUOpcode = ALU_OPCODE_LOAD;
|
||||
pAluParam->DW0.BitField.Operand1 = ALU_REGISTER_R_SRCA;
|
||||
pAluParam->DW0.BitField.Operand2 = ALU_REGISTER_R_0;
|
||||
pAluParam++;
|
||||
|
||||
// Setup second operand of MI_MATH - load CS_GPR_R1 into register B
|
||||
pAluParam->DW0.BitField.ALUOpcode = ALU_OPCODE_LOAD;
|
||||
pAluParam->DW0.BitField.Operand1 = ALU_REGISTER_R_SRCB;
|
||||
pAluParam->DW0.BitField.Operand2 = ALU_REGISTER_R_1;
|
||||
pAluParam++;
|
||||
|
||||
// Setup third operand of MI_MATH - "Operation" on registers A and B
|
||||
pAluParam->DW0.BitField.ALUOpcode = operation;
|
||||
pAluParam->DW0.BitField.Operand1 = 0;
|
||||
pAluParam->DW0.BitField.Operand2 = 0;
|
||||
pAluParam++;
|
||||
|
||||
// Setup fourth operand of MI_MATH - store result into CS_GPR_R0
|
||||
pAluParam->DW0.BitField.ALUOpcode = ALU_OPCODE_STORE;
|
||||
pAluParam->DW0.BitField.Operand1 = ALU_REGISTER_R_0;
|
||||
pAluParam->DW0.BitField.Operand2 = ALU_REGISTER_R_ACCU;
|
||||
|
||||
// LOAD value of CS_GPR_R0 into "Register"
|
||||
auto pCmd4 = reinterpret_cast<MI_LOAD_REGISTER_REG *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_REG)));
|
||||
*pCmd4 = MI_LOAD_REGISTER_REG::sInit();
|
||||
pCmd4->setSourceRegisterAddress(CS_GPR_R0);
|
||||
pCmd4->setDestinationRegisterAddress(aluRegister);
|
||||
|
||||
// Add PIPE_CONTROL to flush caches
|
||||
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
auto pCmd5 = reinterpret_cast<PIPE_CONTROL *>(pCommandStream->getSpace(sizeof(PIPE_CONTROL)));
|
||||
*pCmd5 = PIPE_CONTROL::sInit();
|
||||
pCmd5->setCommandStreamerStallEnable(true);
|
||||
pCmd5->setDcFlushEnable(true);
|
||||
pCmd5->setTextureCacheInvalidationEnable(true);
|
||||
pCmd5->setPipeControlFlushEnable(true);
|
||||
pCmd5->setStateCacheInvalidationEnable(true);
|
||||
}
|
||||
}
|
||||
55
runtime/command_queue/enqueue_barrier.h
Normal file
55
runtime/command_queue/enqueue_barrier.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include <new>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueBarrierWithWaitList(
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
cl_uint dimensions = 1;
|
||||
enqueueHandler<CL_COMMAND_BARRIER>(surfaces,
|
||||
false,
|
||||
nullptr,
|
||||
dimensions,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
687
runtime/command_queue/enqueue_common.h
Normal file
687
runtime/command_queue/enqueue_common.h
Normal file
@@ -0,0 +1,687 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_queue/dispatch_walker.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/event/event_builder.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/dispatch_info_builder.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/helpers/array_count.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/helpers/task_information.h"
|
||||
#include "runtime/program/printf_handler.h"
|
||||
#include "runtime/program/block_kernel_manager.h"
|
||||
#include "runtime/utilities/range.h"
|
||||
#include <new>
|
||||
#include <memory>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
inline bool shouldFlushDC(unsigned int commandType, PrintfHandler *printfHandler) {
|
||||
return (commandType == CL_COMMAND_READ_BUFFER ||
|
||||
commandType == CL_COMMAND_READ_BUFFER_RECT ||
|
||||
commandType == CL_COMMAND_READ_IMAGE ||
|
||||
commandType == CL_COMMAND_SVM_MAP ||
|
||||
printfHandler);
|
||||
}
|
||||
|
||||
inline bool isCommandWithoutKernel(unsigned int commandType) {
|
||||
return ((commandType == CL_COMMAND_BARRIER) || (commandType == CL_COMMAND_MARKER) ||
|
||||
(commandType == CL_COMMAND_MIGRATE_MEM_OBJECTS) ||
|
||||
(commandType == CL_COMMAND_SVM_MAP) ||
|
||||
(commandType == CL_COMMAND_SVM_UNMAP) ||
|
||||
(commandType == CL_COMMAND_SVM_FREE));
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) {}
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <unsigned int commandType, size_t surfaceCount>
|
||||
void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount],
|
||||
bool blocking,
|
||||
Kernel *kernel,
|
||||
cl_uint workDim,
|
||||
const size_t globalOffsets[3],
|
||||
const size_t workItems[3],
|
||||
const size_t *localWorkSizesIn,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
if (kernel == nullptr) {
|
||||
enqueueHandler<commandType>(surfaces, blocking, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event);
|
||||
} else {
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
|
||||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||
forceDispatchScheduler(multiDispatchInfo);
|
||||
} else {
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder;
|
||||
builder.setDispatchGeometry(workDim, workItems, localWorkSizesIn, globalOffsets);
|
||||
builder.setKernel(kernel);
|
||||
builder.bake(multiDispatchInfo);
|
||||
} else {
|
||||
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
|
||||
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, localWorkSizesIn, globalOffsets);
|
||||
|
||||
if (multiDispatchInfo.size() == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::forceDispatchScheduler(OCLRT::MultiDispatchInfo &multiDispatchInfo) {
|
||||
BuiltIns &builtIns = BuiltIns::getInstance();
|
||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
|
||||
DispatchInfo dispatchInfo(&scheduler, 1, Vec3<size_t>(scheduler.getGws(), 1, 1), Vec3<size_t>(scheduler.getLws(), 1, 1), Vec3<size_t>(0, 0, 0));
|
||||
|
||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||
|
||||
scheduler.createReflectionSurface();
|
||||
GraphicsAllocation *reflectionSurface = scheduler.getKernelReflectionSurface();
|
||||
|
||||
devQueueHw->resetDeviceQueue();
|
||||
|
||||
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
reflectionSurface,
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE).getGraphicsAllocation());
|
||||
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <unsigned int commandType>
|
||||
void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
size_t numSurfaceForResidency,
|
||||
bool blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
if (multiDispatchInfo.empty() && !isCommandWithoutKernel(commandType)) {
|
||||
enqueueHandler<CL_COMMAND_MARKER>(surfacesForResidency, numSurfaceForResidency, blocking, multiDispatchInfo,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
if (event) {
|
||||
castToObjectOrAbort<Event>(*event)->setCmdType(commandType);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
bool executionModelKernel = multiDispatchInfo.empty() ? false : multiDispatchInfo.begin()->getKernel()->isParentKernel;
|
||||
Kernel *parentKernel = executionModelKernel ? multiDispatchInfo.begin()->getKernel() : nullptr;
|
||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||
|
||||
HwTimeStamps *hwTimeStamps = nullptr;
|
||||
|
||||
TakeOwnershipWrapper<Device> deviceOwnership(*device);
|
||||
|
||||
TimeStampData queueTimeStamp;
|
||||
if (isProfilingEnabled() && event) {
|
||||
this->getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp);
|
||||
}
|
||||
|
||||
EventBuilder eventBuilder;
|
||||
if (event) {
|
||||
eventBuilder.create<Event>(this, commandType, Event::eventNotReady, 0);
|
||||
*event = eventBuilder.getEvent();
|
||||
if (eventBuilder.getEvent()->isProfilingEnabled()) {
|
||||
eventBuilder.getEvent()->setQueueTimeStamp(&queueTimeStamp);
|
||||
if (isCommandWithoutKernel(commandType)) {
|
||||
eventBuilder.getEvent()->setCPUProfilingPath(true);
|
||||
eventBuilder.getEvent()->setQueueTimeStamp();
|
||||
}
|
||||
}
|
||||
DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", commandType, "output Event", eventBuilder.getEvent());
|
||||
}
|
||||
|
||||
bool profilingRequired = (this->isProfilingEnabled() && event != nullptr);
|
||||
bool perfCountersRequired = false;
|
||||
perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
std::unique_ptr<PrintfHandler> printfHandler;
|
||||
bool slmUsed = false;
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
|
||||
auto taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList);
|
||||
auto blockQueue = (taskLevel == Event::eventNotReady) || isQueueBlocked();
|
||||
|
||||
// isQueueBlocked() may use commandStream resolving events tree, get start offset after the call
|
||||
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, profilingRequired, perfCountersRequired, multiDispatchInfo);
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
|
||||
// isQueueBlocked may unblock queue, get new taskLevel
|
||||
taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList);
|
||||
|
||||
DBG_LOG(EventsDebugEnable, "blockQueue", blockQueue, "virtualEvent", virtualEvent, "taskLevel", taskLevel);
|
||||
|
||||
if (DebugManager.flags.MakeEachEnqueueBlocking.get()) {
|
||||
blocking = true;
|
||||
}
|
||||
|
||||
if (executionModelKernel && !blockQueue) {
|
||||
while (!devQueueHw->isEMCriticalSectionFree())
|
||||
;
|
||||
}
|
||||
|
||||
auto updateTaskLevel = isTaskLevelUpdateRequired(taskLevel, eventWaitList, numEventsInWaitList, commandType);
|
||||
|
||||
if (updateTaskLevel) {
|
||||
taskLevel++;
|
||||
}
|
||||
|
||||
enqueueHandlerHook(commandType, multiDispatchInfo);
|
||||
|
||||
if (multiDispatchInfo.empty() == false) {
|
||||
HwPerfCounter *hwPerfCounter = nullptr;
|
||||
DebugManager.dumpKernelArgs(&multiDispatchInfo);
|
||||
|
||||
printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device));
|
||||
if (printfHandler) {
|
||||
printfHandler.get()->prepareDispatch(multiDispatchInfo);
|
||||
}
|
||||
|
||||
if ((this->isProfilingEnabled() && (eventBuilder.getEvent() != nullptr))) {
|
||||
// Get allocation for timestamps
|
||||
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStamp();
|
||||
if (this->isPerfCountersEnabled()) {
|
||||
hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounter();
|
||||
//PERF COUNTER: copy current configuration from queue to event
|
||||
eventBuilder.getEvent()->copyPerfCounters(this->getPerfCountersConfigData());
|
||||
}
|
||||
}
|
||||
|
||||
if (executionModelKernel) {
|
||||
parentKernel->createReflectionSurface();
|
||||
parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
|
||||
parentKernel->patchEventPool(context->getDefaultDeviceQueue());
|
||||
parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
|
||||
if (!blockQueue) {
|
||||
devQueueHw->resetDeviceQueue();
|
||||
devQueueHw->acquireEMCriticalSection();
|
||||
}
|
||||
}
|
||||
|
||||
dispatchWalker<GfxFamily>(
|
||||
*this,
|
||||
multiDispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
&blockedCommandsData,
|
||||
hwTimeStamps,
|
||||
hwPerfCounter,
|
||||
blockQueue,
|
||||
commandType);
|
||||
|
||||
commandStreamReceiver.setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
|
||||
|
||||
slmUsed = multiDispatchInfo.usesSlm();
|
||||
}
|
||||
|
||||
CompletionStamp completionStamp;
|
||||
if (!blockQueue) {
|
||||
if (executionModelKernel) {
|
||||
size_t minSizeISHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::INSTRUCTION>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
|
||||
|
||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::INSTRUCTION, minSizeISHForEM),
|
||||
getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
||||
multiDispatchInfo.begin()->getKernel(),
|
||||
(uint32_t)multiDispatchInfo.size(),
|
||||
taskCount,
|
||||
hwTimeStamps);
|
||||
|
||||
BuiltIns &builtIns = BuiltIns::getInstance();
|
||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
|
||||
|
||||
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE).getGraphicsAllocation(),
|
||||
devQueueHw->getDebugQueue());
|
||||
|
||||
dispatchScheduler<GfxFamily>(
|
||||
*this,
|
||||
*devQueueHw,
|
||||
scheduler);
|
||||
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
|
||||
// Update SLM usage
|
||||
slmUsed |= scheduler.slmTotalSize > 0;
|
||||
|
||||
size_t count = parentKernel->getProgram()->getBlockKernelManager()->getCount();
|
||||
|
||||
for (uint32_t surfaceIndex = 0; surfaceIndex < count; surfaceIndex++) {
|
||||
auto surface = parentKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(surfaceIndex);
|
||||
if (surface) {
|
||||
commandStreamReceiver.makeResident(*surface);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto submissionRequired = isCommandWithoutKernel(commandType) ? false : true;
|
||||
|
||||
if (submissionRequired) {
|
||||
completionStamp = enqueueNonBlocked<commandType>(
|
||||
surfacesForResidency,
|
||||
numSurfaceForResidency,
|
||||
commandStream,
|
||||
commandStreamStart,
|
||||
blocking,
|
||||
multiDispatchInfo,
|
||||
eventBuilder,
|
||||
taskLevel,
|
||||
slmUsed,
|
||||
printfHandler.get());
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
|
||||
}
|
||||
|
||||
if (executionModelKernel && devQueueHw->getSchedulerReturnInstance() > 0) {
|
||||
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
|
||||
|
||||
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
|
||||
simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE).getGraphicsAllocation(),
|
||||
devQueueHw->getDebugQueue());
|
||||
}
|
||||
} else {
|
||||
auto maxTaskCount = this->taskCount;
|
||||
for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) {
|
||||
auto event = castToObject<Event>(eventWaitList[eventId]);
|
||||
if (!event->isUserEvent()) {
|
||||
maxTaskCount = std::max(maxTaskCount, event->peekTaskCount());
|
||||
}
|
||||
}
|
||||
|
||||
//inherit data from event_wait_list and previous packets
|
||||
completionStamp.flushStamp = this->flushStamp->peekStamp();
|
||||
completionStamp.taskCount = maxTaskCount;
|
||||
completionStamp.taskLevel = taskLevel;
|
||||
|
||||
if (eventBuilder.getEvent() && isProfilingEnabled()) {
|
||||
TimeStampData submitTimeStamp;
|
||||
this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp();
|
||||
eventBuilder.getEvent()->setStartTimeStamp();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CompletionStamp cmplStamp = {
|
||||
Event::eventNotReady,
|
||||
taskLevel,
|
||||
0,
|
||||
EngineType::ENGINE_RCS};
|
||||
completionStamp = cmplStamp;
|
||||
}
|
||||
updateFromCompletionStamp(completionStamp);
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, completionStamp.taskLevel, completionStamp.flushStamp);
|
||||
DebugManager.log(DebugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", eventBuilder.getEvent(), "taskLevel", eventBuilder.getEvent()->taskLevel.load());
|
||||
}
|
||||
|
||||
if (blockQueue) {
|
||||
|
||||
if (executionModelKernel) {
|
||||
size_t minSizeISHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::INSTRUCTION>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
|
||||
size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
|
||||
blockedCommandsData->instructionHeapSizeEM = minSizeISHForEM;
|
||||
blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM;
|
||||
}
|
||||
|
||||
enqueueBlocked<commandType>(
|
||||
surfacesForResidency,
|
||||
numSurfaceForResidency,
|
||||
blocking,
|
||||
multiDispatchInfo,
|
||||
blockedCommandsData,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
slmUsed,
|
||||
eventBuilder,
|
||||
std::move(printfHandler));
|
||||
}
|
||||
|
||||
queueOwnership.unlock();
|
||||
deviceOwnership.unlock();
|
||||
|
||||
if (blockQueue) {
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
if (this->virtualEvent == eventBuilder.getEvent()) {
|
||||
eventBuilder.registerEvent();
|
||||
}
|
||||
}
|
||||
|
||||
if (blocking) {
|
||||
if (blockQueue) {
|
||||
while (isQueueBlocked())
|
||||
;
|
||||
waitUntilComplete(taskCount, flushStamp->peekStamp());
|
||||
} else {
|
||||
waitUntilComplete(taskCount, flushStamp->peekStamp());
|
||||
for (auto sIt = surfacesForResidency, sE = surfacesForResidency + numSurfaceForResidency;
|
||||
sIt != sE; ++sIt) {
|
||||
(*sIt)->setCompletionStamp(completionStamp, nullptr, nullptr);
|
||||
}
|
||||
if (printfHandler) {
|
||||
printfHandler->printEnqueueOutput();
|
||||
}
|
||||
commandStreamReceiver.cleanAllocationList(taskCount, TEMPORARY_ALLOCATION);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) {
|
||||
bool updateTaskLevel = true;
|
||||
//if we are blocked by user event then no update
|
||||
if (taskLevel == Event::eventNotReady) {
|
||||
updateTaskLevel = false;
|
||||
}
|
||||
//if we are executing command without kernel then it will inherit state from
|
||||
//previous commands, barrier is exception
|
||||
if (isCommandWithoutKernel(commandType) && commandType != CL_COMMAND_BARRIER) {
|
||||
updateTaskLevel = false;
|
||||
}
|
||||
//ooq special cases starts here
|
||||
if (this->isOOQEnabled()) {
|
||||
//if no wait list and barrier , do not update task level
|
||||
if (eventWaitList == nullptr && commandType != CL_COMMAND_BARRIER) {
|
||||
updateTaskLevel = false;
|
||||
}
|
||||
//if we have waitlist then deduce task level from waitlist and check if it is higher then current task level of queue
|
||||
if (eventWaitList != nullptr) {
|
||||
auto taskLevelFromEvents = getTaskLevelFromWaitList(0, numEventsInWaitList, eventWaitList);
|
||||
taskLevelFromEvents++;
|
||||
if (taskLevelFromEvents <= this->taskLevel) {
|
||||
updateTaskLevel = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return updateTaskLevel;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <unsigned int commandType>
|
||||
CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
Surface **surfaces,
|
||||
size_t surfaceCount,
|
||||
LinearStream &commandStream,
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
EventBuilder &eventBuilder,
|
||||
uint32_t taskLevel,
|
||||
bool slmUsed,
|
||||
PrintfHandler *printfHandler) {
|
||||
|
||||
UNRECOVERABLE_IF(multiDispatchInfo.empty());
|
||||
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
auto implicitFlush = false;
|
||||
|
||||
if (printfHandler) {
|
||||
blocking = true;
|
||||
printfHandler->makeResident(commandStreamReceiver);
|
||||
}
|
||||
|
||||
auto requiresCoherency = false;
|
||||
for (auto surface : CreateRange(surfaces, surfaceCount)) {
|
||||
surface->makeResident(commandStreamReceiver);
|
||||
requiresCoherency |= surface->IsCoherent;
|
||||
}
|
||||
|
||||
auto mediaSamplerRequired = false;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.getKernel()->makeResident(commandStreamReceiver);
|
||||
requiresCoherency |= dispatchInfo.getKernel()->requiresCoherency();
|
||||
mediaSamplerRequired |= dispatchInfo.getKernel()->isVmeKernel();
|
||||
}
|
||||
|
||||
if (mediaSamplerRequired) {
|
||||
DEBUG_BREAK_IF(device->getDeviceInfo().preemptionSupported != false);
|
||||
}
|
||||
|
||||
TimeStampData submitTimeStamp;
|
||||
if (isProfilingEnabled() && eventBuilder.getEvent()) {
|
||||
this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
|
||||
this->getDevice().getCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampAllocation());
|
||||
if (isPerfCountersEnabled()) {
|
||||
this->getDevice().getCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwPerfCounterAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
IndirectHeap *dsh = nullptr;
|
||||
IndirectHeap *ioh = nullptr;
|
||||
const bool executionModelKernel = multiDispatchInfo.begin()->getKernel()->isParentKernel;
|
||||
|
||||
if (executionModelKernel) {
|
||||
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(this->getContext().getDefaultDeviceQueue());
|
||||
DEBUG_BREAK_IF(pDevQueue == nullptr);
|
||||
dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
|
||||
ioh = dsh;
|
||||
implicitFlush = true;
|
||||
} else {
|
||||
dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT);
|
||||
}
|
||||
|
||||
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy());
|
||||
|
||||
DispatchFlags dispatchFlags;
|
||||
dispatchFlags.blocking = blocking;
|
||||
dispatchFlags.dcFlush = shouldFlushDC(commandType, printfHandler);
|
||||
dispatchFlags.useSLM = slmUsed;
|
||||
dispatchFlags.guardCommandBufferWithPipeControl = true;
|
||||
dispatchFlags.GSBA32BitRequired = commandType == CL_COMMAND_NDRANGE_KERNEL;
|
||||
dispatchFlags.mediaSamplerRequired = mediaSamplerRequired;
|
||||
dispatchFlags.requiresCoherency = requiresCoherency;
|
||||
dispatchFlags.low_priority = low_priority;
|
||||
dispatchFlags.implicitFlush = implicitFlush;
|
||||
dispatchFlags.flushStampReference = this->flushStamp->getStampReference();
|
||||
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
|
||||
dispatchFlags.outOfOrderExecutionAllowed = this->isOOQEnabled();
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
CompletionStamp completionStamp = commandStreamReceiver.flushTask(
|
||||
commandStream,
|
||||
commandStreamStart,
|
||||
*dsh,
|
||||
getIndirectHeap(IndirectHeap::INSTRUCTION),
|
||||
*ioh,
|
||||
getIndirectHeap(IndirectHeap::SURFACE_STATE),
|
||||
taskLevel,
|
||||
dispatchFlags);
|
||||
|
||||
for (auto surface : CreateRange(surfaces, surfaceCount)) {
|
||||
surface->setCompletionStamp(completionStamp, device, this);
|
||||
}
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.getKernel()->updateWithCompletionStamp(commandStreamReceiver, &completionStamp);
|
||||
}
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <unsigned int commandType>
|
||||
void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
Surface **surfaces,
|
||||
size_t surfaceCount,
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
KernelOperation *blockedCommandsData,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
bool slmUsed,
|
||||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> printfHandler) {
|
||||
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
|
||||
//store previous virtual event as it will add dependecies to new virtual event
|
||||
if (this->virtualEvent) {
|
||||
DBG_LOG(EventsDebugEnable, "enqueueBlocked", "previousVirtualEvent", this->virtualEvent);
|
||||
}
|
||||
|
||||
EventBuilder internalEventBuilder;
|
||||
EventBuilder *eventBuilder;
|
||||
// check if event will be exposed externally
|
||||
if (externalEventBuilder.getEvent()) {
|
||||
externalEventBuilder.getEvent()->incRefInternal();
|
||||
eventBuilder = &externalEventBuilder;
|
||||
DBG_LOG(EventsDebugEnable, "enqueueBlocked", "output event as virtualEvent", virtualEvent);
|
||||
} else {
|
||||
// it will be an internal event
|
||||
internalEventBuilder.create<VirtualEvent>(this, context);
|
||||
eventBuilder = &internalEventBuilder;
|
||||
DBG_LOG(EventsDebugEnable, "enqueueBlocked", "new virtualEvent", eventBuilder->getEvent());
|
||||
}
|
||||
eventBuilder->getEvent()->setCurrentCmdQVirtualEvent(true);
|
||||
|
||||
//update queue taskCount
|
||||
taskCount = eventBuilder->getEvent()->getCompletionStamp();
|
||||
|
||||
if (multiDispatchInfo.empty()) {
|
||||
DEBUG_BREAK_IF(!isCommandWithoutKernel(commandType));
|
||||
auto cmdSize = (unsigned int)EnqueueOperation<GfxFamily, commandType>::getSizeRequiredCS(isProfilingEnabled(),
|
||||
isPerfCountersEnabled(),
|
||||
*this,
|
||||
nullptr);
|
||||
auto cmd = std::unique_ptr<Command>(new CommandMarker(
|
||||
*this, commandStreamReceiver, commandType, cmdSize));
|
||||
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
||||
} else {
|
||||
//store task data in event
|
||||
std::vector<Surface *> allSurfaces;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.getKernel()->getResidency(allSurfaces);
|
||||
for (auto &surface : CreateRange(surfaces, surfaceCount)) {
|
||||
allSurfaces.push_back(surface->duplicate());
|
||||
}
|
||||
}
|
||||
|
||||
auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership
|
||||
auto cmd = std::unique_ptr<Command>(new CommandComputeKernel(
|
||||
*this,
|
||||
commandStreamReceiver,
|
||||
std::move(kernelOperation),
|
||||
allSurfaces,
|
||||
shouldFlushDC(commandType, printfHandler.get()),
|
||||
slmUsed,
|
||||
commandType == CL_COMMAND_NDRANGE_KERNEL,
|
||||
std::move(printfHandler),
|
||||
multiDispatchInfo.begin()->getKernel(),
|
||||
(uint32_t)multiDispatchInfo.size()));
|
||||
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
||||
}
|
||||
|
||||
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventWaitList, numEventsInWaitList));
|
||||
eventBuilder->addParentEvent(this->virtualEvent);
|
||||
eventBuilder->finalize();
|
||||
|
||||
if (this->virtualEvent) {
|
||||
this->virtualEvent->setCurrentCmdQVirtualEvent(false);
|
||||
this->virtualEvent->decRefInternal();
|
||||
}
|
||||
|
||||
this->virtualEvent = eventBuilder->getEvent();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::addMapUnmapToWaitlistEventsDependencies(const cl_event *eventWaitList,
|
||||
size_t numEventsInWaitlist,
|
||||
MapOperationType opType,
|
||||
MemObj *memObj,
|
||||
EventBuilder &externalEventBuilder) {
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
|
||||
EventBuilder internalEventBuilder;
|
||||
EventBuilder *eventBuilder;
|
||||
// check if event will be exposed externally
|
||||
if (externalEventBuilder.getEvent()) {
|
||||
externalEventBuilder.getEvent()->incRefInternal();
|
||||
eventBuilder = &externalEventBuilder;
|
||||
} else {
|
||||
// it will be an internal event
|
||||
internalEventBuilder.create<VirtualEvent>(this, context);
|
||||
eventBuilder = &internalEventBuilder;
|
||||
}
|
||||
|
||||
//store task data in event
|
||||
auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, commandStreamReceiver, *this));
|
||||
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
||||
|
||||
//bind output event with input events
|
||||
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventWaitList, numEventsInWaitlist));
|
||||
eventBuilder->addParentEvent(this->virtualEvent);
|
||||
eventBuilder->finalize();
|
||||
|
||||
if (this->virtualEvent) {
|
||||
this->virtualEvent->setCurrentCmdQVirtualEvent(false);
|
||||
this->virtualEvent->decRefInternal();
|
||||
}
|
||||
this->virtualEvent = eventBuilder->getEvent();
|
||||
}
|
||||
} // namespace OCLRT
|
||||
77
runtime/command_queue/enqueue_copy_buffer.h
Normal file
77
runtime/command_queue/enqueue_copy_buffer.h
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_queue/enqueue_common.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include <new>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
Buffer *srcBuffer,
|
||||
Buffer *dstBuffer,
|
||||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
size_t size,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
|
||||
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||
this->getContext(), this->getDevice());
|
||||
builder.takeOwnership(this->context);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcBuffer;
|
||||
dc.dstMemObj = dstBuffer;
|
||||
dc.srcOffset = {srcOffset, 0, 0};
|
||||
dc.dstOffset = {dstOffset, 0, 0};
|
||||
dc.size = {size, 0, 0};
|
||||
builder.buildDispatchInfos(dispatchInfo, dc);
|
||||
|
||||
MemObjSurface s1(srcBuffer);
|
||||
MemObjSurface s2(dstBuffer);
|
||||
Surface *surfaces[] = {&s1, &s2};
|
||||
|
||||
enqueueHandler<CL_COMMAND_COPY_BUFFER>(
|
||||
surfaces,
|
||||
false,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
builder.releaseOwnership();
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
80
runtime/command_queue/enqueue_copy_buffer_rect.h
Normal file
80
runtime/command_queue/enqueue_copy_buffer_rect.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include <new>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||
Buffer *srcBuffer,
|
||||
Buffer *dstBuffer,
|
||||
const size_t *srcOrigin,
|
||||
const size_t *dstOrigin,
|
||||
const size_t *region,
|
||||
size_t srcRowPitch,
|
||||
size_t srcSlicePitch,
|
||||
size_t dstRowPitch,
|
||||
size_t dstSlicePitch,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
|
||||
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
|
||||
this->getContext(), this->getDevice());
|
||||
builder.takeOwnership(this->context);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcBuffer;
|
||||
dc.dstMemObj = dstBuffer;
|
||||
dc.srcOffset = srcOrigin;
|
||||
dc.dstOffset = dstOrigin;
|
||||
dc.size = region;
|
||||
dc.srcRowPitch = srcRowPitch;
|
||||
dc.srcSlicePitch = srcSlicePitch;
|
||||
dc.dstRowPitch = dstRowPitch;
|
||||
dc.dstSlicePitch = dstSlicePitch;
|
||||
builder.buildDispatchInfos(dispatchInfo, dc);
|
||||
|
||||
enqueueHandler<CL_COMMAND_COPY_BUFFER_RECT>(
|
||||
dispatchInfo.getUsedSurfaces().begin(),
|
||||
dispatchInfo.getUsedSurfaces().size(),
|
||||
false,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
builder.releaseOwnership();
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
75
runtime/command_queue/enqueue_copy_buffer_to_image.h
Normal file
75
runtime/command_queue/enqueue_copy_buffer_to_image.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/helpers/surface_formats.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include <new>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferToImage(
|
||||
Buffer *srcBuffer,
|
||||
Image *dstImage,
|
||||
size_t srcOffset,
|
||||
const size_t *dstOrigin,
|
||||
const size_t *region,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo di;
|
||||
|
||||
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d,
|
||||
this->getContext(), this->getDevice());
|
||||
builder.takeOwnership(this->context);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcBuffer;
|
||||
dc.dstMemObj = dstImage;
|
||||
dc.srcOffset = {srcOffset, 0, 0};
|
||||
dc.dstOffset = dstOrigin;
|
||||
dc.size = region;
|
||||
builder.buildDispatchInfos(di, dc);
|
||||
|
||||
enqueueHandler<CL_COMMAND_COPY_BUFFER_TO_IMAGE>(
|
||||
di.getUsedSurfaces().begin(),
|
||||
di.getUsedSurfaces().size(),
|
||||
false,
|
||||
di,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
builder.releaseOwnership();
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
75
runtime/command_queue/enqueue_copy_image.h
Normal file
75
runtime/command_queue/enqueue_copy_image.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/basic_math.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include <algorithm>
|
||||
#include <new>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
|
||||
Image *srcImage,
|
||||
Image *dstImage,
|
||||
const size_t srcOrigin[3],
|
||||
const size_t dstOrigin[3],
|
||||
const size_t region[3],
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo di;
|
||||
|
||||
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d,
|
||||
this->getContext(), this->getDevice());
|
||||
builder.takeOwnership(this->context);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcImage;
|
||||
dc.dstMemObj = dstImage;
|
||||
dc.srcOffset = srcOrigin;
|
||||
dc.dstOffset = dstOrigin;
|
||||
dc.size = region;
|
||||
builder.buildDispatchInfos(di, dc);
|
||||
|
||||
enqueueHandler<CL_COMMAND_COPY_IMAGE>(
|
||||
di.getUsedSurfaces().begin(),
|
||||
di.getUsedSurfaces().size(),
|
||||
false,
|
||||
di,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
builder.releaseOwnership();
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
75
runtime/command_queue/enqueue_copy_image_to_buffer.h
Normal file
75
runtime/command_queue/enqueue_copy_image_to_buffer.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/helpers/surface_formats.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include <new>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
|
||||
Image *srcImage,
|
||||
Buffer *dstBuffer,
|
||||
const size_t *srcOrigin,
|
||||
const size_t *region,
|
||||
size_t dstOffset,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo di;
|
||||
|
||||
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer,
|
||||
this->getContext(), this->getDevice());
|
||||
builder.takeOwnership(this->context);
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcImage;
|
||||
dc.dstMemObj = dstBuffer;
|
||||
dc.srcOffset = srcOrigin;
|
||||
dc.dstOffset = {dstOffset, 0, 0};
|
||||
dc.size = region;
|
||||
builder.buildDispatchInfos(di, dc);
|
||||
|
||||
enqueueHandler<CL_COMMAND_COPY_IMAGE_TO_BUFFER>(
|
||||
di.getUsedSurfaces().begin(),
|
||||
di.getUsedSurfaces().size(),
|
||||
false,
|
||||
di,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
builder.releaseOwnership();
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user