Initial commit

Change-Id: I4bf1707bd3dfeadf2c17b0a7daff372b1925ebbd
This commit is contained in:
Brandon Fliflet
2017-12-21 00:45:38 +01:00
commit 7e9ad41290
1350 changed files with 233156 additions and 0 deletions

View File

@ -0,0 +1,39 @@
---
Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,readability-identifier-naming,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,-clang-analyzer-optin.performance.Padding'
# -clang-analyzer-core.CallAndMessage
# WarningsAsErrors: '.*'
HeaderFilterRegex: 'runtime/'
AnalyzeTemporaryDtors: false
CheckOptions:
- key: google-readability-braces-around-statements.ShortStatementLines
value: '1'
- key: google-readability-function-size.StatementThreshold
value: '800'
- key: google-readability-namespace-comments.ShortNamespaceLines
value: '10'
- key: google-readability-namespace-comments.SpacesBeforeComments
value: '2'
- key: readability-identifier-naming.MethodCase
value: camelBack
- key: readability-identifier-naming.ParameterCase
value: camelBack
- key: readability-identifier-naming.ClassMemberCase
value: camelBack
- key: readability-identifier-naming.ClassMethodCase
value: camelBack
- key: modernize-loop-convert.MaxCopySize
value: '16'
- key: modernize-loop-convert.MinConfidence
value: reasonable
- key: modernize-loop-convert.NamingStyle
value: CamelCase
- key: modernize-pass-by-value.IncludeStyle
value: llvm
- key: modernize-replace-auto-ptr.IncludeStyle
value: llvm
- key: modernize-use-nullptr.NullMacros
value: 'NULL'
- key: modernize-use-default-member-init.UseAssignment
value: '1'
...

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "block_kernel_manager.h"
#include "runtime/helpers/debug_helpers.h"
namespace OCLRT {
// Stores a block kernel descriptor and remembers whether any stored block
// requests a stateless printf surface.
void BlockKernelManager::addBlockKernelInfo(KernelInfo *blockKernelInfo) {
    blockKernelInfoArray.push_back(blockKernelInfo);

    // The flag is sticky: once a block asks for the printf surface it stays set.
    if (blockKernelInfo->patchInfo.pAllocateStatelessPrintfSurface != nullptr) {
        blockUsesPrintf = true;
    }
}
// Returns the block kernel descriptor stored at position `ordinal`.
// An out-of-range ordinal is flagged through DEBUG_BREAK_IF; the vector
// access itself is not bounds-checked.
const KernelInfo *BlockKernelManager::getBlockKernelInfo(size_t ordinal) {
    DEBUG_BREAK_IF(ordinal >= blockKernelInfoArray.size());
    const KernelInfo *blockInfo = blockKernelInfoArray[ordinal];
    return blockInfo;
}
// Releases every KernelInfo that was handed over via addBlockKernelInfo().
BlockKernelManager::~BlockKernelManager() {
    for (KernelInfo *blockInfo : blockKernelInfoArray) {
        delete blockInfo;
    }
}
// Associates a private-surface allocation with the block kernel at `ordinal`.
//
// The surface table is grown lazily to match the number of registered block
// kernels. std::vector::resize value-initializes the slots it adds, so new
// entries start out as nullptr while surfaces that were pushed earlier are
// preserved (the previous implementation reset every slot whenever the table
// grew, dropping already stored allocations).
//
// An ordinal outside the table is flagged through DEBUG_BREAK_IF.
void BlockKernelManager::pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal) {
    if (blockPrivateSurfaceArray.size() < blockKernelInfoArray.size()) {
        blockPrivateSurfaceArray.resize(blockKernelInfoArray.size());
    }

    DEBUG_BREAK_IF(ordinal >= blockPrivateSurfaceArray.size());

    blockPrivateSurfaceArray[ordinal] = allocation;
}
// Returns the private surface stored for the block kernel at `ordinal`.
GraphicsAllocation *BlockKernelManager::getPrivateSurface(size_t ordinal) {
    // An ordinal beyond the table is not an error: it happens when no private
    // surface has been pushed, so the answer is simply "none".
    if (ordinal >= blockPrivateSurfaceArray.size()) {
        return nullptr;
    }
    return blockPrivateSurfaceArray[ordinal];
}
} // namespace OCLRT

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "kernel_info.h"
#include "runtime/api/cl_types.h"
#include <vector>
namespace OCLRT {
class GraphicsAllocation;
// Keeps the KernelInfo descriptors of block kernels together with the
// private-surface allocation recorded for each of them.
//
// Ownership: the KernelInfo pointers passed to addBlockKernelInfo() are
// deleted by the destructor. The GraphicsAllocation pointers are only stored;
// this class does not release them.
class BlockKernelManager {
  public:
    BlockKernelManager() = default;
    virtual ~BlockKernelManager();

    // The destructor deletes the stored raw KernelInfo pointers, so a copy of
    // the manager would delete them a second time. Copying is disabled.
    BlockKernelManager(const BlockKernelManager &) = delete;
    BlockKernelManager &operator=(const BlockKernelManager &) = delete;

    // Stores a block kernel descriptor; the manager deletes it on destruction.
    void addBlockKernelInfo(KernelInfo *);

    // Returns the descriptor at `ordinal` (must be < getCount()).
    const KernelInfo *getBlockKernelInfo(size_t ordinal);

    // Number of block kernel descriptors stored so far.
    size_t getCount() const {
        return blockKernelInfoArray.size();
    }

    // True once any stored block kernel requested a stateless printf surface.
    bool getIfBlockUsesPrintf() const {
        return blockUsesPrintf;
    }

    // Records `allocation` as the private surface of the block at `ordinal`.
    void pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal);

    // Returns the private surface of the block at `ordinal`, or nullptr when
    // none is recorded for that ordinal.
    GraphicsAllocation *getPrivateSurface(size_t ordinal);

  protected:
    bool blockUsesPrintf = false;
    std::vector<KernelInfo *> blockKernelInfoArray;
    std::vector<GraphicsAllocation *> blockPrivateSurfaceArray;
};
} // namespace OCLRT

162
runtime/program/build.cpp Normal file
View File

@ -0,0 +1,162 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "config.h"
#include "runtime/compiler_interface/compiler_interface.h"
#include "runtime/os_interface/debug_settings_manager.h"
#include "runtime/platform/platform.h"
#include "runtime/helpers/validators.h"
#include "program.h"
#include <cstring>
namespace OCLRT {
// Builds the program into an executable.
//
// For programs that were not created from a binary, the source is first
// translated through the compiler interface (honouring `buildOptions`); in
// every case the resulting binary is then processed with processGenBinary()
// and block kernels are separated.
//
// Returns CL_SUCCESS or the first error encountered:
//   CL_INVALID_VALUE     - deviceList/numDevices mismatch, or userData
//                          supplied without a callback
//   CL_INVALID_DEVICE    - the first entry of deviceList fails validation
//   CL_INVALID_OPERATION - a build is already in progress
//   CL_OUT_OF_HOST_MEMORY- no compiler interface is available
//   CL_INVALID_PROGRAM   - the program has no source text
//   any error reported by the compiler interface or processGenBinary()
//
// buildStatus and programBinaryType are updated from the final result and
// `funcNotify`, when provided, is invoked before returning.
cl_int Program::build(
    cl_uint numDevices,
    const cl_device_id *deviceList,
    const char *buildOptions,
    void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
    void *userData,
    bool enableCaching) {
    cl_int retVal = CL_SUCCESS;

    // Single-pass loop: every `break` jumps to the common status update below.
    do {
        // The device list and its count must be given together or not at all.
        const bool hasDeviceList = (deviceList != nullptr);
        const bool hasDeviceCount = (numDevices != 0);
        if (hasDeviceList != hasDeviceCount) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // User data is meaningless without a callback to receive it.
        if ((userData != nullptr) && (funcNotify == nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // A null device list is fine (it means "all devices"); a provided one
        // must point at a valid device object.
        if (hasDeviceList && (validateObject(*deviceList) != CL_SUCCESS)) {
            retVal = CL_INVALID_DEVICE;
            break;
        }

        // Refuse to start while a previous build request is still running.
        if (buildStatus == CL_BUILD_IN_PROGRESS) {
            retVal = CL_INVALID_OPERATION;
            break;
        }

        if (!isCreatedFromBinary) {
            buildStatus = CL_BUILD_IN_PROGRESS;

            options = (buildOptions != nullptr) ? buildOptions : "";

            // "-cl-intel-gtpin-rera" is not passed on as a regular option:
            // it is cut out of `options` and forwarded via internalOptions.
            const std::string reraOption = "-cl-intel-gtpin-rera";
            const size_t reraPos = options.find(reraOption);
            if (reraPos != std::string::npos) {
                options.erase(reraPos, reraOption.length());
                internalOptions.append(reraOption);
                internalOptions.append(" ");
            }

            CompilerInterface *pCompilerInterface = getCompilerInterface();
            if (pCompilerInterface == nullptr) {
                retVal = CL_OUT_OF_HOST_MEMORY;
                break;
            }

            // Nothing to translate when the source text is empty.
            if (sourceCode.c_str()[0] == '\0') {
                retVal = CL_INVALID_PROGRAM;
                break;
            }

            internalOptions.append(platform()->getCompilerExtensions());

            TranslationArgs inputArgs = {};
            inputArgs.pInput = const_cast<char *>(sourceCode.c_str());
            inputArgs.InputSize = static_cast<uint32_t>(sourceCode.size());
            inputArgs.pOptions = options.c_str();
            inputArgs.OptionsSize = static_cast<uint32_t>(options.length());
            inputArgs.pInternalOptions = internalOptions.c_str();
            inputArgs.InternalOptionsSize = static_cast<uint32_t>(internalOptions.length());
            inputArgs.pTracingOptions = nullptr;
            inputArgs.TracingOptionsCount = 0;

            DBG_LOG(LogApiCalls,
                    "Build Options", inputArgs.pOptions,
                    "\nBuild Internal Options", inputArgs.pInternalOptions);

            retVal = pCompilerInterface->build(*this, inputArgs, enableCaching);
            if (retVal != CL_SUCCESS) {
                break;
            }
        }

        updateNonUniformFlag();

        retVal = processGenBinary();
        if (retVal != CL_SUCCESS) {
            break;
        }

        separateBlockKernels();
    } while (false);

    // Publish the outcome on the program object.
    if (retVal == CL_SUCCESS) {
        buildStatus = CL_BUILD_SUCCESS;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
    } else {
        buildStatus = CL_BUILD_ERROR;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
    }

    if (funcNotify != nullptr) {
        funcNotify(this, userData);
    }

    return retVal;
}
// Builds the program for a single device and then attaches built-in dispatch
// builders: every kernel whose name is a key of `builtinsMap` gets the mapped
// builder assigned. Kernels without a map entry are left untouched.
// Returns the result of the underlying build; on failure no builder is attached.
cl_int Program::build(const cl_device_id device, const char *buildOptions, bool enableCaching,
                      std::unordered_map<std::string, BuiltinDispatchInfoBuilder *> &builtinsMap) {
    const auto buildResult = this->build(1, &device, buildOptions, nullptr, nullptr, enableCaching);
    if (buildResult != CL_SUCCESS) {
        return buildResult;
    }

    for (auto &kernelInfo : this->kernelInfoArray) {
        auto match = builtinsMap.find(kernelInfo->name);
        if (match != builtinsMap.end()) {
            kernelInfo->builtinDispatchBuilder = match->second;
        }
    }
    return buildResult;
}
// Processes a single kernel blob through processKernel() and returns the
// status that call reports. `kernelDataSize` is accepted but not used here.
cl_int Program::build(
    const char *pKernelData,
    size_t kernelDataSize) {
    // processKernel() communicates its outcome through the status reference.
    cl_int status = CL_SUCCESS;
    processKernel(pKernelData, status);
    return status;
}
}

188
runtime/program/compile.cpp Normal file
View File

@ -0,0 +1,188 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "config.h"
#include "elf/writer.h"
#include "runtime/compiler_interface/compiler_interface.h"
#include "runtime/platform/platform.h"
#include "runtime/helpers/validators.h"
#include "program.h"
#include <cstring>
namespace OCLRT {
// Compiles the program source (plus optional embedded headers) into a
// compiled object.
//
// The main source and every input header are packed as sections of an ELF
// container, which is then handed to the compiler interface.
//
// Returns CL_SUCCESS or the first error encountered:
//   CL_INVALID_VALUE     - deviceList/numDevices mismatch, inconsistent header
//                          arguments, or userData supplied without a callback
//   CL_INVALID_DEVICE    - the first entry of deviceList fails validation
//   CL_INVALID_OPERATION - a build is already in progress
//   CL_INVALID_PROGRAM   - an input header is null or not a Program object
//   CL_OUT_OF_HOST_MEMORY- no compiler interface is available
//   any error reported by a header's getSource() or by the compiler interface
//
// buildStatus and programBinaryType are updated from the final result,
// internalOptions is cleared, and `funcNotify`, when provided, is invoked
// before returning.
cl_int Program::compile(
    cl_uint numDevices,
    const cl_device_id *deviceList,
    const char *buildOptions,
    cl_uint numInputHeaders,
    const cl_program *inputHeaders,
    const char **headerIncludeNames,
    void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
    void *userData) {
    cl_int retVal = CL_SUCCESS;

    // Both are released by the shared cleanup after the single-pass loop, so
    // they have to outlive it.
    CLElfLib::CElfWriter *pElfWriter = nullptr;
    char *pCompileData = nullptr;

    // Single-pass loop: every `break` jumps to the common cleanup below.
    do {
        // The device list and its count must be given together or not at all.
        const bool hasDeviceList = (deviceList != nullptr);
        const bool hasDeviceCount = (numDevices != 0);
        if (hasDeviceList != hasDeviceCount) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // With zero headers both header arrays must be null; otherwise both
        // must be provided.
        const bool anyHeaderArray = (headerIncludeNames != nullptr) || (inputHeaders != nullptr);
        const bool bothHeaderArrays = (headerIncludeNames != nullptr) && (inputHeaders != nullptr);
        const bool headerArgsConsistent = (numInputHeaders == 0) ? !anyHeaderArray : bothHeaderArrays;
        if (!headerArgsConsistent) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // User data is meaningless without a callback to receive it.
        if ((userData != nullptr) && (funcNotify == nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // A null device list is fine (it means "all devices"); a provided one
        // must point at a valid device object.
        if (hasDeviceList && (validateObject(*deviceList) != CL_SUCCESS)) {
            retVal = CL_INVALID_DEVICE;
            break;
        }

        if (buildStatus == CL_BUILD_IN_PROGRESS) {
            retVal = CL_INVALID_OPERATION;
            break;
        }

        buildStatus = CL_BUILD_IN_PROGRESS;

        options = (buildOptions != nullptr) ? buildOptions : "";

        // "-cl-intel-gtpin-rera" is not passed on as a regular option:
        // it is cut out of `options` and forwarded via internalOptions.
        const std::string reraOption = "-cl-intel-gtpin-rera";
        const size_t reraPos = options.find(reraOption);
        if (reraPos != std::string::npos) {
            options.erase(reraPos, reraOption.length());
            internalOptions.append(reraOption);
            internalOptions.append(" ");
        }

        // The ELF writer collects every source that takes part in the compilation.
        pElfWriter = CLElfLib::CElfWriter::create(CLElfLib::EH_TYPE_OPENCL_SOURCE, CLElfLib::EH_MACHINE_NONE, 0);
        UNRECOVERABLE_IF(pElfWriter == nullptr);

        // Main section: the program's own source, including its terminating NUL.
        CLElfLib::SSectionNode sectionNode;
        sectionNode.Name = "CLMain";
        sectionNode.pData = const_cast<char *>(sourceCode.c_str());
        sectionNode.DataSize = static_cast<unsigned int>(strlen(sourceCode.c_str()) + 1);
        sectionNode.Flags = 0;
        sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_SOURCE;
        pElfWriter->addSection(&sectionNode);

        // One header section per input header, named after its include name.
        for (cl_uint i = 0; i < numInputHeaders; i++) {
            cl_program headerHandle = inputHeaders[i];
            Program *pHeaderProgram =
                (headerHandle != nullptr) ? castToObject<Program>(headerHandle) : nullptr;
            if (pHeaderProgram == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }

            sectionNode.Name = headerIncludeNames[i];
            sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_HEADER;
            sectionNode.Flags = 0;

            // The header program supplies the section's data pointer and size.
            retVal = pHeaderProgram->getSource(sectionNode.pData, sectionNode.DataSize);
            if (retVal != CL_SUCCESS) {
                break;
            }
            pElfWriter->addSection(&sectionNode);
        }
        if (retVal != CL_SUCCESS) {
            break;
        }

        // Two-step resolve: first query the size, then fill the buffer.
        size_t compileDataSize = 0;
        pElfWriter->resolveBinary(nullptr, compileDataSize);
        pCompileData = new char[compileDataSize];
        pElfWriter->resolveBinary(pCompileData, compileDataSize);

        CompilerInterface *pCompilerInterface = getCompilerInterface();
        if (pCompilerInterface == nullptr) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        internalOptions.append(platform()->getCompilerExtensions());

        // Describe the translation request for the compiler interface.
        TranslationArgs inputArgs = {};
        inputArgs.pInput = pCompileData;
        inputArgs.InputSize = static_cast<uint32_t>(compileDataSize);
        inputArgs.pOptions = options.c_str();
        inputArgs.OptionsSize = static_cast<uint32_t>(options.length());
        inputArgs.pInternalOptions = internalOptions.c_str();
        inputArgs.InternalOptionsSize = static_cast<uint32_t>(internalOptions.length());
        inputArgs.pTracingOptions = nullptr;
        inputArgs.TracingOptionsCount = 0;

        retVal = pCompilerInterface->compile(*this, inputArgs);
        if (retVal != CL_SUCCESS) {
            break;
        }

        updateNonUniformFlag();
    } while (false);

    // Publish the outcome on the program object.
    if (retVal == CL_SUCCESS) {
        buildStatus = CL_BUILD_SUCCESS;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
    } else {
        buildStatus = CL_BUILD_ERROR;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
    }

    // Common cleanup, reached on success and on every error path.
    CLElfLib::CElfWriter::destroy(pElfWriter);
    delete[] pCompileData;
    internalOptions.clear();

    if (funcNotify != nullptr) {
        funcNotify(this, userData);
    }

    return retVal;
}
} // namespace OCLRT

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/program/create.inl"
#include "runtime/program/program.h"
namespace OCLRT {
// Explicit instantiations of the Program factory templates for T = Program.
// The template bodies come from the included runtime/program/create.inl.

// create() from device binaries.
template Program *Program::create<Program>(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int &);
// create() from an array of source strings.
template Program *Program::create<Program>(cl_context, cl_uint, const char **, const size_t *, cl_int &);
// create() from a single null-terminated source string.
template Program *Program::create<Program>(const char *, Context *, Device &, bool, cl_int *);
// createFromIL() from an intermediate-language blob.
template Program *Program::createFromIL<Program>(Context *, const void *, size_t length, cl_int &);
}

144
runtime/program/create.inl Normal file
View File

@ -0,0 +1,144 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/program/program.h"
#include "runtime/context/context.h"
namespace OCLRT {
// Creates a program object of type T from a device binary.
//
// Only the first binary (binaries[0] / lengths[0]) is consumed; numDevices and
// deviceList are not inspected here.
//
// context      - context handle the program is created in
// binaryStatus - optional; receives the load result of the binary. It now
//                mirrors the real outcome instead of unconditionally
//                reporting CL_SUCCESS when loading failed.
// errcodeRet   - receives the result of createProgramFromBinary()
//
// Returns the new program, or nullptr (with the program deleted) when the
// binary could not be loaded.
template <typename T>
T *Program::create(
    cl_context context,
    cl_uint numDevices,
    const cl_device_id *deviceList,
    const size_t *lengths,
    const unsigned char **binaries,
    cl_int *binaryStatus,
    cl_int &errcodeRet) {
    auto pContext = castToObject<Context>(context);
    DEBUG_BREAK_IF(!pContext);

    auto program = new T(pContext);

    auto retVal = program->createProgramFromBinary(binaries[0], lengths[0]);

    if (binaryStatus) {
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
        // Report the actual load result so a failed binary is not advertised
        // to the caller as successfully loaded.
        *binaryStatus = retVal;
    }

    if (retVal != CL_SUCCESS) {
        delete program;
        program = nullptr;
    }

    errcodeRet = retVal;
    return program;
}
// Creates a program object of type T from an array of source strings.
//
// The `count` strings (with optional `lengths`) are merged by
// createCombinedString(); on success the merged text becomes the program's
// sourceCode. `errcodeRet` receives the result of the merge.
//
// Returns the new program, or nullptr when the strings could not be combined.
template <typename T>
T *Program::create(
    cl_context context,
    cl_uint count,
    const char **strings,
    const size_t *lengths,
    cl_int &errcodeRet) {
    auto pContext = castToObject<Context>(context);
    DEBUG_BREAK_IF(!pContext);

    std::string mergedSource;
    size_t mergedSourceSize = 0;
    const auto retVal = createCombinedString(
        mergedSource,
        mergedSourceSize,
        count,
        strings,
        lengths);

    T *program = nullptr;
    if (retVal == CL_SUCCESS) {
        program = new T(pContext);
        // Hand the merged text over without copying it.
        program->sourceCode.swap(mergedSource);
    }

    errcodeRet = retVal;
    return program;
}
// Creates a program object of type T from a single null-terminated source
// string, bound to one device.
//
// context    - stored on the program; its internal reference count is bumped
//              only when a context is given and the program is not built-in
// isBuiltIn  - stored on the program
// errcodeRet - optional; receives CL_INVALID_VALUE when the source pointer is
//              null, CL_SUCCESS otherwise
//
// Returns the new program, or nullptr when `nullTerminatedString` is null.
template <typename T>
T *Program::create(
    const char *nullTerminatedString,
    Context *context,
    Device &device,
    bool isBuiltIn,
    cl_int *errcodeRet) {
    // Without source text there is nothing to create.
    if (nullTerminatedString == nullptr) {
        if (errcodeRet) {
            *errcodeRet = CL_INVALID_VALUE;
        }
        return nullptr;
    }

    T *program = new T();
    program->setSource(const_cast<char *>(nullTerminatedString));
    program->context = context;
    program->isBuiltIn = isBuiltIn;
    if (program->context && !program->isBuiltIn) {
        program->context->incRefInternal();
    }
    program->pDevice = &device;
    program->numDevices = 1;

    // 32-bit builds, or an explicit debug flag, request the
    // greater-than-4GB-buffer internal option.
    if (is32bit || DebugManager.flags.DisableStatelessToStatefulOptimization.get()) {
        program->internalOptions += "-cl-intel-greater-than-4GB-buffer-required";
    }

    if (errcodeRet) {
        *errcodeRet = CL_SUCCESS;
    }
    return program;
}
// Creates a program object of type T from an intermediate-language blob.
//
// errcodeRet receives CL_INVALID_BINARY when `il` is null or `length` is zero,
// otherwise the result of createProgramFromBinary().
//
// Returns the new program, or nullptr on any failure (the partially created
// program is deleted).
template <typename T>
T *Program::createFromIL(Context *ctx,
                         const void *il,
                         size_t length,
                         cl_int &errcodeRet) {
    errcodeRet = CL_SUCCESS;

    const bool hasInput = (il != nullptr) && (length != 0);
    if (!hasInput) {
        errcodeRet = CL_INVALID_BINARY;
        return nullptr;
    }

    T *program = new T(ctx, false);
    errcodeRet = program->createProgramFromBinary(il, length);
    if (errcodeRet == CL_SUCCESS) {
        return program;
    }

    delete program;
    return nullptr;
}
} // namespace OCLRT

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/program/program.h"
namespace OCLRT {
// Tells whether an unhandled token may be skipped. This implementation never
// allows skipping, regardless of the token value.
bool Program::isSafeToSkipUnhandledToken(unsigned int token) const {
    static_cast<void>(token); // the answer does not depend on the token
    return false;
}
} // namespace OCLRT

View File

@ -0,0 +1,224 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/context/context.h"
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/get_info.h"
#include "runtime/helpers/validators.h"
#include "program.h"
namespace OCLRT {
// Answers a program information query.
//
// Each case of the switch selects the data to report (pSrc / srcSize) and the
// size to advertise (retSize); the shared tail then copies the data to the
// caller through ::getInfo() and stores retSize in *paramValueSizeRet.
//
// paramName         - which property is queried
// paramValueSize    - size of the buffer behind paramValue
// paramValue        - destination buffer (for the BINARIES / DEBUG_INFO
//                     queries it is read as an array of destination pointers)
// paramValueSizeRet - optional; receives the advertised size
//
// Returns CL_SUCCESS, CL_INVALID_VALUE (unknown paramName or too small a
// pointer array), CL_INVALID_PROGRAM_EXECUTABLE (kernel queries when
// buildStatus is not CL_BUILD_SUCCESS), or whatever ::getInfo() reports.
cl_int Program::getInfo(cl_program_info paramName, size_t paramValueSize,
void *paramValue, size_t *paramValueSizeRet) {
cl_int retVal = CL_SUCCESS;
const void *pSrc = nullptr;
size_t srcSize = 0;
size_t retSize = 0;
std::string kernelNamesString;
cl_device_id device_id = pDevice;
cl_uint refCount = 0;
size_t numKernels;
cl_context clContext = context;
switch (paramName) {
case CL_PROGRAM_CONTEXT:
pSrc = &clContext;
retSize = srcSize = sizeof(clContext);
break;
case CL_PROGRAM_BINARIES:
// The caller passes an array of destination pointers; the advertised size
// is that of one pointer, while the copy targets the buffer the first
// entry points to and uses the ELF binary size.
resolveProgramBinary();
pSrc = elfBinary;
retSize = sizeof(void **);
srcSize = elfBinarySize;
if (paramValue != nullptr) {
if (paramValueSize < retSize) {
retVal = CL_INVALID_VALUE;
break;
}
// Redirect the shared copy below to the caller's first binary buffer.
paramValueSize = srcSize;
paramValue = *(void **)paramValue;
}
break;
case CL_PROGRAM_BINARY_SIZES:
resolveProgramBinary();
pSrc = &elfBinarySize;
// NOTE(review): sizeof(size_t *) is used for a size_t value; presumably
// sizeof(size_t) was intended - confirm.
retSize = srcSize = sizeof(size_t *);
break;
case CL_PROGRAM_KERNEL_NAMES:
kernelNamesString = getKernelNamesString();
pSrc = kernelNamesString.c_str();
// +1 accounts for the terminating NUL.
retSize = srcSize = kernelNamesString.length() + 1;
if (buildStatus != CL_BUILD_SUCCESS) {
retVal = CL_INVALID_PROGRAM_EXECUTABLE;
}
break;
case CL_PROGRAM_NUM_KERNELS:
numKernels = kernelInfoArray.size();
pSrc = &numKernels;
retSize = srcSize = sizeof(numKernels);
if (buildStatus != CL_BUILD_SUCCESS) {
retVal = CL_INVALID_PROGRAM_EXECUTABLE;
}
break;
case CL_PROGRAM_NUM_DEVICES:
pSrc = &numDevices;
retSize = srcSize = sizeof(cl_uint);
break;
case CL_PROGRAM_DEVICES:
pSrc = &device_id;
retSize = srcSize = sizeof(cl_device_id);
break;
case CL_PROGRAM_REFERENCE_COUNT:
refCount = static_cast<cl_uint>(this->getReference());
retSize = srcSize = sizeof(refCount);
pSrc = &refCount;
break;
case CL_PROGRAM_SOURCE:
pSrc = sourceCode.c_str();
// Length up to the first NUL in the source, plus the terminator.
retSize = srcSize = strlen(sourceCode.c_str()) + 1;
break;
case CL_PROGRAM_IL:
pSrc = sourceCode.data();
retSize = srcSize = sourceCode.size();
// When the stored source is not a valid SPIR-V binary the query succeeds
// with a reported size of zero and nothing is copied.
if (!Program::isValidSpirvBinary(pSrc, srcSize)) {
if (paramValueSizeRet) {
*paramValueSizeRet = 0;
}
return CL_SUCCESS;
}
break;
case CL_PROGRAM_DEBUG_INFO_SIZES_INTEL:
resolveProgramBinary();
retSize = srcSize = sizeof(debugDataSize);
pSrc = &debugDataSize;
break;
case CL_PROGRAM_DEBUG_INFO_INTEL:
// Same pointer-array convention as CL_PROGRAM_BINARIES, with one pointer
// advertised per device.
resolveProgramBinary();
pSrc = debugData;
retSize = numDevices * sizeof(void **);
srcSize = debugDataSize;
if (paramValue != nullptr) {
if (paramValueSize < retSize) {
retVal = CL_INVALID_VALUE;
break;
}
// Redirect the shared copy below to the caller's first debug-data buffer.
paramValueSize = srcSize;
paramValue = *(void **)paramValue;
}
break;
default:
retVal = CL_INVALID_VALUE;
break;
}
// Copy only when the case above did not already flag an error.
retVal = (retVal == CL_SUCCESS)
? ::getInfo(paramValue, paramValueSize, pSrc, srcSize)
: retVal;
// The advertised size is reported even when retVal is an error.
if (paramValueSizeRet) {
*paramValueSizeRet = retSize;
}
return retVal;
}
// Answers a build information query for `device`.
//
// device            - must be the device this program holds (pDevice) and a
//                     valid device object, otherwise CL_INVALID_DEVICE
// paramName         - which build property is queried
// paramValueSize    - size of the buffer behind paramValue
// paramValue        - destination buffer, filled through ::getInfo()
// paramValueSizeRet - optional; receives the size of the reported data
//                     (0 for an unknown paramName)
//
// Returns CL_SUCCESS, CL_INVALID_DEVICE, CL_INVALID_VALUE for an unknown
// paramName, or whatever ::getInfo() reports.
cl_int Program::getBuildInfo(cl_device_id device, cl_program_build_info paramName,
                             size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const {
    // Only the device the program is associated with may be queried.
    cl_device_id programDevice = pDevice;
    if (device != programDevice) {
        return CL_INVALID_DEVICE;
    }
    if (validateObjects(device) != CL_SUCCESS) {
        return CL_INVALID_DEVICE;
    }

    auto pDev = castToObject<Device>(device);

    cl_int retVal = CL_SUCCESS;
    const void *pSrc = nullptr;
    // Size of the data to copy; the same value is reported back to the caller.
    size_t srcSize = 0;

    switch (paramName) {
    case CL_PROGRAM_BUILD_STATUS:
        pSrc = &buildStatus;
        srcSize = sizeof(cl_build_status);
        break;

    case CL_PROGRAM_BUILD_OPTIONS:
        pSrc = options.c_str();
        srcSize = strlen(options.c_str()) + 1;
        break;

    case CL_PROGRAM_BUILD_LOG: {
        // A missing log is reported as an empty string (just the terminator).
        const char *pBuildLog = getBuildLog(pDev);
        if (pBuildLog == nullptr) {
            pBuildLog = "";
        }
        pSrc = pBuildLog;
        srcSize = strlen(pBuildLog) + 1;
    } break;

    case CL_PROGRAM_BINARY_TYPE:
        pSrc = &programBinaryType;
        srcSize = sizeof(cl_program_binary_type);
        break;

    case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE:
        pSrc = &globalVarTotalSize;
        srcSize = sizeof(size_t);
        break;

    default:
        retVal = CL_INVALID_VALUE;
        break;
    }

    if (retVal == CL_SUCCESS) {
        retVal = ::getInfo(paramValue, paramValueSize, pSrc, srcSize);
    }

    if (paramValueSizeRet) {
        *paramValueSizeRet = srcSize;
    }

    return retVal;
}
} // namespace OCLRT

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <cstdint>
#include "patch_info.h"
namespace OCLRT {
// Non-owning set of pointers into the pieces of a kernel binary, plus the blob
// they belong to. Every pointer starts out null and blobSize starts at zero.
//
// Members use default member initializers (the form requested by the
// project's modernize-use-default-member-init check and already used by
// KernelArgInfo) instead of assignments in the constructor body.
struct HeapInfo {
    const SKernelBinaryHeaderCommon *pKernelHeader = nullptr; // kernel binary header
    const void *pKernelHeap = nullptr;                        // kernel heap
    const void *pGsh = nullptr;                               // presumably general state heap - confirm
    const void *pDsh = nullptr;                               // presumably dynamic state heap - confirm
    void *pSsh = nullptr;                                     // presumably surface state heap - confirm; the only mutable one
    const void *pPatchList = nullptr;                         // patch list
    const void *pBlob = nullptr;                              // blob pointer
    size_t blobSize = 0;                                      // size paired with pBlob

    HeapInfo() = default;
};
} // namespace OCLRT

View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "config.h"
#include "CL/cl.h"
#include <cstdint>
#include <string>
#include <vector>
// Describes one patch location of a kernel argument. All fields default to 0.
struct KernelArgPatchInfo {
// presumably the destination offset inside the cross-thread data - confirm
uint32_t crossthreadOffset = 0;
// presumably the number of bytes to patch - confirm
uint32_t size = 0;
// presumably the offset inside the argument value to read from - confirm
uint32_t sourceOffset = 0;
};
// Per-argument metadata of a kernel: textual descriptions, classification
// flags, patch locations and the offsets of optional per-argument values.
// Offsets that are not set hold `undefinedOffset`.
struct KernelArgInfo {
    // Marker for "this offset is not present" (all bits set).
    static constexpr uint32_t undefinedOffset = static_cast<uint32_t>(-1);

    KernelArgInfo() = default;

    // Textual description of the argument.
    std::string name;
    std::string typeStr;
    std::string accessQualifierStr;
    std::string addressQualifierStr;
    std::string typeQualifierStr;

    uint32_t offsetHeap = 0;
    std::vector<KernelArgPatchInfo> kernelArgPatchInfoVector;
    uint32_t slmAlignment = 0;

    // Classification flags; all default to false.
    bool isImage = false;
    bool isMediaImage = false;
    bool isMediaBlockImage = false;
    bool isSampler = false;
    bool isAccelerator = false;
    bool isDeviceQueue = false;
    bool isBuffer = false;

    uint32_t samplerArgumentType = 0;

    // Image-related offsets.
    uint32_t offsetImgWidth = undefinedOffset;
    uint32_t offsetImgHeight = undefinedOffset;
    uint32_t offsetImgDepth = undefinedOffset;
    uint32_t offsetChannelDataType = undefinedOffset;
    uint32_t offsetChannelOrder = undefinedOffset;
    uint32_t offsetArraySize = undefinedOffset;
    uint32_t offsetNumSamples = undefinedOffset;

    // Sampler-related offsets.
    uint32_t offsetSamplerSnapWa = undefinedOffset;
    uint32_t offsetSamplerAddressingMode = undefinedOffset;
    uint32_t offsetSamplerNormalizedCoords = undefinedOffset;

    // VME-related offsets.
    uint32_t offsetVmeMbBlockType = undefinedOffset;
    uint32_t offsetVmeSubpixelMode = undefinedOffset;
    uint32_t offsetVmeSadAdjustMode = undefinedOffset;
    uint32_t offsetVmeSearchPathType = undefinedOffset;

    uint32_t offsetObjectId = undefinedOffset;
    uint32_t offsetBufferOffset = undefinedOffset;

    bool needPatch = false;

    // Qualifiers in their OpenCL enum form.
    cl_kernel_arg_access_qualifier accessQualifier = CL_KERNEL_ARG_ACCESS_NONE;
    cl_kernel_arg_address_qualifier addressQualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
    cl_kernel_arg_type_qualifier typeQualifier = CL_KERNEL_ARG_TYPE_NONE;
};

View File

@ -0,0 +1,499 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "hw_cmds.h"
#include "runtime/device/device.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/mem_obj/image.h"
#include "runtime/kernel/kernel.h"
#include "runtime/sampler/sampler.h"
#include "runtime/helpers/string.h"
#include <cstdint>
#include <cstring>
#include <map>
#include <unordered_map>
#include <sstream>
namespace OCLRT {
// Out-of-class definitions of WorkloadInfo's static sentinels.
// Both are the all-ones 32-bit value.
const uint32_t WorkloadInfo::undefinedOffset = static_cast<uint32_t>(-1);
const uint32_t WorkloadInfo::invalidParentEvent = static_cast<uint32_t>(-1);
// Maps an access-qualifier string to its CL_KERNEL_ARG_ACCESS_* value.
// KernelInfo::resolveKernelInfo looks up each argument's accessQualifierStr here
// and returns CL_INVALID_BINARY when the string is not one of these keys.
// Both the plain and the double-underscore spellings are accepted; the empty
// string and "NONE" both map to CL_KERNEL_ARG_ACCESS_NONE.
std::unordered_map<std::string, uint32_t> accessQualifierMap = {
{"", CL_KERNEL_ARG_ACCESS_NONE},
{"NONE", CL_KERNEL_ARG_ACCESS_NONE},
{"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
{"__read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
{"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
{"__write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
{"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE},
{"__read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE},
};
// Maps an address-qualifier string to its CL_KERNEL_ARG_ADDRESS_* value.
// KernelInfo::resolveKernelInfo looks up each argument's addressQualifierStr here
// and returns CL_INVALID_BINARY when the string is not one of these keys.
// Note the two fallbacks: the empty string maps to GLOBAL, while
// "not_specified" maps to PRIVATE.
std::unordered_map<std::string, uint32_t> addressQualifierMap = {
{"", CL_KERNEL_ARG_ADDRESS_GLOBAL},
{"__global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
{"__local", CL_KERNEL_ARG_ADDRESS_LOCAL},
{"__private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
{"__constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
{"not_specified", CL_KERNEL_ARG_ADDRESS_PRIVATE},
};
// Pairs a type-qualifier keyword with the bit value OR-ed into
// KernelArgInfo::typeQualifier when that keyword is found (see the
// typeQualifiers table and KernelInfo::resolveKernelInfo).
struct KernelArgumentType {
// Keyword searched for inside the argument's type-qualifier string.
const char *argTypeQualifier;
// Value OR-ed into the argument's typeQualifier on a match.
uint64_t argTypeQualifierValue;
};
// Type-qualifier keywords recognised by KernelInfo::resolveKernelInfo.
// Matching is done with strstr, so a keyword counts as present whenever it
// occurs anywhere inside the qualifier string.
constexpr KernelArgumentType typeQualifiers[] = {
{"const", CL_KERNEL_ARG_TYPE_CONST},
{"volatile", CL_KERNEL_ARG_TYPE_VOLATILE},
{"restrict", CL_KERNEL_ARG_TYPE_RESTRICT},
{"pipe", CL_KERNEL_ARG_TYPE_PIPE},
};
// Maps a scalar or vector type name (e.g. "float4") to sizeof the host-side
// cl_* type of the same name.
std::map<std::string, size_t> typeSizeMap = {
{"char", sizeof(cl_char)},
{"char2", sizeof(cl_char2)},
{"char3", sizeof(cl_char3)},
{"char4", sizeof(cl_char4)},
{"char8", sizeof(cl_char8)},
{"char16", sizeof(cl_char16)},
{"uchar", sizeof(cl_uchar)},
{"uchar2", sizeof(cl_uchar2)},
{"uchar3", sizeof(cl_uchar3)},
{"uchar4", sizeof(cl_uchar4)},
{"uchar8", sizeof(cl_uchar8)},
{"uchar16", sizeof(cl_uchar16)},
{"short", sizeof(cl_short)},
{"short2", sizeof(cl_short2)},
{"short3", sizeof(cl_short3)},
{"short4", sizeof(cl_short4)},
{"short8", sizeof(cl_short8)},
{"short16", sizeof(cl_short16)},
{"ushort", sizeof(cl_ushort)},
{"ushort2", sizeof(cl_ushort2)},
{"ushort3", sizeof(cl_ushort3)},
{"ushort4", sizeof(cl_ushort4)},
{"ushort8", sizeof(cl_ushort8)},
{"ushort16", sizeof(cl_ushort16)},
{"int", sizeof(cl_int)},
{"int2", sizeof(cl_int2)},
{"int3", sizeof(cl_int3)},
{"int4", sizeof(cl_int4)},
{"int8", sizeof(cl_int8)},
{"int16", sizeof(cl_int16)},
{"uint", sizeof(cl_uint)},
{"uint2", sizeof(cl_uint2)},
{"uint3", sizeof(cl_uint3)},
{"uint4", sizeof(cl_uint4)},
{"uint8", sizeof(cl_uint8)},
{"uint16", sizeof(cl_uint16)},
{"long", sizeof(cl_long)},
{"long2", sizeof(cl_long2)},
{"long3", sizeof(cl_long3)},
{"long4", sizeof(cl_long4)},
{"long8", sizeof(cl_long8)},
{"long16", sizeof(cl_long16)},
{"ulong", sizeof(cl_ulong)},
{"ulong2", sizeof(cl_ulong2)},
{"ulong3", sizeof(cl_ulong3)},
{"ulong4", sizeof(cl_ulong4)},
{"ulong8", sizeof(cl_ulong8)},
{"ulong16", sizeof(cl_ulong16)},
{"half", sizeof(cl_half)},
{"float", sizeof(cl_float)},
{"float2", sizeof(cl_float2)},
{"float3", sizeof(cl_float3)},
{"float4", sizeof(cl_float4)},
{"float8", sizeof(cl_float8)},
{"float16", sizeof(cl_float16)},
// The half vector types are only registered when cl_khr_fp16 is defined;
// scalar "half" above is always present.
#ifdef cl_khr_fp16
{"half2", sizeof(cl_half2)},
{"half3", sizeof(cl_half3)},
{"half4", sizeof(cl_half4)},
{"half8", sizeof(cl_half8)},
{"half16", sizeof(cl_half16)},
#endif
{"double", sizeof(cl_double)},
{"double2", sizeof(cl_double2)},
{"double3", sizeof(cl_double3)},
{"double4", sizeof(cl_double4)},
{"double8", sizeof(cl_double8)},
{"double16", sizeof(cl_double16)},
};
// Builds a WorkSizeInfo from explicitly supplied values and then derives
// minWorkGroupSize from them via setMinWorkGroupSize().
// Initialisers are listed in member declaration order.
WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, uint32_t hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface)
    : maxWorkGroupSize(maxWorkGroupSize),
      hasBarriers(hasBarriers),
      simdSize(simdSize),
      slmTotalSize(slmTotalSize),
      coreFamily(coreFamily),
      numThreadsPerSlice(numThreadsPerSlice),
      localMemSize(localMemSize),
      imgUsed(imgUsed),
      yTiledSurfaces(yTiledSurface) {
    setMinWorkGroupSize();
}
// Builds a WorkSizeInfo from the kernel attached to dispatchInfo and that
// kernel's device. The image flags come from setIfUseImg(), and
// minWorkGroupSize from setMinWorkGroupSize().
// Dereferences the kernel's patchInfo.executionEnvironment without a null check.
WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
    auto *kernel = dispatchInfo.getKernel();
    auto &device = kernel->getDevice();
    const auto &deviceInfo = device.getDeviceInfo();
    const auto &kernelInfo = kernel->getKernelInfo();

    maxWorkGroupSize = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize);
    hasBarriers = static_cast<uint32_t>(kernelInfo.patchInfo.executionEnvironment->HasBarriers);
    simdSize = static_cast<uint32_t>(kernelInfo.getMaxSimdSize());
    slmTotalSize = static_cast<uint32_t>(kernel->slmTotalSize);
    coreFamily = device.getHardwareInfo().pPlatform->eRenderCoreFamily;
    numThreadsPerSlice = static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice);
    localMemSize = static_cast<uint32_t>(deviceInfo.localMemSize);

    setIfUseImg(kernel);
    setMinWorkGroupSize();
}
// Sets imgUsed and yTiledSurfaces to true when any of the kernel's first
// getKernelArgsNumber() arguments is flagged as an image. The flags are never
// cleared here.
void WorkSizeInfo::setIfUseImg(Kernel *pKernel) {
    const auto &argInfos = pKernel->getKernelInfo().kernelArgInfo;
    const auto argCount = pKernel->getKernelArgsNumber();
    for (auto argIndex = 0u; argIndex < argCount; ++argIndex) {
        if (!argInfos[argIndex].isImage) {
            continue;
        }
        imgUsed = true;
        yTiledSurfaces = true;
    }
}
// Derives minWorkGroupSize from the barrier and SLM usage recorded in this object.
//  - With barriers: numThreadsPerSlice * simdSize divided by the per-core-family
//    barrier limit (32 from IGFX_GEN9_CORE on, otherwise 16).
//  - With SLM: maxWorkGroupSize divided by the number of work groups whose SLM
//    fits in localMemSize; the larger of the two results wins.
// Without either, minWorkGroupSize is 0.
void WorkSizeInfo::setMinWorkGroupSize() {
    minWorkGroupSize = 0;

    if (hasBarriers > 0) {
        const uint32_t maxBarriersPerHSlice = (coreFamily >= IGFX_GEN9_CORE) ? 32 : 16;
        minWorkGroupSize = numThreadsPerSlice * simdSize / maxBarriersPerHSlice;
    }

    if (slmTotalSize > 0) {
        // localMemSize / slmTotalSize truncates to 0 when the kernel requests more
        // SLM than localMemSize; clamp to 1 so the division below cannot be a
        // division by zero.
        const uint32_t groupsFittingInSlm = std::max(localMemSize / slmTotalSize, uint32_t{1});
        minWorkGroupSize = std::max(maxWorkGroupSize / groupsFittingInSlm, minWorkGroupSize);
    }
}
// Chooses the ratio settings.
//  - SLM in use: non-strict ratio of log(workItems[0]) - log(workItems[1]).
//  - Otherwise, Y-tiled surfaces in use: strict ratio of YTilingRatioValue.
//  - Otherwise the ratio members are left untouched.
void WorkSizeInfo::checkRatio(const size_t workItems[3]) {
    if (slmTotalSize > 0) {
        targetRatio = log(static_cast<float>(workItems[0])) - log(static_cast<float>(workItems[1]));
        useStrictRatio = false;
        useRatio = true;
        return;
    }

    if (yTiledSurfaces) {
        targetRatio = YTilingRatioValue;
        useStrictRatio = true;
        useRatio = true;
    }
}
// Allocates a default-constructed KernelInfo with new and returns the raw pointer.
KernelInfo *KernelInfo::create() {
    auto *kernelInfo = new KernelInfo();
    return kernelInfo;
}
// Frees the buffers this object deletes itself: crossThreadData and every
// printf string copy held in patchInfo.stringDataMap.
KernelInfo::~KernelInfo() {
    delete[] crossThreadData;

    for (auto &entry : patchInfo.stringDataMap) {
        delete[] entry.second.pStringData;
    }
    patchInfo.stringDataMap.clear();

    kernelArgInfo.clear();
}
// Parses the strings that follow an SPatchKernelArgumentInfo token and stores
// them in kernelArgInfo[ArgumentNumber].
//
// The fields are laid out back to back right after the token header, each
// occupying the byte count given in the header: address qualifier, access
// qualifier, argument name, type name, type qualifier. Only the part of the
// type name before the first ';' is kept as typeStr.
//
// Returns CL_SUCCESS, or CL_INVALID_BINARY when the token pointer is null or the
// type-name field contains no ';'. In the error cases no state is modified.
cl_int KernelInfo::storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo) {
    if (pkernelArgInfo == nullptr) {
        return CL_INVALID_BINARY;
    }

    const uint32_t argNum = pkernelArgInfo->ArgumentNumber;

    const char *pAddressQualifier = ptrOffset(
        reinterpret_cast<const char *>(pkernelArgInfo),
        sizeof(SPatchKernelArgumentInfo));
    const char *pAccessQualifier = pAddressQualifier + pkernelArgInfo->AddressQualifierSize;
    const char *pArgName = pAccessQualifier + pkernelArgInfo->AccessQualifierSize;
    const char *pTypeName = pArgName + pkernelArgInfo->ArgumentNameSize;
    const char *pTypeQualifier = pTypeName + pkernelArgInfo->TypeNameSize;

    // A missing ';' used to be caught only by the debug break; in other builds the
    // null pointer went straight into pointer arithmetic. Reject the token instead.
    const char *pTypeNameEnd = strchr(pTypeName, ';');
    DEBUG_BREAK_IF(pTypeNameEnd == nullptr);
    if (pTypeNameEnd == nullptr) {
        return CL_INVALID_BINARY;
    }

    resizeKernelArgInfoAndRegisterParameter(argNum);

    auto &argInfo = kernelArgInfo[argNum];
    argInfo.addressQualifierStr = pAddressQualifier;
    argInfo.accessQualifierStr = pAccessQualifier;
    argInfo.name = pArgName;
    argInfo.typeStr.assign(pTypeName, pTypeNameEnd - pTypeName);
    argInfo.typeQualifierStr = pTypeQualifier;

    patchInfo.kernelArgumentInfo.push_back(pkernelArgInfo);
    return CL_SUCCESS;
}
// Records a data-parameter-buffer token as a patch entry for its argument
// (size, offset and source offset taken from the token; heap offset 0).
void KernelInfo::storeKernelArgument(
    const SPatchDataParameterBuffer *pDataParameterKernelArg) {
    const uint32_t argNum = pDataParameterKernelArg->ArgumentNumber;
    const uint32_t patchSize = pDataParameterKernelArg->DataSize;
    const uint32_t patchOffset = pDataParameterKernelArg->Offset;
    const uint32_t patchSourceOffset = pDataParameterKernelArg->SourceOffset;

    storeKernelArgPatchInfo(argNum, patchSize, patchOffset, patchSourceOffset, 0);
}
// Records a stateless global memory object argument: sets usesSsh, adds a patch
// entry, flags the argument as a buffer and keeps the token pointer in patchInfo.
void KernelInfo::storeKernelArgument(
    const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg) {
    const uint32_t argNum = pStatelessGlobalKernelArg->ArgumentNumber;
    const uint32_t heapOffset = pStatelessGlobalKernelArg->SurfaceStateHeapOffset;

    usesSsh = true;
    storeKernelArgPatchInfo(argNum,
                            pStatelessGlobalKernelArg->DataParamSize,
                            pStatelessGlobalKernelArg->DataParamOffset,
                            0,
                            heapOffset);
    kernelArgInfo[argNum].isBuffer = true;

    patchInfo.statelessGlobalMemObjKernelArgs.push_back(pStatelessGlobalKernelArg);
}
// Records an image memory object argument: sets usesSsh, adds a patch entry
// carrying only the token's Offset as heap offset, sets the image flags and
// derives the access qualifier from the token's Writeable field.
void KernelInfo::storeKernelArgument(
    const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg) {
    const uint32_t argNum = pImageMemObjKernelArg->ArgumentNumber;
    const uint32_t surfaceStateOffset = pImageMemObjKernelArg->Offset;

    usesSsh = true;
    storeKernelArgPatchInfo(argNum, 0, 0, 0, surfaceStateOffset);

    // Take the reference only now: the call above may have resized kernelArgInfo.
    auto &argInfo = kernelArgInfo[argNum];
    argInfo.isImage = true;

    if (pImageMemObjKernelArg->Type == iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA) {
        argInfo.isMediaImage = true;
    }
    if (pImageMemObjKernelArg->Type == iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA_BLOCK) {
        argInfo.isMediaBlockImage = true;
    }

    if (pImageMemObjKernelArg->Writeable) {
        argInfo.accessQualifier = CL_KERNEL_ARG_ACCESS_READ_WRITE;
    } else {
        argInfo.accessQualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY;
    }

    patchInfo.imageMemObjKernelArgs.push_back(pImageMemObjKernelArg);
}
// Records a global memory object argument: sets usesSsh, adds a patch entry
// carrying only the token's Offset as heap offset, flags the argument as a
// buffer and keeps the token pointer in patchInfo.
void KernelInfo::storeKernelArgument(
    const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjKernelArg) {
    const uint32_t argNum = pGlobalMemObjKernelArg->ArgumentNumber;
    const uint32_t surfaceStateOffset = pGlobalMemObjKernelArg->Offset;

    usesSsh = true;
    storeKernelArgPatchInfo(argNum, 0, 0, 0, surfaceStateOffset);
    kernelArgInfo[argNum].isBuffer = true;

    patchInfo.globalMemObjKernelArgs.push_back(pGlobalMemObjKernelArg);
}
// Records a sampler-style argument and classifies it:
//  - kernels named ve_enhance_intel / ve_dn_enhance_intel / ve_dn_di_enhance_intel:
//    accelerator, with samplerArgumentType forced to SAMPLER_OBJECT_VE;
//  - token Type equal to SAMPLER_OBJECT_TEXTURE: ordinary sampler;
//  - any other Type: accelerator, and the kernel is marked as a VME workload.
void KernelInfo::storeKernelArgument(
    const SPatchSamplerKernelArgument *pSamplerArgument) {
    const uint32_t argNum = pSamplerArgument->ArgumentNumber;
    const uint32_t stateOffset = pSamplerArgument->Offset;
    storeKernelArgPatchInfo(argNum, 0, 0, 0, stateOffset);

    // Take the reference only now: the call above may have resized kernelArgInfo.
    auto &argInfo = kernelArgInfo[argNum];
    argInfo.samplerArgumentType = pSamplerArgument->Type;

    const bool isVeKernel = (this->name == "ve_enhance_intel") ||
                            (this->name == "ve_dn_enhance_intel") ||
                            (this->name == "ve_dn_di_enhance_intel");
    if (isVeKernel) {
        argInfo.isAccelerator = true;
        argInfo.samplerArgumentType = iOpenCL::SAMPLER_OBJECT_VE;
        return;
    }

    if (pSamplerArgument->Type == iOpenCL::SAMPLER_OBJECT_TEXTURE) {
        argInfo.isSampler = true;
        return;
    }

    DEBUG_BREAK_IF(pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VME &&
                   pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VE &&
                   pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VD);
    argInfo.isAccelerator = true;
    isVmeWorkload = true;
}
void KernelInfo::storeKernelArgument(
const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg) {
uint32_t argNum = pStatelessConstMemObjKernelArg->ArgumentNumber;
uint32_t offsetSSH = pStatelessConstMemObjKernelArg->SurfaceStateHeapOffset;
usesSsh |= true;
storeKernelArgPatchInfo(argNum, pStatelessConstMemObjKernelArg->DataParamSize, pStatelessConstMemObjKernelArg->DataParamOffset, 0, offsetSSH);
kernelArgInfo[argNum].isBuffer = true;
patchInfo.statelessGlobalMemObjKernelArgs.push_back(reinterpret_cast<const SPatchStatelessGlobalMemoryObjectKernelArgument *>(pStatelessConstMemObjKernelArg));
}
// Records a stateless device-queue argument: registers the argument slot, flags
// it as a device queue and adds a patch entry built from the token's fields.
void KernelInfo::storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg) {
    const uint32_t argNum = pStatelessDeviceQueueKernelArg->ArgumentNumber;

    // The slot must exist before it is indexed below.
    resizeKernelArgInfoAndRegisterParameter(argNum);
    kernelArgInfo[argNum].isDeviceQueue = true;

    storeKernelArgPatchInfo(argNum,
                            pStatelessDeviceQueueKernelArg->DataParamSize,
                            pStatelessDeviceQueueKernelArg->DataParamOffset,
                            0,
                            pStatelessDeviceQueueKernelArg->SurfaceStateHeapOffset);
}
// Keeps the stateless private surface token in patchInfo and sets usesSsh.
void KernelInfo::storePatchToken(
    const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg) {
    patchInfo.pAllocateStatelessPrivateSurface = pStatelessPrivateSurfaceArg;
    usesSsh = true;
}
// Keeps the stateless constant memory surface (with initialization) token in
// patchInfo and sets usesSsh.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg) {
    patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = pStatelessConstantMemorySurfaceWithInitializationArg;
    usesSsh = true;
}
// Keeps the stateless global memory surface (with initialization) token in
// patchInfo and sets usesSsh.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg) {
    patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = pStatelessGlobalMemorySurfaceWithInitializationArg;
    usesSsh = true;
}
// Keeps the stateless printf surface token in patchInfo and sets usesSsh.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg) {
    patchInfo.pAllocateStatelessPrintfSurface = pStatelessPrintfSurfaceArg;
    usesSsh = true;
}
// Keeps the stateless event pool surface token in patchInfo and sets usesSsh.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg) {
    patchInfo.pAllocateStatelessEventPoolSurface = pStatelessEventPoolSurfaceArg;
    usesSsh = true;
}
// Keeps the stateless default device queue surface token in patchInfo and sets usesSsh.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg) {
    patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = pStatelessDefaultDeviceQueueSurfaceArg;
    usesSsh = true;
}
void KernelInfo::storePatchToken(const SPatchString *pStringArg) {
uint32_t stringIndex = pStringArg->Index;
PrintfStringInfo printfStringInfo;
printfStringInfo.SizeInBytes = pStringArg->StringSize;
if (printfStringInfo.SizeInBytes) {
printfStringInfo.pStringData = new char[printfStringInfo.SizeInBytes];
if (printfStringInfo.pStringData != nullptr) {
memcpy_s(printfStringInfo.pStringData, printfStringInfo.SizeInBytes, (cl_char *)pStringArg + sizeof(SPatchString), printfStringInfo.SizeInBytes);
patchInfo.stringDataMap.insert(std::pair<uint32_t, PrintfStringInfo>(stringIndex, printfStringInfo));
}
}
}
void KernelInfo::storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo) {
attributes = reinterpret_cast<const char *>(pKernelAttributesInfo) + sizeof(SPatchKernelAttributesInfo);
auto start = attributes.find("intel_reqd_sub_group_size(");
if (start != std::string::npos) {
start += strlen("intel_reqd_sub_group_size(");
auto stop = attributes.find(")", start);
std::stringstream requiredSubGroupSizeStr(attributes.substr(start, stop - start));
requiredSubGroupSizeStr >> requiredSubGroupSize;
}
}
// Returns the stored printf string registered under index, or nullptr when no
// string has that index.
const char *KernelInfo::queryPrintfString(uint32_t index) const {
    const auto entry = patchInfo.stringDataMap.find(index);
    if (entry == patchInfo.stringDataMap.end()) {
        return nullptr;
    }
    return entry->second.pStringData;
}
// Translates the qualifier strings of every kernel argument into their numeric
// CL values:
//  - accessQualifierStr  -> accessQualifier  via accessQualifierMap,
//  - addressQualifierStr -> addressQualifier via addressQualifierMap,
//  - typeQualifierStr    -> typeQualifier, OR-ing in the value of every
//    typeQualifiers keyword that occurs in the string.
//
// Returns CL_SUCCESS, or CL_INVALID_BINARY at the first argument whose access or
// address qualifier string is unknown. Arguments processed before the failing
// one keep their resolved values, as does the failing argument's access
// qualifier when only its address qualifier was unknown.
cl_int KernelInfo::resolveKernelInfo() {
    for (auto &argInfo : kernelArgInfo) {
        const auto accessIt = accessQualifierMap.find(argInfo.accessQualifierStr);
        if (accessIt == accessQualifierMap.end()) {
            return CL_INVALID_BINARY;
        }
        argInfo.accessQualifier = accessIt->second;

        const auto addressIt = addressQualifierMap.find(argInfo.addressQualifierStr);
        if (addressIt == addressQualifierMap.end()) {
            return CL_INVALID_BINARY;
        }
        argInfo.addressQualifier = addressIt->second;

        // Substring match: several keywords may be present in one string.
        for (const auto &qualifier : typeQualifiers) {
            if (strstr(argInfo.typeQualifierStr.c_str(), qualifier.argTypeQualifier) != nullptr) {
                argInfo.typeQualifier |= qualifier.argTypeQualifierValue;
            }
        }
    }
    return CL_SUCCESS;
}
void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t dataOffset, uint32_t sourceOffset, uint32_t offsetSSH) {
resizeKernelArgInfoAndRegisterParameter(argNum);
KernelArgPatchInfo kernelArgPatchInfo;
kernelArgPatchInfo.crossthreadOffset = dataOffset;
kernelArgPatchInfo.size = dataSize;
kernelArgPatchInfo.sourceOffset = sourceOffset;
kernelArgInfo[argNum].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);
kernelArgInfo[argNum].offsetHeap = offsetSSH;
}
// Returns the Count field of the sampler state array token, or 0 when no such
// token is stored.
size_t KernelInfo::getSamplerStateArrayCount() const {
    if (!patchInfo.samplerStateArray) {
        return 0;
    }
    return static_cast<size_t>(patchInfo.samplerStateArray->Count);
}
// Returns the sampler count multiplied by Sampler::getSamplerStateSize(hwInfo).
size_t KernelInfo::getSamplerStateArraySize(const HardwareInfo &hwInfo) const {
    const size_t samplerCount = getSamplerStateArrayCount();
    const size_t samplerStateSize = Sampler::getSamplerStateSize(hwInfo);
    return samplerCount * samplerStateSize;
}
// Returns Offset - BorderColorOffset of the sampler state array token, or 0
// when no such token is stored.
size_t KernelInfo::getBorderColorStateSize() const {
    if (!patchInfo.samplerStateArray) {
        return 0;
    }
    size_t borderColorSize = patchInfo.samplerStateArray->Offset - patchInfo.samplerStateArray->BorderColorOffset;
    return borderColorSize;
}
// Returns the BorderColorOffset of the sampler state array token, or 0 when no
// such token is stored.
size_t KernelInfo::getBorderColorOffset() const {
    if (!patchInfo.samplerStateArray) {
        return 0;
    }
    size_t borderColorOffset = patchInfo.samplerStateArray->BorderColorOffset;
    return borderColorOffset;
}
// Returns DataParameterStreamSize of the data parameter stream token, or 0 when
// no such token is stored.
uint32_t KernelInfo::getConstantBufferSize() const {
    if (!patchInfo.dataParameterStream) {
        return 0;
    }
    return patchInfo.dataParameterStream->DataParameterStreamSize;
}
} // namespace OCLRT

View File

@ -0,0 +1,239 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "config.h"
#include "CL/cl.h"
#include "heap_info.h"
#include "kernel_arg_info.h"
#include "patch_info.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/helpers/dispatch_info.h"
#include <algorithm>
#include <cstdint>
#include <cmath>
#include <vector>
#include <string>
#include <unordered_map>
#include <map>
namespace OCLRT {
// Forward declarations for types that this header only names by pointer or
// reference, or declares before their definition.
class BuiltinDispatchInfoBuilder;
class Device;
class Kernel;
struct KernelInfo;
struct KernelArgumentType;
// Lookup tables keyed by qualifier / type-name strings. These are extern
// declarations only; the definitions live in a source file.
extern std::unordered_map<std::string, uint32_t> accessQualifierMap;
extern std::unordered_map<std::string, uint32_t> addressQualifierMap;
extern std::map<std::string, size_t> typeSizeMap;
// Collection of offsets describing where workload-related values are located.
// The constructor sets every offset member to undefinedOffset; slmStaticSize
// starts at 0.
struct WorkloadInfo {
    // Per-dimension offsets (index 0..2).
    uint32_t globalWorkOffsetOffsets[3];
    uint32_t globalWorkSizeOffsets[3];
    uint32_t localWorkSizeOffsets[3];
    uint32_t localWorkSizeOffsets2[3];
    uint32_t enqueuedLocalWorkSizeOffsets[3];
    uint32_t numWorkGroupsOffset[3];

    // Scalar offsets and sizes.
    uint32_t maxWorkGroupSizeOffset;
    uint32_t workDimOffset;
    uint32_t slmStaticSize = 0;
    uint32_t simdSizeOffset;
    uint32_t parentEventOffset;
    uint32_t prefferedWkgMultipleOffset;

    // Sentinels; declared here, defined out of class.
    static const uint32_t undefinedOffset;
    static const uint32_t invalidParentEvent;

    WorkloadInfo() {
        for (uint32_t dim = 0; dim < 3; ++dim) {
            globalWorkOffsetOffsets[dim] = undefinedOffset;
            globalWorkSizeOffsets[dim] = undefinedOffset;
            localWorkSizeOffsets[dim] = undefinedOffset;
            localWorkSizeOffsets2[dim] = undefinedOffset;
            enqueuedLocalWorkSizeOffsets[dim] = undefinedOffset;
            numWorkGroupsOffset[dim] = undefinedOffset;
        }
        maxWorkGroupSizeOffset = undefinedOffset;
        workDimOffset = undefinedOffset;
        simdSizeOffset = undefinedOffset;
        parentEventOffset = undefinedOffset;
        prefferedWkgMultipleOffset = undefinedOffset;
    }
};
// Target ratio that WorkSizeInfo::checkRatio applies (as a strict ratio) when
// Y-tiled surfaces are in use. Numerically this is ln(4); since checkRatio's
// other branch expresses ratios as log(x) - log(y), the value presumably
// corresponds to a 4:1 ratio between the first two dimensions — TODO confirm.
static const float YTilingRatioValue = 1.3862943611198906188344642429164f;
// Bundle of device and kernel properties plus values derived from them
// (minWorkGroupSize and the ratio settings). The constructors fill in the
// properties; the ratio members keep their defaults until checkRatio() runs.
struct WorkSizeInfo {
    // Set by the constructors.
    uint32_t maxWorkGroupSize;
    // Derived by setMinWorkGroupSize().
    uint32_t minWorkGroupSize;
    uint32_t hasBarriers;
    uint32_t simdSize;
    uint32_t slmTotalSize;
    GFXCORE_FAMILY coreFamily;
    uint32_t numThreadsPerSlice;
    uint32_t localMemSize;
    bool imgUsed{false};
    bool yTiledSurfaces{false};

    // Updated by checkRatio().
    bool useRatio{false};
    bool useStrictRatio{false};
    float targetRatio{0.0f};

    // Takes every property explicitly.
    WorkSizeInfo(uint32_t maxWorkGroupSize, uint32_t hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface);
    // Reads the properties from the kernel and device behind dispatchInfo.
    WorkSizeInfo(const DispatchInfo &dispatchInfo);

    void setIfUseImg(Kernel *pKernel);
    void setMinWorkGroupSize();
    void checkRatio(const size_t workItems[3]);
};
// Description of one kernel, assembled from the patch tokens passed to the
// store*() methods. Owns crossThreadData and the printf string copies held in
// patchInfo.stringDataMap (both released by the destructor). Not copyable.
struct KernelInfo {
  public:
    // Allocates a default-constructed KernelInfo with new.
    static KernelInfo *create();

    KernelInfo() {
        heapInfo = {};
        patchInfo = {};
        workloadInfo = {};
        kernelArgInfo = {};
        kernelNonArgInfo = {};
        childrenKernelsIdOffset = {};
        reqdWorkGroupSize[0] = WorkloadInfo::undefinedOffset;
        reqdWorkGroupSize[1] = WorkloadInfo::undefinedOffset;
        reqdWorkGroupSize[2] = WorkloadInfo::undefinedOffset;
    }

    KernelInfo(const KernelInfo &) = delete;
    KernelInfo &operator=(const KernelInfo &) = delete;

    ~KernelInfo();

    // Stores the strings that follow an argument-info token.
    cl_int storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo);

    // One overload per kernel-argument token type.
    void storeKernelArgument(const SPatchDataParameterBuffer *pDataParameterKernelArg);
    void storeKernelArgument(const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg);
    void storeKernelArgument(const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg);
    void storeKernelArgument(const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjKernelArg);
    void storeKernelArgument(const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg);
    void storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg);
    void storeKernelArgument(const SPatchSamplerKernelArgument *pSamplerKernelArg);

    // One overload per non-argument token type.
    void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg);
    void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg);
    void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg);
    void storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg);
    void storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg);
    void storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg);
    void storePatchToken(const SPatchString *pStringArg);
    void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo);

    // Converts the stored qualifier strings into numeric CL values.
    cl_int resolveKernelInfo();

    // Grows kernelArgInfo so that index argCount is valid and, the first time a
    // given index is seen, marks it as needing a patch and counts it in
    // argumentsToPatchNum. Note that argCount is used as an index, not a count.
    void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) {
        if (kernelArgInfo.size() <= argCount) {
            kernelArgInfo.resize(argCount + 1);
        }
        if (!kernelArgInfo[argCount].needPatch) {
            kernelArgInfo[argCount].needPatch = true;
            argumentsToPatchNum++;
        }
    }

    void storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t crossthreadOffset, uint32_t sourceOffset, uint32_t offsetSSH);
    const char *queryPrintfString(uint32_t index) const;
    size_t getSamplerStateArrayCount() const;
    size_t getSamplerStateArraySize(const HardwareInfo &hwInfo) const;
    size_t getBorderColorStateSize() const;
    size_t getBorderColorOffset() const;

    // Returns 32 or 16 when the corresponding CompiledSIMD flag is set, otherwise
    // 8; returns 1 when no execution environment token is stored.
    unsigned int getMaxSimdSize() const {
        const auto executionEnvironment = patchInfo.executionEnvironment;
        if (executionEnvironment == nullptr) {
            return 1;
        }
        if (executionEnvironment->CompiledSIMD32) {
            return 32;
        }
        if (executionEnvironment->CompiledSIMD16) {
            return 16;
        }
        return 8;
    }

    // False when no execution environment token is stored.
    bool hasDeviceEnqueue() const {
        return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->HasDeviceEnqueue : false;
    }

    // False when no execution environment token is stored.
    bool requiresSubgroupIndependentForwardProgress() const {
        return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false;
    }

    // Returns the product of the three RequiredWorkGroupSize values from the
    // execution environment. Falls back to maxWorkGroupSize when the product is 0
    // or exceeds maxWorkGroupSize, and also when no execution environment token
    // is stored (consistent with the other accessors above, which all tolerate a
    // missing token).
    size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
        const auto executionEnvironment = patchInfo.executionEnvironment;
        if (executionEnvironment == nullptr) {
            return maxWorkGroupSize;
        }
        // Multiply in size_t rather than in the narrower field type, so a large
        // product is not truncated before the comparison below.
        size_t maxRequiredWorkGroupSize = static_cast<size_t>(executionEnvironment->RequiredWorkGroupSizeX) *
                                          static_cast<size_t>(executionEnvironment->RequiredWorkGroupSizeY) *
                                          static_cast<size_t>(executionEnvironment->RequiredWorkGroupSizeZ);
        if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
            maxRequiredWorkGroupSize = maxWorkGroupSize;
        }
        return maxRequiredWorkGroupSize;
    }

    uint32_t getConstantBufferSize() const;

    // Returns the index of the first argument whose name equals name, or -1.
    int32_t getArgNumByName(const char *name) const {
        int32_t argNum = 0;
        for (auto &arg : kernelArgInfo) {
            if (arg.name == name) {
                return argNum;
            }
            ++argNum;
        }
        return -1;
    }

    std::string name;
    std::string attributes;
    HeapInfo heapInfo;
    PatchInfo patchInfo;
    std::vector<KernelArgInfo> kernelArgInfo;
    std::vector<KernelArgInfo> kernelNonArgInfo;
    WorkloadInfo workloadInfo;
    std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
    bool usesSsh = false;
    bool requiresSshForBuffers = false;
    bool isValid = false;
    bool isVmeWorkload = false;
    char *crossThreadData = nullptr; // released with delete[] in the destructor
    size_t reqdWorkGroupSize[3];
    size_t requiredSubGroupSize = 0;
    uint32_t gpuPointerSize = 0;
    const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
    uint32_t argumentsToPatchNum = 0;
    uint32_t systemKernelOffset = 0;
};
} // namespace OCLRT

179
runtime/program/link.cpp Normal file
View File

@ -0,0 +1,179 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "config.h"
#include "runtime/compiler_interface/compiler_interface.h"
#include "runtime/platform/platform.h"
#include "runtime/helpers/validators.h"
#include "program.h"
#include "elf/writer.h"
#include <cstring>
namespace OCLRT {
// Links the given input programs into this program.
//
// Steps:
//  1. Validate the arguments and check that no build is already in progress.
//  2. Pack the llvmBinary of every input program into one ELF image (one section
//     per program, typed SPIR-V or LLVM binary).
//  3. Hand the image to the compiler interface: createLibrary() when
//     buildOptions contains "-create-library", otherwise link() followed by
//     processGenBinary().
//  4. Update buildStatus / programBinaryType, release temporaries, clear
//     internalOptions and invoke funcNotify when one was supplied.
//
// Returns CL_SUCCESS or one of: CL_INVALID_VALUE, CL_INVALID_DEVICE,
// CL_INVALID_OPERATION, CL_INVALID_PROGRAM, CL_OUT_OF_HOST_MEMORY, or whatever
// error the compiler interface / processGenBinary() reports.
//
// NOTE(review): every failure path, including the argument checks and the
// "build already in progress" check, ends with buildStatus = CL_BUILD_ERROR and
// programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE. Confirm that overwriting the
// state of an in-progress build is intended.
cl_int Program::link(
    cl_uint numDevices,
    const cl_device_id *deviceList,
    const char *buildOptions,
    cl_uint numInputPrograms,
    const cl_program *inputPrograms,
    void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
    void *userData) {
    cl_int retVal = CL_SUCCESS;
    cl_program program;
    CLElfLib::CElfWriter *pElfWriter = nullptr;
    Program *pInputProgObj;
    // Initialised so the allocation below never reads an indeterminate value if
    // the size query does not write it.
    size_t dataSize = 0;
    char *pData = nullptr;
    bool isCreateLibrary;
    CLElfLib::SSectionNode sectionNode;

    // Single-pass loop: "break" jumps to the common clean-up code below.
    do {
        // deviceList and numDevices must be given together or not at all.
        if (((deviceList == nullptr) && (numDevices != 0)) ||
            ((deviceList != nullptr) && (numDevices == 0))) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        if ((numInputPrograms == 0) || (inputPrograms == nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // userData without a callback is rejected.
        if ((funcNotify == nullptr) &&
            (userData != nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // Only the first entry of deviceList is validated.
        if ((deviceList != nullptr) && validateObject(*deviceList) != CL_SUCCESS) {
            retVal = CL_INVALID_DEVICE;
            break;
        }

        if (buildStatus == CL_BUILD_IN_PROGRESS) {
            retVal = CL_INVALID_OPERATION;
            break;
        }

        options = (buildOptions != nullptr) ? buildOptions : "";
        isCreateLibrary = (strstr(options.c_str(), "-create-library") != nullptr);

        buildStatus = CL_BUILD_IN_PROGRESS;

        pElfWriter = CLElfLib::CElfWriter::create(CLElfLib::EH_TYPE_OPENCL_OBJECTS, CLElfLib::EH_MACHINE_NONE, 0);
        // The writer is dereferenced below; do not continue without one.
        if (pElfWriter == nullptr) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        StackVec<const Program *, 16> inputProgramsInternal;
        for (cl_uint i = 0; i < numInputPrograms; i++) {
            program = inputPrograms[i];
            if (program == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }
            pInputProgObj = castToObject<Program>(program);
            if (pInputProgObj == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }
            inputProgramsInternal.push_back(pInputProgObj);
            // Every input must carry a binary in llvmBinary.
            if ((pInputProgObj->llvmBinary == nullptr) || (pInputProgObj->llvmBinarySize == 0)) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }

            sectionNode.Name = "";
            if (pInputProgObj->getIsSpirV()) {
                sectionNode.Type = CLElfLib::SH_TYPE_SPIRV;
            } else {
                sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY;
            }
            sectionNode.Flags = 0;
            sectionNode.pData = pInputProgObj->llvmBinary;
            sectionNode.DataSize = static_cast<unsigned int>(pInputProgObj->llvmBinarySize);

            pElfWriter->addSection(&sectionNode);
        }
        // The inner "break"s only leave the for loop; propagate the failure.
        if (retVal != CL_SUCCESS) {
            break;
        }

        // First call queries the required size, second call fills the buffer.
        pElfWriter->resolveBinary(nullptr, dataSize);
        pData = new char[dataSize];
        pElfWriter->resolveBinary(pData, dataSize);

        CompilerInterface *pCompilerInterface = getCompilerInterface();
        if (!pCompilerInterface) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        TranslationArgs inputArgs = {};
        inputArgs.pInput = pData;
        inputArgs.InputSize = (uint32_t)dataSize;
        inputArgs.pOptions = options.c_str();
        inputArgs.OptionsSize = (uint32_t)options.length();
        inputArgs.pInternalOptions = internalOptions.c_str();
        inputArgs.InternalOptionsSize = (uint32_t)internalOptions.length();
        inputArgs.pTracingOptions = nullptr;
        inputArgs.TracingOptionsCount = 0;

        if (!isCreateLibrary) {
            retVal = pCompilerInterface->link(*this, inputArgs);
            if (retVal != CL_SUCCESS) {
                break;
            }

            retVal = processGenBinary();
            if (retVal != CL_SUCCESS) {
                break;
            }
            programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
        } else {
            retVal = pCompilerInterface->createLibrary(*this, inputArgs);
            if (retVal != CL_SUCCESS) {
                break;
            }
            programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
        }
        updateNonUniformFlag(&*inputProgramsInternal.begin(), inputProgramsInternal.size());
        separateBlockKernels();
    } while (false);

    if (retVal != CL_SUCCESS) {
        buildStatus = CL_BUILD_ERROR;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
    } else {
        buildStatus = CL_BUILD_SUCCESS;
    }

    // Clean-up runs on every path; pElfWriter and pData may still be null here.
    CLElfLib::CElfWriter::destroy(pElfWriter);
    delete[] pData;
    internalOptions.clear();

    // The callback is invoked on success and on failure alike.
    if (funcNotify != nullptr) {
        (*funcNotify)(this, userData);
    }

    return retVal;
}
} // namespace OCLRT

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "patch_list.h"
#include "patch_g7.h"
#include <vector>
#include <map>
namespace OCLRT {
using iOpenCL::SPatchMediaInterfaceDescriptorLoad;
using iOpenCL::SPatchAllocateLocalSurface;
using iOpenCL::SPatchMediaVFEState;
using iOpenCL::SPatchInterfaceDescriptorData;
using iOpenCL::SPatchSamplerStateArray;
using iOpenCL::SPatchBindingTableState;
using iOpenCL::SPatchDataParameterBuffer;
using iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument;
using iOpenCL::SPatchGlobalMemoryObjectKernelArgument;
using iOpenCL::SPatchStatelessConstantMemoryObjectKernelArgument;
using iOpenCL::SPatchStatelessDeviceQueueKernelArgument;
using iOpenCL::SPatchImageMemoryObjectKernelArgument;
using iOpenCL::SPatchSamplerKernelArgument;
using iOpenCL::SPatchDataParameterStream;
using iOpenCL::SPatchThreadPayload;
using iOpenCL::SPatchExecutionEnvironment;
using iOpenCL::SPatchKernelAttributesInfo;
using iOpenCL::SPatchKernelArgumentInfo;
using iOpenCL::SKernelBinaryHeaderCommon;
using iOpenCL::SProgramBinaryHeader;
using iOpenCL::SPatchAllocateStatelessPrivateSurface;
using iOpenCL::SPatchAllocateStatelessConstantMemorySurfaceWithInitialization;
using iOpenCL::SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization;
using iOpenCL::SPatchAllocateStatelessPrintfSurface;
using iOpenCL::SPatchAllocateStatelessEventPoolSurface;
using iOpenCL::SPatchAllocateStatelessDefaultDeviceQueueSurface;
using iOpenCL::SPatchString;
using iOpenCL::SPatchGtpinFreeGRFInfo;
using iOpenCL::SPatchStateSIP;
// Record describing one printf string: a byte count plus a pointer to the
// character data (presumably the size of that data - confirm with the producer).
struct TagPrintfStringInfo {
    size_t SizeInBytes;
    char *pStringData;
};
// Value and pointer aliases kept for existing users of the old typedef names.
using PrintfStringInfo = TagPrintfStringInfo;
using PPrintfStringInfo = TagPrintfStringInfo *;
// Per-kernel collection of pointers to patch tokens.
// Every single-instance pointer defaults to nullptr; the containers start empty.
// NOTE(review): the pointers look like non-owning views into the kernel's patch
// list (the patch list parser stores reinterpret_cast'ed token addresses) - confirm.
struct PatchInfo {
// Tokens that occur at most once per kernel.
const SPatchMediaInterfaceDescriptorLoad *interfaceDescriptorDataLoad = nullptr;
const SPatchAllocateLocalSurface *localsurface = nullptr;
const SPatchMediaVFEState *mediavfestate = nullptr;
const SPatchInterfaceDescriptorData *interfaceDescriptorData = nullptr;
const SPatchSamplerStateArray *samplerStateArray = nullptr;
const SPatchBindingTableState *bindingTableState = nullptr;
// Tokens that may occur several times; one entry per occurrence.
::std::vector<const SPatchDataParameterBuffer *> dataParameterBuffers;
::std::vector<const SPatchStatelessGlobalMemoryObjectKernelArgument *>
statelessGlobalMemObjKernelArgs;
::std::vector<const SPatchImageMemoryObjectKernelArgument *>
imageMemObjKernelArgs;
::std::vector<const SPatchGlobalMemoryObjectKernelArgument *>
globalMemObjKernelArgs;
const SPatchDataParameterStream *dataParameterStream = nullptr;
const SPatchThreadPayload *threadPayload = nullptr;
const SPatchExecutionEnvironment *executionEnvironment = nullptr;
const SPatchKernelAttributesInfo *pKernelAttributesInfo = nullptr;
// Stateless surface allocation tokens.
const SPatchAllocateStatelessPrivateSurface *pAllocateStatelessPrivateSurface = nullptr;
const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr;
const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr;
const SPatchAllocateStatelessPrintfSurface *pAllocateStatelessPrintfSurface = nullptr;
const SPatchAllocateStatelessEventPoolSurface *pAllocateStatelessEventPoolSurface = nullptr;
const SPatchAllocateStatelessDefaultDeviceQueueSurface *pAllocateStatelessDefaultDeviceQueueSurface = nullptr;
// printf string records keyed by a 32-bit id (presumably the string index - confirm).
::std::map<uint32_t, PrintfStringInfo> stringDataMap;
::std::vector<const SPatchKernelArgumentInfo *> kernelArgumentInfo;
// All members are set up by their default member initializers.
PatchInfo() {
}
};
} // namespace OCLRT

View File

@ -0,0 +1,189 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "print_formatter.h"
#include "runtime/helpers/string.h"
#include "runtime/memory_manager/graphics_allocation.h"
#include <iostream>
namespace OCLRT {
// Binds the formatter to the kernel whose printf strings are queried and to the
// allocation holding the raw printf data. No buffer is parsed yet: the read
// cursor state (buffer, bufferSize, offset) starts out empty.
PrintFormatter::PrintFormatter(Kernel &kernelArg, GraphicsAllocation &dataArg)
    : kernel(kernelArg), data(dataArg), buffer(nullptr), bufferSize(0), offset(0) {}
// Walks the printf data of the bound allocation and prints every record.
//
// The first 4 bytes of the buffer hold the size of the data it contains. Each
// record starts with a 4-byte string index that selects the format string
// from the kernel info; records whose index has no string are skipped.
//
// print - callback that receives each fully formatted, NUL-terminated line.
void PrintFormatter::printKernelOutput(const std::function<void(char *)> &print) {
    offset = 0;
    buffer = reinterpret_cast<uint8_t *>(data.getUnderlyingBuffer());
    if (buffer == nullptr) {
        // Nothing to parse; keep read() from touching a null buffer.
        bufferSize = 0;
        return;
    }

    // first 4 bytes of the buffer store its own size
    // before reading it size needs to be set to 4 because read() checks bounds and would fail if bufferSize was 0
    bufferSize = 4;
    read(&bufferSize);

    // The size word comes from the buffer itself; never trust it beyond
    // the memory that actually backs the allocation.
    const size_t allocationSize = data.getUnderlyingBufferSize();
    if (bufferSize > allocationSize) {
        bufferSize = static_cast<uint32_t>(allocationSize);
    }

    uint32_t stringIndex = 0;
    while (offset + 4 <= bufferSize) {
        read(&stringIndex);
        const char *formatString = kernel.getKernelInfo().queryPrintfString(stringIndex);
        if (formatString != nullptr) {
            printString(formatString, print);
        }
    }
}
// Expands one printf format string into a local buffer and hands the result
// to `print`.
//
// Handling per character:
//   '\' + c     -> escapeChar(c)
//   "%%"        -> a single literal '%' (both characters are consumed)
//   '%'...conv  -> the specifier is copied out and formatted with the next
//                  value from the data buffer (string tokens for 's')
//   otherwise   -> copied verbatim
//
// Output is limited to maxPrintfOutputLength - 1 characters plus terminator;
// anything beyond that is dropped instead of overrunning the buffer.
void PrintFormatter::printString(const char *formatString, const std::function<void(char *)> &print) {
    // Scan at most capacity - 1 characters so a specifier copy always has room for its terminator.
    const size_t length = strnlen_s(formatString, maxPrintfOutputLength - 1);
    char output[maxPrintfOutputLength] = {};
    size_t cursor = 0;

    // Appends one character while always keeping the last slot for '\0'.
    auto append = [&](char c) {
        if (cursor + 1 < maxPrintfOutputLength) {
            output[cursor++] = c;
        }
    };

    for (size_t i = 0; i < length; i++) {
        const char current = formatString[i];
        if (current == '\\') {
            // A trailing backslash has nothing to escape and is dropped.
            if (i + 1 < length) {
                append(escapeChar(formatString[++i]));
            }
        } else if (current == '%') {
            if (i + 1 < length && formatString[i + 1] == '%') {
                append('%');
                ++i; // consume the second '%' so it is not parsed as a new specifier
                continue;
            }

            // Find the end of the specifier: one past the conversion character,
            // or the end of the string when none is present.
            size_t end = i + 1;
            while (end < length && !isConversionSpecifier(formatString[end])) {
                ++end;
            }
            if (end < length) {
                ++end;
            }

            char dataFormat[maxPrintfOutputLength];
            memcpy_s(dataFormat, maxPrintfOutputLength, formatString + i, end - i);
            dataFormat[end - i] = '\0';

            // cursor never exceeds capacity - 1, so at least the terminator fits.
            const size_t remaining = maxPrintfOutputLength - cursor;
            size_t written = 0;
            if (formatString[end - 1] == 's') {
                written = printStringToken(output + cursor, remaining, dataFormat);
            } else {
                written = printToken(output + cursor, remaining, dataFormat);
            }
            // The token printer may report more characters than fit (or an
            // error value); only advance over what can really be in the buffer.
            cursor += (written < remaining) ? written : remaining - 1;

            i = end - 1;
        } else {
            append(current);
        }
    }

    output[cursor] = '\0';
    print(output);
}
// Copies `format` into `stripped` with every vector-length modifier removed.
// A 'v' followed by '1' drops three characters (e.g. "v16"); any other 'v'
// drops two (e.g. "v4"). Skipping stops at the terminator, so a truncated
// modifier such as a trailing "v" or "v1" cannot run past the end of `format`.
//
// format   - NUL-terminated specifier to clean up.
// stripped - destination; must hold at least strlen(format) + 1 characters.
void PrintFormatter::stripVectorFormat(const char *format, char *stripped) {
    while (*format != '\0') {
        if (*format != 'v') {
            *stripped++ = *format++;
            continue;
        }
        // format[1] is safe to inspect: *format is 'v', so at worst it is '\0'.
        size_t toSkip = (format[1] != '1') ? 2 : 3;
        while (toSkip > 0 && *format != '\0') {
            ++format;
            --toSkip;
        }
    }
    *stripped = '\0';
}
// Removes an "hl" length modifier that sits directly before the final
// conversion character, in place (e.g. "%hlf" becomes "%f").
// Formats of three characters or fewer are left untouched.
void PrintFormatter::stripVectorTypeConversion(char *format) {
    const size_t len = strlen(format);
    if (len <= 3) {
        return;
    }
    char *tail = format + (len - 3);
    if (tail[0] != 'h' || tail[1] != 'l') {
        return;
    }
    // Move the conversion character over the 'h' and cut the string after it.
    tail[0] = tail[2];
    tail[1] = '\0';
}
// Reads the type tag of the next value in the data buffer and formats that
// value into `output` using `formatString`.
//
// Returns whatever the matching typed printer reports, or 0 when the tag is
// not handled here (INVALID, STRING or an unknown value).
size_t PrintFormatter::printToken(char *output, size_t size, const char *formatString) {
    PRINTF_DATA_TYPE type(PRINTF_DATA_TYPE::INVALID);
    read(&type);

    size_t written = 0;
    switch (type) {
    // scalar values
    case PRINTF_DATA_TYPE::BYTE:
        written = typedPrintToken<int8_t>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::SHORT:
        written = typedPrintToken<int16_t>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::INT:
        written = typedPrintToken<int>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::FLOAT:
        written = typedPrintToken<float>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::LONG:
        written = typedPrintToken<int64_t>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::POINTER:
        written = typedPrintToken<void *>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::DOUBLE:
        written = typedPrintToken<double>(output, size, formatString);
        break;
    // vector values
    case PRINTF_DATA_TYPE::VECTOR_BYTE:
        written = typedPrintVectorToken<int8_t>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::VECTOR_SHORT:
        written = typedPrintVectorToken<int16_t>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::VECTOR_INT:
        written = typedPrintVectorToken<int>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::VECTOR_LONG:
        written = typedPrintVectorToken<int64_t>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::VECTOR_FLOAT:
        written = typedPrintVectorToken<float>(output, size, formatString);
        break;
    case PRINTF_DATA_TYPE::VECTOR_DOUBLE:
        written = typedPrintVectorToken<double>(output, size, formatString);
        break;
    default:
        break;
    }
    return written;
}
// Maps the character that followed a backslash to the character to emit:
// 'n' becomes a newline, every other character is passed through unchanged.
char PrintFormatter::escapeChar(char escape) {
    return (escape == 'n') ? '\n' : escape;
}
// Returns true when `c` is one of the conversion characters that end a
// format specifier: d i o u x X a A e E f F g G s c p.
bool PrintFormatter::isConversionSpecifier(char c) {
    // Plain character table (no terminator), so '\0' never matches.
    static const char specifiers[] = {'d', 'i', 'o', 'u', 'x', 'X', 'a', 'A', 'e',
                                      'E', 'f', 'F', 'g', 'G', 's', 'c', 'p'};
    for (const char specifier : specifiers) {
        if (c == specifier) {
            return true;
        }
    }
    return false;
}
} // namespace OCLRT

View File

@ -0,0 +1,141 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/helpers/aligned_memory.h"
#include "runtime/kernel/kernel.h"
#include "runtime/os_interface/print.h"
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <functional>
extern int memcpy_s(void *dst, size_t destSize, const void *src, size_t count);
namespace OCLRT {
// Type tag stored in the printf data buffer in front of each value.
// The numeric values are read straight from the buffer, so they are spelled
// out here and must not be renumbered.
enum class PRINTF_DATA_TYPE : int {
    INVALID = 0,
    BYTE = 1,
    SHORT = 2,
    INT = 3,
    FLOAT = 4,
    STRING = 5,
    LONG = 6,
    POINTER = 7,
    DOUBLE = 8,
    VECTOR_BYTE = 9,
    VECTOR_SHORT = 10,
    VECTOR_INT = 11,
    VECTOR_LONG = 12,
    VECTOR_FLOAT = 13,
    VECTOR_DOUBLE = 14
};
// Turns the raw printf data of a kernel into text.
//
// The data buffer starts with a 4-byte size word, followed by records made of
// a string index and, for every conversion in that string, a type tag plus
// the value(s). All buffer access goes through read(), which bounds-checks
// against bufferSize.
class PrintFormatter {
  public:
    // kernelArg - kernel whose kernel info resolves printf string indices.
    // dataArg   - allocation holding the printf data to decode.
    PrintFormatter(Kernel &kernelArg, GraphicsAllocation &dataArg);

    // Decodes the whole buffer; every formatted string is passed to `print`
    // (standard output by default).
    void printKernelOutput(const std::function<void(char *)> &print = [](char *str) { printToSTDOUT(str); });

    // Capacity, in characters including the terminator, of the buffers used
    // while formatting a single string.
    static const size_t maxPrintfOutputLength = 1024;

  protected:
    void printString(const char *formatString, const std::function<void(char *)> &print);
    size_t printToken(char *output, size_t size, const char *formatString);

    char escapeChar(char escape);
    bool isConversionSpecifier(char c);
    void stripVectorFormat(const char *format, char *stripped);
    void stripVectorTypeConversion(char *format);

    // Limits a character count reported by simple_sprintf to what can really
    // be stored in a buffer of `capacity` characters (terminator excluded).
    // NOTE(review): simple_sprintf is assumed to possibly report the untruncated
    // length or an error value, like snprintf - the clamp is a no-op otherwise.
    static size_t clampWritten(size_t written, size_t capacity) {
        if (capacity == 0) {
            return 0;
        }
        return (written < capacity) ? written : capacity - 1;
    }

    // Copies the next sizeof(T) bytes of the buffer into *value and advances
    // the cursor. Returns false, leaving *value and the cursor untouched, when
    // fewer than sizeof(T) bytes remain.
    template <class T>
    bool read(T *value) {
        if (offset + sizeof(T) > bufferSize) {
            return false;
        }
        auto srcPtr = reinterpret_cast<T *>(buffer + offset);
        if (isAligned(srcPtr)) {
            *value = *srcPtr;
        } else {
            // The destination is exactly one T, so that is its capacity.
            memcpy_s(value, sizeof(T), srcPtr, sizeof(T));
        }
        offset += sizeof(T);
        return true;
    }

    // Formats one scalar of type T taken from the buffer (zero when the
    // buffer is exhausted). Returns the number of characters stored.
    template <class T>
    size_t typedPrintToken(char *output, size_t size, const char *formatString) {
        T value = {0};
        read(&value);
        return clampWritten(static_cast<size_t>(simple_sprintf(output, size, formatString, value)), size);
    }

    // Formats a vector: an int element count followed by that many T values,
    // printed comma-separated with the vector modifiers stripped from the
    // format. Returns the number of characters stored.
    template <class T>
    size_t typedPrintVectorToken(char *output, size_t size, const char *formatString) {
        T value = {0};
        int valueCount = 0;
        read(&valueCount);

        char strippedFormat[maxPrintfOutputLength];
        stripVectorFormat(formatString, strippedFormat);
        stripVectorTypeConversion(strippedFormat);

        size_t charactersPrinted = 0;
        int valuesRead = 0;
        for (int i = 0; i < valueCount; i++) {
            // Stop as soon as the buffer runs out: a corrupt count must not
            // keep re-printing a stale value.
            if (!read(&value)) {
                break;
            }
            ++valuesRead;

            size_t room = size - charactersPrinted;
            charactersPrinted += clampWritten(
                static_cast<size_t>(simple_sprintf(output + charactersPrinted, room, strippedFormat, value)), room);

            if (i < valueCount - 1) {
                room = size - charactersPrinted;
                charactersPrinted += clampWritten(
                    static_cast<size_t>(simple_sprintf(output + charactersPrinted, room, "%c", ',')), room);
            }
        }

        // Elements narrower than 4 bytes are followed by padding; skip it for
        // the elements that were actually consumed.
        if (sizeof(T) < 4) {
            offset += static_cast<uint32_t>((4 - sizeof(T)) * static_cast<size_t>(valuesRead));
        }

        return charactersPrinted;
    }

    // Formats a string argument: reads the type tag and the string index and
    // prints the referenced printf string. A tag other than STRING prints a
    // null string pointer instead.
    size_t printStringToken(char *output, size_t size, const char *formatString) {
        int index = 0;
        int type = 0;
        // additional read to discard the data type
        read(&type);
        read(&index);

        // Always hand a pointer to the "%s" conversion; an int 0 does not
        // match the type the conversion expects.
        const char *text = nullptr;
        if (type == static_cast<int>(PRINTF_DATA_TYPE::STRING)) {
            text = kernel.getKernelInfo().queryPrintfString(index);
        }
        return clampWritten(static_cast<size_t>(simple_sprintf(output, size, formatString, text)), size);
    }

    Kernel &kernel;
    GraphicsAllocation &data;

    uint8_t *buffer;     // buffer extracted from the kernel, contains values to be printed
    uint32_t bufferSize; // size of the data contained in the buffer
    uint32_t offset;     // current position in currently parsed buffer
};
};

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "printf_handler.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/program/print_formatter.h"
#include "runtime/kernel/kernel.h"
#include "runtime/helpers/dispatch_info.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/memory_manager/memory_manager.h"
namespace OCLRT {
// Remembers the device; kernel and surface are filled in by prepareDispatch().
PrintfHandler::PrintfHandler(Device &deviceArg)
    : device(deviceArg) {
}
// Hands the printf surface back to the device's memory manager.
PrintfHandler::~PrintfHandler() {
    auto memoryManager = device.getMemoryManager();
    memoryManager->freeGraphicsMemory(printfSurface);
}
// Creates a handler when the dispatch needs one: either it uses the stateless
// printf surface, or its first kernel reports being a parent kernel whose
// blocks use printf. Returns nullptr otherwise; the caller owns the result.
PrintfHandler *PrintfHandler::create(const MultiDispatchInfo &multiDispatchInfo, Device &device) {
    // The first kernel is only inspected when the surface check is false.
    const bool printfNeeded =
        multiDispatchInfo.usesStatelessPrintfSurface() ||
        multiDispatchInfo.begin()->getKernel()->checkIfIsParentKernelAndBlocksUsesPrintf();
    if (!printfNeeded) {
        return nullptr;
    }
    return new PrintfHandler(device);
}
// Allocates the printf surface for the first kernel of the dispatch and
// patches its address into the kernel. Does nothing when the device reports
// a printf buffer size of zero.
void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) {
    const auto surfaceSize = device.getDeviceInfo().printfBufferSize;
    if (surfaceSize == 0) {
        return;
    }

    kernel = multiDispatchInfo.begin()->getKernel();
    printfSurface = device.getMemoryManager()->createGraphicsAllocationWithRequiredBitness(surfaceSize, nullptr);

    // The surface begins with a 32-bit size word; seed it with the size of that word.
    auto sizeWord = reinterpret_cast<uint32_t *>(printfSurface->getUnderlyingBuffer());
    *sizeWord = printfSurfaceInitialDataSize;

    const auto *printfToken = kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface;

    // Write the surface address into the cross-thread data at the token's offset.
    auto crossThreadData = reinterpret_cast<uintptr_t *>(kernel->getCrossThreadData());
    auto patchLocation = ptrOffset(crossThreadData, printfToken->DataParamOffset);
    patchWithRequiredSize(patchLocation, printfToken->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch());

    if (kernel->requiresSshForBuffers()) {
        // Additionally program the surface state at the token's heap offset.
        auto surfaceStateHeap = reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap());
        auto surfaceState = ptrOffset(surfaceStateHeap, printfToken->SurfaceStateHeapOffset);
        void *surfaceAddress = printfSurface->getUnderlyingBuffer();
        size_t surfaceBytes = printfSurface->getUnderlyingBufferSize();
        Buffer::setSurfaceState(&kernel->getContext(), surfaceState, surfaceBytes, surfaceAddress, printfSurface);
    }
}
// Asks the command stream receiver to make the printf surface resident.
void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
    GraphicsAllocation &surface = *printfSurface;
    commandStreamReceiver.makeResident(surface);
}
void PrintfHandler::printEnqueueOutput() {
PrintFormatter printFormatter(*kernel, *printfSurface);
printFormatter.printKernelOutput();
}
} // namespace OCLRT

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/kernel/kernel.h"
#include "runtime/command_stream/command_stream_receiver.h"
namespace OCLRT {
struct MultiDispatchInfo;
// Manages the printf surface of one kernel dispatch: allocates it, patches
// its address into the kernel, makes it resident and finally prints the data
// it contains.
//
// The handler frees the surface in its destructor, so copying is disabled
// to keep two handlers from releasing the same allocation.
class PrintfHandler {
  public:
    // Returns a new handler when the dispatch uses printf, nullptr otherwise.
    // The caller owns the returned object.
    static PrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, Device &deviceArg);

    ~PrintfHandler();

    PrintfHandler(const PrintfHandler &) = delete;
    PrintfHandler &operator=(const PrintfHandler &) = delete;

    // Allocates and patches the printf surface for the first kernel of the dispatch.
    void prepareDispatch(const MultiDispatchInfo &multiDispatchInfo);
    // Makes the printf surface resident through the given command stream receiver.
    void makeResident(CommandStreamReceiver &commandStreamReceiver);
    // Formats and prints the contents of the printf surface.
    void printEnqueueOutput();

    // Surface created by prepareDispatch(), or nullptr when none was created.
    GraphicsAllocation *getSurface() {
        return printfSurface;
    }

  protected:
    // Construction goes through create(); explicit to rule out an implicit
    // Device -> PrintfHandler conversion.
    explicit PrintfHandler(Device &device);

    // Initial value of the surface's leading size word: the size of the word itself.
    static const uint32_t printfSurfaceInitialDataSize = sizeof(uint32_t);

    Device &device;
    Kernel *kernel = nullptr;
    GraphicsAllocation *printfSurface = nullptr;
};
} // namespace OCLRT

View File

@ -0,0 +1,266 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "elf/reader.h"
#include "elf/writer.h"
#include "program.h"
#include "runtime/helpers/string.h"
namespace OCLRT {
// Loads a program from an OpenCL ELF container.
//
// The binary is validated, copied into elfBinary, and its sections are
// dispatched: LLVM / SPIR-V data and the device binary are stored, the
// options section becomes the build options, string tables are ignored and
// any other section type rejects the binary.
//
// pBinary       - start of the ELF image.
// binarySize    - size of the image in bytes.
// binaryVersion - set to the current ICBE version up front; passed on to
//                 getProgramCompilerVersion() when the device binary is rejected.
//
// Returns CL_SUCCESS, CL_INVALID_BINARY for a malformed or unsupported
// image, or CL_OUT_OF_HOST_MEMORY when the ELF reader cannot be created.
cl_int Program::processElfBinary(
    const void *pBinary,
    size_t binarySize,
    uint32_t &binaryVersion) {
    cl_int retVal = CL_SUCCESS;
    CLElfLib::CElfReader *pElfReader = nullptr;
    const CLElfLib::SElf64Header *pElfHeader = nullptr;
    char *pSectionData = nullptr;
    size_t sectionDataSize = 0;

    binaryVersion = iOpenCL::CURRENT_ICBE_VERSION;

    if (CLElfLib::CElfReader::isValidElf64(pBinary, binarySize) == false) {
        retVal = CL_INVALID_BINARY;
    }

    if (retVal == CL_SUCCESS) {
        // Keep a private copy of the whole image.
        delete[] elfBinary;
        elfBinarySize = 0;

        elfBinary = new char[binarySize];
        elfBinarySize = binarySize;
        memcpy_s(elfBinary, elfBinarySize, pBinary, binarySize);
    }

    if (retVal == CL_SUCCESS) {
        pElfReader = CLElfLib::CElfReader::create(
            (const char *)pBinary,
            binarySize);

        if (pElfReader == nullptr) {
            retVal = CL_OUT_OF_HOST_MEMORY;
        }
    }

    if (retVal == CL_SUCCESS) {
        pElfHeader = pElfReader->getElfHeader();

        // The ELF header type selects the program binary type.
        switch (pElfHeader->Type) {
        case CLElfLib::EH_TYPE_OPENCL_EXECUTABLE:
            programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
            break;

        case CLElfLib::EH_TYPE_OPENCL_LIBRARY:
            programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
            break;

        case CLElfLib::EH_TYPE_OPENCL_OBJECTS:
            programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
            break;

        default:
            retVal = CL_INVALID_BINARY;
        }
    }

    if (retVal == CL_SUCCESS) {
        // section 0 is always null
        for (uint32_t i = 1; i < pElfHeader->NumSectionHeaderEntries; i++) {
            const CLElfLib::SElf64SectionHeader *pSectionHeader = pElfReader->getSectionHeader(i);

            pSectionData = nullptr;
            sectionDataSize = 0;

            switch (pSectionHeader->Type) {
            case CLElfLib::SH_TYPE_SPIRV:
                isSpirV = true;
            // FALLTHROUGH
            case CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY:
                pElfReader->getSectionData(i, pSectionData, sectionDataSize);
                if (pSectionData && sectionDataSize) {
                    storeLlvmBinary(pSectionData, sectionDataSize);
                }
                break;

            case CLElfLib::SH_TYPE_OPENCL_DEV_BINARY:
                pElfReader->getSectionData(i, pSectionData, sectionDataSize);
                if (pSectionData && sectionDataSize && validateGenBinaryHeader((SProgramBinaryHeader *)pSectionData)) {
                    storeGenBinary(pSectionData, sectionDataSize);
                    isCreatedFromBinary = true;
                } else {
                    getProgramCompilerVersion((SProgramBinaryHeader *)pSectionData, binaryVersion);
                    retVal = CL_INVALID_BINARY;
                }
                break;

            case CLElfLib::SH_TYPE_OPENCL_OPTIONS:
                pElfReader->getSectionData(i, pSectionData, sectionDataSize);
                if (pSectionData && sectionDataSize) {
                    // The section comes from a caller-supplied binary and may
                    // lack a terminator: never read past the section itself.
                    options.assign(pSectionData, strnlen_s(pSectionData, sectionDataSize));
                }
                break;

            case CLElfLib::SH_TYPE_STR_TBL:
                // We can skip the string table
                break;

            default:
                retVal = CL_INVALID_BINARY;
            }

            if (retVal != CL_SUCCESS) {
                break;
            }
        }
    }

    if (retVal == CL_SUCCESS) {
        isProgramBinaryResolved = true;
        buildStatus = CL_BUILD_SUCCESS;

        // Create an empty build log since program is effectively built
        updateBuildLog(pDevice, "", 1);
    }

    CLElfLib::CElfReader::destroy(pElfReader);
    return retVal;
}
// Builds the ELF image (elfBinary / elfBinarySize) for the current program
// state, unless it is already resolved.
//
// Sections are added in this order: build options, LLVM or SPIR-V data,
// device binary (when present), device debug data (when present).
//
// Returns CL_SUCCESS, CL_INVALID_BINARY when the binary type is unknown, its
// required payload is missing or the ELF writer reports a failure, or
// CL_OUT_OF_HOST_MEMORY when the writer cannot be created.
cl_int Program::resolveProgramBinary() {
    if (isProgramBinaryResolved) {
        return CL_SUCCESS;
    }

    // Drop any previous image before rebuilding it.
    delete[] elfBinary;
    elfBinary = nullptr;
    elfBinarySize = 0;

    // Pick the ELF header type and check that the matching payload exists.
    CLElfLib::E_EH_TYPE headerType;
    bool payloadPresent = false;
    switch (programBinaryType) {
    case CL_PROGRAM_BINARY_TYPE_EXECUTABLE:
        headerType = CLElfLib::EH_TYPE_OPENCL_EXECUTABLE;
        payloadPresent = genBinary && genBinarySize;
        break;

    case CL_PROGRAM_BINARY_TYPE_LIBRARY:
        headerType = CLElfLib::EH_TYPE_OPENCL_LIBRARY;
        payloadPresent = llvmBinary && llvmBinarySize;
        break;

    case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT:
        headerType = CLElfLib::EH_TYPE_OPENCL_OBJECTS;
        payloadPresent = llvmBinary && llvmBinarySize;
        break;

    default:
        return CL_INVALID_BINARY;
    }
    if (!payloadPresent) {
        return CL_INVALID_BINARY;
    }

    CLElfLib::CElfWriter *pElfWriter = CLElfLib::CElfWriter::create(headerType, CLElfLib::EH_MACHINE_NONE, 0);
    if (!pElfWriter) {
        CLElfLib::CElfWriter::destroy(pElfWriter);
        return CL_OUT_OF_HOST_MEMORY;
    }

    CLElfLib::SSectionNode sectionNode;

    // Always add the options string
    sectionNode.Name = "BuildOptions";
    sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_OPTIONS;
    sectionNode.pData = (char *)options.c_str();
    sectionNode.DataSize = (uint32_t)(strlen(options.c_str()) + 1);
    auto elfRetVal = pElfWriter->addSection(&sectionNode);

    if (elfRetVal) {
        // Add the LLVM component; libraries and objects differ only in the section name
        if (getIsSpirV()) {
            sectionNode.Type = CLElfLib::SH_TYPE_SPIRV;
        } else {
            sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY;
        }
        sectionNode.Name = (headerType == CLElfLib::EH_TYPE_OPENCL_LIBRARY)
                               ? "Intel(R) OpenCL LLVM Archive"
                               : "Intel(R) OpenCL LLVM Object";
        sectionNode.pData = (char *)llvmBinary;
        sectionNode.DataSize = (uint32_t)llvmBinarySize;
        elfRetVal = pElfWriter->addSection(&sectionNode);
    }

    // Add the device binary if it exists
    if (elfRetVal && genBinary) {
        sectionNode.Name = "Intel(R) OpenCL Device Binary";
        sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_DEV_BINARY;
        sectionNode.pData = (char *)genBinary;
        sectionNode.DataSize = (uint32_t)genBinarySize;
        elfRetVal = pElfWriter->addSection(&sectionNode);
    }

    // Add the device debug data if it exists
    if (elfRetVal && (debugData != nullptr)) {
        sectionNode.Name = "Intel(R) OpenCL Device Debug";
        sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_DEV_DEBUG;
        sectionNode.pData = debugData;
        sectionNode.DataSize = (uint32_t)debugDataSize;
        elfRetVal = pElfWriter->addSection(&sectionNode);
    }

    // First pass (elfBinary is still null here), then allocate and resolve into the new buffer.
    if (elfRetVal) {
        elfRetVal = pElfWriter->resolveBinary(elfBinary, elfBinarySize);
    }
    if (elfRetVal) {
        elfBinary = new char[elfBinarySize];
        elfRetVal = pElfWriter->resolveBinary(elfBinary, elfBinarySize);
    }

    cl_int retVal = CL_SUCCESS;
    if (elfRetVal) {
        isProgramBinaryResolved = true;
    } else {
        retVal = CL_INVALID_BINARY;
    }

    CLElfLib::CElfWriter::destroy(pElfWriter);
    return retVal;
}
}

View File

@ -0,0 +1,976 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/helpers/hash.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/string.h"
#include "runtime/memory_manager/memory_manager.h"
#include "patch_list.h"
#include "patch_shared.h"
#include "program.h"
#include "runtime/kernel/kernel.h"
#include <algorithm>
using namespace iOpenCL;
namespace OCLRT {
extern bool familyEnabled[];
// Looks up a kernel by name.
// Returns the first kernel info whose name equals `kernelName`, or nullptr
// when the name is null or no kernel matches.
const KernelInfo *Program::getKernelInfo(
    const char *kernelName) const {
    if (kernelName == nullptr) {
        return nullptr;
    }

    for (const KernelInfo *candidate : kernelInfoArray) {
        if (strcmp(candidate->name.c_str(), kernelName) == 0) {
            return candidate;
        }
    }
    return nullptr;
}
// Number of kernel infos currently held by the program.
size_t Program::getNumKernels() const {
    const size_t kernelCount = kernelInfoArray.size();
    return kernelCount;
}
// Returns the kernel info stored at position `ordinal`.
// An out-of-range ordinal triggers the debug break and yields nullptr
// instead of indexing past the end of the array.
const KernelInfo *Program::getKernelInfo(size_t ordinal) const {
    DEBUG_BREAK_IF(ordinal >= kernelInfoArray.size());
    if (ordinal >= kernelInfoArray.size()) {
        return nullptr;
    }
    return kernelInfoArray[ordinal];
}
std::string Program::getKernelNamesString() const {
std::string semiColonDelimitedKernelNameStr;
for (uint32_t i = 0; i < kernelInfoArray.size(); i++) {
semiColonDelimitedKernelNameStr += kernelInfoArray[i]->name;
if ((i + 1) != kernelInfoArray.size()) {
semiColonDelimitedKernelNameStr += ";";
}
}
return semiColonDelimitedKernelNameStr;
}
size_t Program::processKernel(
const void *pKernelBlob,
cl_int &retVal) {
size_t sizeProcessed = 0;
do {
auto pKernelInfo = KernelInfo::create();
if (!pKernelInfo) {
retVal = CL_OUT_OF_HOST_MEMORY;
break;
}
auto pCurKernelPtr = pKernelBlob;
pKernelInfo->heapInfo.pBlob = pKernelBlob;
pKernelInfo->heapInfo.pKernelHeader = reinterpret_cast<const SKernelBinaryHeaderCommon *>(pCurKernelPtr);
pCurKernelPtr = ptrOffset(pCurKernelPtr, sizeof(SKernelBinaryHeaderCommon));
std::string readName{reinterpret_cast<const char *>(pCurKernelPtr), pKernelInfo->heapInfo.pKernelHeader->KernelNameSize};
pKernelInfo->name = readName.c_str();
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->KernelNameSize);
pKernelInfo->heapInfo.pKernelHeap = pCurKernelPtr;
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->KernelHeapSize);
pKernelInfo->heapInfo.pGsh = pCurKernelPtr;
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->GeneralStateHeapSize);
pKernelInfo->heapInfo.pDsh = pCurKernelPtr;
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->DynamicStateHeapSize);
pKernelInfo->heapInfo.pSsh = const_cast<void *>(pCurKernelPtr);
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize);
pKernelInfo->heapInfo.pPatchList = pCurKernelPtr;
retVal = parsePatchList(*pKernelInfo);
if (retVal != CL_SUCCESS) {
delete pKernelInfo;
sizeProcessed = ptrDiff(pCurKernelPtr, pKernelBlob);
break;
}
auto pKernelHeader = pKernelInfo->heapInfo.pKernelHeader;
auto pKernel = ptrOffset(pKernelBlob, sizeof(SKernelBinaryHeaderCommon));
if (genBinary)
pKernelInfo->gpuPointerSize = reinterpret_cast<const SProgramBinaryHeader *>(genBinary)->GPUPointerSizeInBytes;
uint32_t kernelSize =
pKernelHeader->DynamicStateHeapSize +
pKernelHeader->GeneralStateHeapSize +
pKernelHeader->KernelHeapSize +
pKernelHeader->KernelNameSize +
pKernelHeader->PatchListSize +
pKernelHeader->SurfaceStateHeapSize;
pKernelInfo->heapInfo.blobSize = kernelSize + sizeof(SKernelBinaryHeaderCommon);
uint32_t kernelCheckSum = pKernelInfo->heapInfo.pKernelHeader->CheckSum;
uint64_t hashValue = Hash::hash(reinterpret_cast<const char *>(pKernel), kernelSize);
uint32_t calcCheckSum = hashValue & 0xFFFFFFFF;
pKernelInfo->isValid = (calcCheckSum == kernelCheckSum);
retVal = CL_SUCCESS;
sizeProcessed = sizeof(SKernelBinaryHeaderCommon) + kernelSize;
kernelInfoArray.push_back(pKernelInfo);
if (pKernelInfo->hasDeviceEnqueue()) {
parentKernelInfoArray.push_back(pKernelInfo);
}
if (pKernelInfo->requiresSubgroupIndependentForwardProgress()) {
subgroupKernelInfoArray.push_back(pKernelInfo);
}
} while (false);
return sizeProcessed;
}
// Walks the patch-token list of a single kernel and records every recognised
// token in kernelInfo.
//
// The list starts at kernelInfo.heapInfo.pPatchList and spans
// kernelInfo.heapInfo.pKernelHeader->PatchListSize bytes. Every item begins
// with an SPatchItemHeader whose Size field is used to step to the next item.
//
// After the walk, kernelInfo.resolveKernelInfo() is invoked (only when no
// error occurred) and, if a non-empty data parameter stream was described, a
// zero-filled cross-thread data buffer is allocated with new[] and stored in
// kernelInfo.crossThreadData; this function never frees that buffer.
//
// Returns:
//   CL_SUCCESS         - all tokens were handled or could safely be skipped,
//                        and resolveKernelInfo() succeeded.
//   CL_INVALID_KERNEL  - an unknown token that is not safe to skip was found,
//                        or a token declared a size of zero.
//   otherwise          - whatever kernelInfo.resolveKernelInfo() returned.
cl_int Program::parsePatchList(KernelInfo &kernelInfo) {
    cl_int retVal = CL_SUCCESS;

    auto pPatchList = kernelInfo.heapInfo.pPatchList;
    auto patchListSize = kernelInfo.heapInfo.pKernelHeader->PatchListSize;
    auto pCurPatchListPtr = pPatchList;

    // Sentinel: the corresponding data parameter token has not been seen.
    constexpr uint32_t undefinedPatchOffset = 0xFFffFFff;
    uint32_t privateMemoryStatelessSizeOffset = undefinedPatchOffset;
    uint32_t localMemoryStatelessWindowSizeOffset = undefinedPatchOffset;
    uint32_t localMemoryStatelessWindowStartAddressOffset = undefinedPatchOffset;

    //Speed up containers by giving some pre-allocated storage
    kernelInfo.kernelArgInfo.reserve(10);
    kernelInfo.patchInfo.kernelArgumentInfo.reserve(10);
    kernelInfo.patchInfo.dataParameterBuffers.reserve(20);

    DBG_LOG(LogPatchTokens, "\nPATCH_TOKENs for kernel", kernelInfo.name);

    while (ptrDiff(pCurPatchListPtr, pPatchList) < patchListSize) {
        uint32_t index = 0;
        uint32_t argNum = 0;
        auto pPatch = reinterpret_cast<const SPatchItemHeader *>(pCurPatchListPtr);
        const SPatchDataParameterBuffer *pDataParameterBuffer = nullptr;

        // The cursor advances by pPatch->Size at the bottom of the loop; a
        // zero-sized token would be re-parsed forever, so reject it up front.
        if (pPatch->Size == 0) {
            retVal = CL_INVALID_KERNEL;
            break;
        }

        switch (pPatch->Token) {
        case PATCH_TOKEN_SAMPLER_STATE_ARRAY:
            kernelInfo.patchInfo.samplerStateArray =
                reinterpret_cast<const SPatchSamplerStateArray *>(pPatch);
            DBG_LOG(LogPatchTokens,
                    "\n.SAMPLER_STATE_ARRAY", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .Offset", kernelInfo.patchInfo.samplerStateArray->Offset,
                    "\n .Count", kernelInfo.patchInfo.samplerStateArray->Count,
                    "\n .BorderColorOffset", kernelInfo.patchInfo.samplerStateArray->BorderColorOffset);
            break;

        case PATCH_TOKEN_BINDING_TABLE_STATE:
            kernelInfo.patchInfo.bindingTableState =
                reinterpret_cast<const SPatchBindingTableState *>(pPatch);
            kernelInfo.usesSsh = (kernelInfo.patchInfo.bindingTableState->Count > 0);
            DBG_LOG(LogPatchTokens,
                    "\n.BINDING_TABLE_STATE", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .Offset", kernelInfo.patchInfo.bindingTableState->Offset,
                    "\n .Count", kernelInfo.patchInfo.bindingTableState->Count,
                    "\n .SurfaceStateOffset", kernelInfo.patchInfo.bindingTableState->SurfaceStateOffset);
            break;

        case PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE:
            kernelInfo.patchInfo.localsurface =
                reinterpret_cast<const SPatchAllocateLocalSurface *>(pPatch);
            kernelInfo.workloadInfo.slmStaticSize = kernelInfo.patchInfo.localsurface->TotalInlineLocalMemorySize;
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_LOCAL_SURFACE", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .TotalInlineLocalMemorySize", kernelInfo.patchInfo.localsurface->TotalInlineLocalMemorySize);
            break;

        case PATCH_TOKEN_MEDIA_VFE_STATE:
            kernelInfo.patchInfo.mediavfestate =
                reinterpret_cast<const SPatchMediaVFEState *>(pPatch);
            DBG_LOG(LogPatchTokens,
                    "\n.MEDIA_VFE_STATE", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ScratchSpaceOffset", kernelInfo.patchInfo.mediavfestate->ScratchSpaceOffset,
                    "\n .PerThreadScratchSpace", kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace);
            break;

        case PATCH_TOKEN_DATA_PARAMETER_BUFFER:
            DBG_LOG(LogPatchTokens,
                    "\n.DATA_PARAMETER_BUFFER", pPatch->Token,
                    "\n .Size", pPatch->Size);

            pDataParameterBuffer = reinterpret_cast<const SPatchDataParameterBuffer *>(pPatch);
            kernelInfo.patchInfo.dataParameterBuffers.push_back(
                pDataParameterBuffer);
            argNum = pDataParameterBuffer->ArgumentNumber;

            // NOTE(review): `index` below is derived from the binary's
            // SourceOffset and is used unchecked to index the workloadInfo
            // offset arrays - confirm the array extents against WorkloadInfo.
            switch (pDataParameterBuffer->Type) {
            case DATA_PARAMETER_KERNEL_ARGUMENT:
                kernelInfo.storeKernelArgument(pDataParameterBuffer);
                DBG_LOG(LogPatchTokens, "\n .Type", "KERNEL_ARGUMENT");
                break;

            case DATA_PARAMETER_LOCAL_WORK_SIZE: {
                DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_WORK_SIZE");
                index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
                // Entries go to the first set until its element [2] has been
                // written; later entries go to the second set.
                if (kernelInfo.workloadInfo.localWorkSizeOffsets[2] == WorkloadInfo::undefinedOffset) {
                    kernelInfo.workloadInfo.localWorkSizeOffsets[index] =
                        pDataParameterBuffer->Offset;
                } else {
                    kernelInfo.workloadInfo.localWorkSizeOffsets2[index] =
                        pDataParameterBuffer->Offset;
                }
                break;
            }

            case DATA_PARAMETER_GLOBAL_WORK_OFFSET:
                DBG_LOG(LogPatchTokens, "\n .Type", "GLOBAL_WORK_OFFSET");
                index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
                kernelInfo.workloadInfo.globalWorkOffsetOffsets[index] =
                    pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "ENQUEUED_LOCAL_WORK_SIZE");
                index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
                kernelInfo.workloadInfo.enqueuedLocalWorkSizeOffsets[index] =
                    pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_GLOBAL_WORK_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "GLOBAL_WORK_SIZE");
                index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
                kernelInfo.workloadInfo.globalWorkSizeOffsets[index] =
                    pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_NUM_WORK_GROUPS:
                DBG_LOG(LogPatchTokens, "\n .Type", "NUM_WORK_GROUPS");
                index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
                kernelInfo.workloadInfo.numWorkGroupsOffset[index] =
                    pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_MAX_WORKGROUP_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "MAX_WORKGROUP_SIZE");
                kernelInfo.workloadInfo.maxWorkGroupSizeOffset = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_WORK_DIMENSIONS:
                DBG_LOG(LogPatchTokens, "\n .Type", "WORK_DIMENSIONS");
                kernelInfo.workloadInfo.workDimOffset = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES: {
                DBG_LOG(LogPatchTokens, "\n .Type", "SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                KernelArgPatchInfo kernelArgPatchInfo;
                kernelArgPatchInfo.size = pDataParameterBuffer->DataSize;
                kernelArgPatchInfo.crossthreadOffset = pDataParameterBuffer->Offset;
                kernelInfo.kernelArgInfo[argNum].slmAlignment = pDataParameterBuffer->SourceOffset;
                kernelInfo.kernelArgInfo[argNum].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);
            } break;

            case DATA_PARAMETER_IMAGE_WIDTH:
                DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_WIDTH");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetImgWidth = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_IMAGE_HEIGHT:
                DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_HEIGHT");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetImgHeight = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_IMAGE_DEPTH:
                DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_DEPTH");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetImgDepth = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED:
                DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_COORDINATE_SNAP_WA_REQUIRED");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetSamplerSnapWa = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_SAMPLER_ADDRESS_MODE:
                DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_ADDRESS_MODE");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetSamplerAddressingMode = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS:
                DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_NORMALIZED_COORDS");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetSamplerNormalizedCoords = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE:
                DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_CHANNEL_DATA_TYPE");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetChannelDataType = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_IMAGE_CHANNEL_ORDER:
                DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_CHANNEL_ORDER");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetChannelOrder = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_IMAGE_ARRAY_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_ARRAY_SIZE");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetArraySize = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_OBJECT_ID:
                DBG_LOG(LogPatchTokens, "\n .Type", "OBJECT_ID");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetObjectId = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_SIMD_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "SIMD_SIZE");
                kernelInfo.workloadInfo.simdSizeOffset = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_PARENT_EVENT:
                DBG_LOG(LogPatchTokens, "\n .Type", "PARENT_EVENT");
                kernelInfo.workloadInfo.parentEventOffset = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "CHILD_BLOCK_SIMD_SIZE");
                kernelInfo.childrenKernelsIdOffset.push_back({argNum, pDataParameterBuffer->Offset});
                break;

            case DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "PRIVATE_MEMORY_STATELESS_SIZE");
                privateMemoryStatelessSizeOffset = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_MEMORY_STATELESS_WINDOW_SIZE");
                localMemoryStatelessWindowSizeOffset = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS:
                DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS");
                localMemoryStatelessWindowStartAddressOffset = pDataParameterBuffer->Offset;
                pDevice->prepareSLMWindow();
                break;

            case DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE:
                DBG_LOG(LogPatchTokens, "\n .Type", "PREFERRED_WORKGROUP_MULTIPLE");
                kernelInfo.workloadInfo.prefferedWkgMultipleOffset = pDataParameterBuffer->Offset;
                break;

            case DATA_PARAMETER_BUFFER_OFFSET:
                DBG_LOG(LogPatchTokens, "\n .Type", "DATA_PARAMETER_BUFFER_OFFSET");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetBufferOffset = pDataParameterBuffer->Offset;
                break;

            // Known types that are only reported, not stored.
            case DATA_PARAMETER_NUM_HARDWARE_THREADS:
            case DATA_PARAMETER_PRINTF_SURFACE_SIZE:
                DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled", pDataParameterBuffer->Type);
                printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr,
                                 "Program::parsePatchList.Unhandled Data parameter: %d\n", pDataParameterBuffer->Type);
                break;

            case DATA_PARAMETER_VME_MB_BLOCK_TYPE:
                DBG_LOG(LogPatchTokens, "\n .Type", "VME_MB_BLOCK_TYPE");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetVmeMbBlockType = pDataParameterBuffer->Offset;
                DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
                break;

            case DATA_PARAMETER_VME_SUBPIXEL_MODE:
                DBG_LOG(LogPatchTokens, "\n .Type", "VME_SUBPIXEL_MODE");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetVmeSubpixelMode = pDataParameterBuffer->Offset;
                DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
                break;

            case DATA_PARAMETER_VME_SAD_ADJUST_MODE:
                DBG_LOG(LogPatchTokens, "\n .Type", "VME_SAD_ADJUST_MODE");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetVmeSadAdjustMode = pDataParameterBuffer->Offset;
                DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
                break;

            case DATA_PARAMETER_VME_SEARCH_PATH_TYPE:
                DBG_LOG(LogPatchTokens, "\n .Type", "VME_SEARCH_PATH_TYPE");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetVmeSearchPathType = pDataParameterBuffer->Offset;
                DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
                break;

            case DATA_PARAMETER_IMAGE_NUM_SAMPLES:
                DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_NUM_SAMPLES");
                kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
                kernelInfo.kernelArgInfo[argNum].offsetNumSamples = pDataParameterBuffer->Offset;
                DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
                break;

            // Recognised types that are deliberately ignored.
            case DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS:
            case DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER:
            case DATA_PARAMETER_STAGE_IN_GRID_ORIGIN:
            case DATA_PARAMETER_STAGE_IN_GRID_SIZE:
                break;

            case DATA_PARAMETER_LOCAL_ID:
            case DATA_PARAMETER_EXECUTION_MASK:
            case DATA_PARAMETER_VME_IMAGE_TYPE:
            case DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE:
                break;

            default:
                DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled", pDataParameterBuffer->Type);
                DEBUG_BREAK_IF(true);
                break;
            }

            DBG_LOG(LogPatchTokens,
                    "\n .ArgumentNumber", pDataParameterBuffer->ArgumentNumber,
                    "\n .Offset", pDataParameterBuffer->Offset,
                    "\n .DataSize", pDataParameterBuffer->DataSize,
                    "\n .SourceOffset", pDataParameterBuffer->SourceOffset);
            break;

        case PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
            kernelInfo.patchInfo.interfaceDescriptorDataLoad =
                reinterpret_cast<const SPatchMediaInterfaceDescriptorLoad *>(pPatch);
            DBG_LOG(LogPatchTokens,
                    "\n.MEDIA_INTERFACE_DESCRIPTOR_LOAD", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .InterfaceDescriptorDataOffset", kernelInfo.patchInfo.interfaceDescriptorDataLoad->InterfaceDescriptorDataOffset);
            break;

        case PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA:
            kernelInfo.patchInfo.interfaceDescriptorData =
                reinterpret_cast<const SPatchInterfaceDescriptorData *>(pPatch);
            DBG_LOG(LogPatchTokens,
                    "\n.INTERFACE_DESCRIPTOR_DATA", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .Offset", kernelInfo.patchInfo.interfaceDescriptorData->Offset,
                    "\n .SamplerStateOffset", kernelInfo.patchInfo.interfaceDescriptorData->SamplerStateOffset,
                    "\n .KernelOffset", kernelInfo.patchInfo.interfaceDescriptorData->KernelOffset,
                    "\n .BindingTableOffset", kernelInfo.patchInfo.interfaceDescriptorData->BindingTableOffset);
            break;

        case PATCH_TOKEN_THREAD_PAYLOAD:
            kernelInfo.patchInfo.threadPayload =
                reinterpret_cast<const SPatchThreadPayload *>(pPatch);
            DBG_LOG(LogPatchTokens,
                    "\n.THREAD_PAYLOAD", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .HeaderPresent", kernelInfo.patchInfo.threadPayload->HeaderPresent,
                    "\n .LocalIDXPresent", kernelInfo.patchInfo.threadPayload->LocalIDXPresent,
                    "\n .LocalIDYPresent", kernelInfo.patchInfo.threadPayload->LocalIDYPresent,
                    "\n .LocalIDZPresent", kernelInfo.patchInfo.threadPayload->LocalIDZPresent,
                    "\n .LocalIDFlattenedPresent", kernelInfo.patchInfo.threadPayload->LocalIDFlattenedPresent,
                    "\n .IndirectPayloadStorage", kernelInfo.patchInfo.threadPayload->IndirectPayloadStorage,
                    "\n .UnusedPerThreadConstantPresent", kernelInfo.patchInfo.threadPayload->UnusedPerThreadConstantPresent,
                    "\n .GetLocalIDPresent", kernelInfo.patchInfo.threadPayload->GetLocalIDPresent,
                    "\n .GetGroupIDPresent", kernelInfo.patchInfo.threadPayload->GetGroupIDPresent,
                    "\n .GetGlobalOffsetPresent", kernelInfo.patchInfo.threadPayload->GetGlobalOffsetPresent);
            break;

        case PATCH_TOKEN_EXECUTION_ENVIRONMENT:
            kernelInfo.patchInfo.executionEnvironment =
                reinterpret_cast<const SPatchExecutionEnvironment *>(pPatch);
            // A non-zero X dimension is treated as "a required work-group
            // size is present"; all three dimensions are then copied.
            if (kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX != 0) {
                kernelInfo.reqdWorkGroupSize[0] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
                kernelInfo.reqdWorkGroupSize[1] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
                kernelInfo.reqdWorkGroupSize[2] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
                DEBUG_BREAK_IF(!(kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY > 0));
                DEBUG_BREAK_IF(!(kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ > 0));
            }
            if (kernelInfo.patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers == false) {
                kernelInfo.requiresSshForBuffers = true;
            }
            DBG_LOG(LogPatchTokens,
                    "\n.EXECUTION_ENVIRONMENT", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .RequiredWorkGroupSizeX", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX,
                    "\n .RequiredWorkGroupSizeY", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY,
                    "\n .RequiredWorkGroupSizeZ", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ,
                    "\n .LargestCompiledSIMDSize", kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize,
                    "\n .CompiledSubGroupsNumber", kernelInfo.patchInfo.executionEnvironment->CompiledSubGroupsNumber,
                    "\n .HasBarriers", kernelInfo.patchInfo.executionEnvironment->HasBarriers,
                    "\n .DisableMidThreadPreemption", kernelInfo.patchInfo.executionEnvironment->DisableMidThreadPreemption,
                    "\n .CompiledSIMD8", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD8,
                    "\n .CompiledSIMD16", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD16,
                    "\n .CompiledSIMD32", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD32,
                    "\n .HasDeviceEnqueue", kernelInfo.patchInfo.executionEnvironment->HasDeviceEnqueue,
                    "\n .MayAccessUndeclaredResource", kernelInfo.patchInfo.executionEnvironment->MayAccessUndeclaredResource,
                    "\n .UsesFencesForReadWriteImages", kernelInfo.patchInfo.executionEnvironment->UsesFencesForReadWriteImages,
                    "\n .UsesStatelessSpillFill", kernelInfo.patchInfo.executionEnvironment->UsesStatelessSpillFill,
                    "\n .IsCoherent", kernelInfo.patchInfo.executionEnvironment->IsCoherent,
                    "\n .SubgroupIndependentForwardProgressRequired", kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired);
            break;

        case PATCH_TOKEN_DATA_PARAMETER_STREAM:
            kernelInfo.patchInfo.dataParameterStream =
                reinterpret_cast<const SPatchDataParameterStream *>(pPatch);
            DBG_LOG(LogPatchTokens,
                    "\n.DATA_PARAMETER_STREAM", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .DataParameterStreamSize", kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize);
            break;

        case PATCH_TOKEN_KERNEL_ARGUMENT_INFO: {
            auto pkernelArgInfo = reinterpret_cast<const SPatchKernelArgumentInfo *>(pPatch);
            kernelInfo.storeArgInfo(pkernelArgInfo);
            DBG_LOG(LogPatchTokens,
                    "\n.KERNEL_ARGUMENT_INFO", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ArgumentNumber", pkernelArgInfo->ArgumentNumber,
                    "\n .AddressQualifierSize", pkernelArgInfo->AddressQualifierSize,
                    "\n .AccessQualifierSize", pkernelArgInfo->AccessQualifierSize,
                    "\n .ArgumentNameSize", pkernelArgInfo->ArgumentNameSize,
                    "\n .TypeNameSize", pkernelArgInfo->TypeNameSize,
                    "\n .TypeQualifierSize", pkernelArgInfo->TypeQualifierSize);
            break;
        }

        case PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO:
            kernelInfo.patchInfo.pKernelAttributesInfo =
                reinterpret_cast<const SPatchKernelAttributesInfo *>(pPatch);
            kernelInfo.storePatchToken(kernelInfo.patchInfo.pKernelAttributesInfo);
            DBG_LOG(LogPatchTokens,
                    "\n.KERNEL_ATTRIBUTES_INFO", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .AttributesSize", kernelInfo.patchInfo.pKernelAttributesInfo->AttributesSize);
            break;

        case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT: {
            const SPatchSamplerKernelArgument *pSamplerKernelObjectKernelArg =
                reinterpret_cast<const SPatchSamplerKernelArgument *>(pPatch);
            kernelInfo.storeKernelArgument(pSamplerKernelObjectKernelArg);
            DBG_LOG(LogPatchTokens,
                    "\n.SAMPLER_KERNEL_ARGUMENT", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ArgumentNumber", pSamplerKernelObjectKernelArg->ArgumentNumber,
                    "\n .Type", pSamplerKernelObjectKernelArg->Type,
                    "\n .Offset", pSamplerKernelObjectKernelArg->Offset);
        } break;

        case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT: {
            const SPatchImageMemoryObjectKernelArgument *pImageMemObjectKernelArg =
                reinterpret_cast<const SPatchImageMemoryObjectKernelArgument *>(pPatch);
            kernelInfo.storeKernelArgument(pImageMemObjectKernelArg);
            DBG_LOG(LogPatchTokens,
                    "\n.IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ArgumentNumber", pImageMemObjectKernelArg->ArgumentNumber,
                    "\n .Type", pImageMemObjectKernelArg->Type,
                    "\n .Offset", pImageMemObjectKernelArg->Offset,
                    "\n .LocationIndex", pImageMemObjectKernelArg->LocationIndex,
                    "\n .LocationIndex2", pImageMemObjectKernelArg->LocationIndex2,
                    "\n .Transformable", pImageMemObjectKernelArg->Transformable);
        } break;

        case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: {
            const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjectKernelArg =
                reinterpret_cast<const SPatchGlobalMemoryObjectKernelArgument *>(pPatch);
            kernelInfo.storeKernelArgument(pGlobalMemObjectKernelArg);
            DBG_LOG(LogPatchTokens,
                    "\n.GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ArgumentNumber", pGlobalMemObjectKernelArg->ArgumentNumber,
                    "\n .Offset", pGlobalMemObjectKernelArg->Offset,
                    "\n .LocationIndex", pGlobalMemObjectKernelArg->LocationIndex,
                    "\n .LocationIndex2", pGlobalMemObjectKernelArg->LocationIndex2);
        } break;

        case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: {
            const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalMemObjKernelArg =
                reinterpret_cast<const SPatchStatelessGlobalMemoryObjectKernelArgument *>(pPatch);
            kernelInfo.storeKernelArgument(pStatelessGlobalMemObjKernelArg);
            DBG_LOG(LogPatchTokens,
                    "\n.STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ArgumentNumber", pStatelessGlobalMemObjKernelArg->ArgumentNumber,
                    "\n .SurfaceStateHeapOffset", pStatelessGlobalMemObjKernelArg->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pStatelessGlobalMemObjKernelArg->DataParamOffset,
                    "\n .DataParamSize", pStatelessGlobalMemObjKernelArg->DataParamSize,
                    "\n .LocationIndex", pStatelessGlobalMemObjKernelArg->LocationIndex,
                    "\n .LocationIndex2", pStatelessGlobalMemObjKernelArg->LocationIndex2);
        } break;

        case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT: {
            const SPatchStatelessConstantMemoryObjectKernelArgument *pPatchToken = reinterpret_cast<const SPatchStatelessConstantMemoryObjectKernelArgument *>(pPatch);
            kernelInfo.storeKernelArgument(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ArgumentNumber", pPatchToken->ArgumentNumber,
                    "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pPatchToken->DataParamOffset,
                    "\n .DataParamSize", pPatchToken->DataParamSize);
        } break;

        case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT: {
            const SPatchStatelessDeviceQueueKernelArgument *pPatchToken = reinterpret_cast<const SPatchStatelessDeviceQueueKernelArgument *>(pPatch);
            kernelInfo.storeKernelArgument(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ArgumentNumber", pPatchToken->ArgumentNumber,
                    "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pPatchToken->DataParamOffset,
                    "\n .DataParamSize", pPatchToken->DataParamSize);
        } break;

        case PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY: {
            const SPatchAllocateStatelessPrivateSurface *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessPrivateSurface *>(pPatch);
            kernelInfo.storePatchToken(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_STATELESS_PRIVATE_MEMORY", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pPatchToken->DataParamOffset,
                    "\n .DataParamSize", pPatchToken->DataParamSize,
                    "\n .PerThreadPrivateMemorySize", pPatchToken->PerThreadPrivateMemorySize);
        } break;

        case PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION: {
            const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *>(pPatch);
            kernelInfo.storePatchToken(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ConstantBufferIndex", pPatchToken->ConstantBufferIndex,
                    "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pPatchToken->DataParamOffset,
                    "\n .DataParamSize", pPatchToken->DataParamSize);
        } break;

        case PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION: {
            const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *>(pPatch);
            kernelInfo.storePatchToken(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .GlobalBufferIndex", pPatchToken->GlobalBufferIndex,
                    "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pPatchToken->DataParamOffset,
                    "\n .DataParamSize", pPatchToken->DataParamSize);
        } break;

        case PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE: {
            const SPatchAllocateStatelessPrintfSurface *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessPrintfSurface *>(pPatch);
            kernelInfo.storePatchToken(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_STATELESS_PRINTF_SURFACE", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .PrintfSurfaceIndex", pPatchToken->PrintfSurfaceIndex,
                    "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pPatchToken->DataParamOffset,
                    "\n .DataParamSize", pPatchToken->DataParamSize);
        } break;

        case PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE: {
            const SPatchAllocateStatelessEventPoolSurface *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessEventPoolSurface *>(pPatch);
            kernelInfo.storePatchToken(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_STATELESS_EVENT_POOL_SURFACE", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .EventPoolSurfaceIndex", pPatchToken->EventPoolSurfaceIndex,
                    "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pPatchToken->DataParamOffset,
                    "\n .DataParamSize", pPatchToken->DataParamSize);
        } break;

        case PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE: {
            const SPatchAllocateStatelessDefaultDeviceQueueSurface *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessDefaultDeviceQueueSurface *>(pPatch);
            kernelInfo.storePatchToken(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
                    "\n .DataParamOffset", pPatchToken->DataParamOffset,
                    "\n .DataParamSize", pPatchToken->DataParamSize);
        } break;

        case PATCH_TOKEN_STRING: {
            const SPatchString *pPatchToken = reinterpret_cast<const SPatchString *>(pPatch);
            kernelInfo.storePatchToken(pPatchToken);
            DBG_LOG(LogPatchTokens,
                    "\n.STRING", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .Index", pPatchToken->Index,
                    "\n .StringSize", pPatchToken->StringSize);
        } break;

        case PATCH_TOKEN_INLINE_VME_SAMPLER_INFO:
            kernelInfo.isVmeWorkload = true;
            DBG_LOG(LogPatchTokens,
                    "\n.INLINE_VME_SAMPLER_INFO", pPatch->Token,
                    "\n .Size", pPatch->Size);
            break;

        case PATCH_TOKEN_GTPIN_FREE_GRF_INFO: {
            // Only logged; nothing is stored for this token.
            const SPatchGtpinFreeGRFInfo *pPatchToken = reinterpret_cast<const SPatchGtpinFreeGRFInfo *>(pPatch);
            DBG_LOG(LogPatchTokens,
                    "\n.PATCH_TOKEN_GTPIN_FREE_GRF_INFO", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .BufferSize", pPatchToken->BufferSize);
        } break;

        case PATCH_TOKEN_STATE_SIP: {
            const SPatchStateSIP *pPatchToken = reinterpret_cast<const SPatchStateSIP *>(pPatch);
            kernelInfo.systemKernelOffset = pPatchToken->SystemKernelOffset;
            DBG_LOG(LogPatchTokens,
                    "\n.PATCH_TOKEN_STATE_SIP", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .SystemKernelOffset", pPatchToken->SystemKernelOffset);
        } break;

        default:
            printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, " Program::parsePatchList. Unknown Patch Token: %d\n", pPatch->Token);
            // Unknown tokens are tolerated only when explicitly whitelisted.
            if (false == isSafeToSkipUnhandledToken(pPatch->Token)) {
                retVal = CL_INVALID_KERNEL;
            }
            break;
        }

        if (retVal != CL_SUCCESS) {
            break;
        }
        pCurPatchListPtr = ptrOffset(pCurPatchListPtr, pPatch->Size);
    }

    if (retVal == CL_SUCCESS) {
        retVal = kernelInfo.resolveKernelInfo();
    }

    // Build the cross-thread data image and pre-patch the values that are
    // already known at parse time. This runs regardless of retVal.
    // NOTE(review): the patch offsets come from the binary and are not checked
    // against crossThreadDataSize before being written - confirm whether the
    // compiler output guarantees they are in range.
    if (kernelInfo.patchInfo.dataParameterStream && kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize) {
        uint32_t crossThreadDataSize = kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize;
        kernelInfo.crossThreadData = new char[crossThreadDataSize];
        memset(kernelInfo.crossThreadData, 0x00, crossThreadDataSize);

        if (localMemoryStatelessWindowStartAddressOffset != undefinedPatchOffset) {
            *reinterpret_cast<uintptr_t *>(&(kernelInfo.crossThreadData[localMemoryStatelessWindowStartAddressOffset])) = reinterpret_cast<uintptr_t>(this->pDevice->getSLMWindowStartAddress());
        }

        if (localMemoryStatelessWindowSizeOffset != undefinedPatchOffset) {
            *reinterpret_cast<uint32_t *>(&(kernelInfo.crossThreadData[localMemoryStatelessWindowSizeOffset])) = static_cast<uint32_t>(this->pDevice->getDeviceInfo().localMemSize);
        }

        if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface && (privateMemoryStatelessSizeOffset != undefinedPatchOffset)) {
            *reinterpret_cast<uint32_t *>(&(kernelInfo.crossThreadData[privateMemoryStatelessSizeOffset])) = kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * this->getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * kernelInfo.getMaxSimdSize();
        }

        if (kernelInfo.workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) {
            *reinterpret_cast<uint32_t *>(&(kernelInfo.crossThreadData[kernelInfo.workloadInfo.maxWorkGroupSizeOffset])) = static_cast<uint32_t>(this->getDevice(0).getDeviceInfo().maxWorkGroupSize);
        }
    }

    return retVal;
}
// Walks the program-scope patch-token list (programScopePatchList,
// programScopePatchListSize bytes) and applies each recognised token:
//
//  * ALLOCATE_{CONSTANT,GLOBAL}_MEMORY_SURFACE_PROGRAM_BINARY_INFO - frees any
//    previously held surface, allocates a new one through the device's memory
//    manager and copies the inline initialisation data that follows the token
//    header into it. The cursor is advanced past that inline data in addition
//    to the regular pPatch->Size step.
//  * {GLOBAL,CONSTANT}_POINTER_PROGRAM_BINARY_INFO - when the matching surface
//    exists, adds the surface's address to the pointer stored at the given
//    offset inside the surface. Ignored when the surface does not exist.
//
// Returns:
//   CL_SUCCESS             - every token was handled or safely skipped.
//   CL_INVALID_BINARY      - an unknown token that is not safe to skip was
//                            found, or a token declared a size of zero.
//   CL_OUT_OF_HOST_MEMORY  - a surface allocation failed.
//
// NOTE(review): InlineDataSize and the pointer offsets come from the binary
// and are not validated against the remaining list size / surface size -
// confirm the producer guarantees they are in range.
cl_int Program::parseProgramScopePatchList() {
    cl_int retVal = CL_SUCCESS;
    cl_uint surfaceSize = 0;
    cl_uint headerSize = 0;

    auto pPatchList = programScopePatchList;
    auto patchListSize = programScopePatchListSize;
    auto pCurPatchListPtr = pPatchList;

    while (ptrDiff(pCurPatchListPtr, pPatchList) < patchListSize) {
        auto pPatch = reinterpret_cast<const SPatchItemHeader *>(pCurPatchListPtr);

        // The cursor advances by pPatch->Size at the bottom of the loop; a
        // zero-sized token would be re-parsed forever, so reject it up front.
        if (pPatch->Size == 0) {
            retVal = CL_INVALID_BINARY;
            break;
        }

        switch (pPatch->Token) {
        case PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO: {
            const auto patch = *reinterpret_cast<const SPatchAllocateConstantMemorySurfaceProgramBinaryInfo *>(pPatch);

            if (constantSurface) {
                pDevice->getMemoryManager()->freeGraphicsMemory(constantSurface);
            }

            surfaceSize = patch.InlineDataSize;
            headerSize = sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo);
            constantSurface = pDevice->getMemoryManager()->createGraphicsAllocationWithRequiredBitness(surfaceSize, nullptr);
            if (constantSurface == nullptr) {
                // Do not dereference a failed allocation.
                retVal = CL_OUT_OF_HOST_MEMORY;
                break;
            }

            // The initialisation data sits directly behind the token header.
            memcpy_s(constantSurface->getUnderlyingBuffer(), surfaceSize, reinterpret_cast<const char *>(pPatch) + headerSize, surfaceSize);
            pCurPatchListPtr = ptrOffset(pCurPatchListPtr, surfaceSize);
            DBG_LOG(LogPatchTokens,
                    "\n .ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ConstantBufferIndex", patch.ConstantBufferIndex,
                    "\n .InitializationDataSize", patch.InlineDataSize);
        } break;

        case PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO: {
            const auto patch = *reinterpret_cast<const SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo *>(pPatch);

            if (globalSurface) {
                pDevice->getMemoryManager()->freeGraphicsMemory(globalSurface);
            }

            surfaceSize = patch.InlineDataSize;
            globalVarTotalSize += static_cast<size_t>(surfaceSize);
            headerSize = sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo);
            globalSurface = pDevice->getMemoryManager()->createGraphicsAllocationWithRequiredBitness(surfaceSize, nullptr);
            if (globalSurface == nullptr) {
                // Do not dereference a failed allocation.
                retVal = CL_OUT_OF_HOST_MEMORY;
                break;
            }

            // The initialisation data sits directly behind the token header.
            memcpy_s(globalSurface->getUnderlyingBuffer(), surfaceSize, reinterpret_cast<const char *>(pPatch) + headerSize, surfaceSize);
            pCurPatchListPtr = ptrOffset(pCurPatchListPtr, surfaceSize);
            DBG_LOG(LogPatchTokens,
                    "\n .ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .BufferType", patch.Type,
                    "\n .GlobalBufferIndex", patch.GlobalBufferIndex,
                    "\n .InitializationDataSize", patch.InlineDataSize);
        } break;

        case PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO:
            if (globalSurface != nullptr) {
                const auto patch = *reinterpret_cast<const SPatchGlobalPointerProgramBinaryInfo *>(pPatch);
                if ((patch.GlobalBufferIndex == 0) && (patch.BufferIndex == 0) && (patch.BufferType == PROGRAM_SCOPE_GLOBAL_BUFFER)) {
                    void *pPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(globalSurface->getUnderlyingBuffer()) + static_cast<uintptr_t>(patch.GlobalPointerOffset));
                    // Relocate the stored pointer by the surface's address:
                    // 32-bit GPU address for 32-bit allocations, otherwise the
                    // address of the underlying buffer.
                    if (globalSurface->is32BitAllocation) {
                        *reinterpret_cast<uint32_t *>(pPtr) += static_cast<uint32_t>(globalSurface->getGpuAddressToPatch());
                    } else {
                        *reinterpret_cast<uintptr_t *>(pPtr) += reinterpret_cast<uintptr_t>(globalSurface->getUnderlyingBuffer());
                    }
                } else {
                    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. Unhandled Data parameter: %d\n", pPatch->Token);
                }
                DBG_LOG(LogPatchTokens,
                        "\n .GLOBAL_POINTER_PROGRAM_BINARY_INFO", pPatch->Token,
                        "\n .Size", pPatch->Size,
                        "\n .GlobalBufferIndex", patch.GlobalBufferIndex,
                        "\n .GlobalPointerOffset", patch.GlobalPointerOffset,
                        "\n .BufferType", patch.BufferType,
                        "\n .BufferIndex", patch.BufferIndex);
            }
            break;

        case PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO:
            if (constantSurface != nullptr) {
                const auto patch = *reinterpret_cast<const SPatchConstantPointerProgramBinaryInfo *>(pPatch);
                if ((patch.ConstantBufferIndex == 0) && (patch.BufferIndex == 0) && (patch.BufferType == PROGRAM_SCOPE_CONSTANT_BUFFER)) {
                    void *pPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(constantSurface->getUnderlyingBuffer()) + static_cast<uintptr_t>(patch.ConstantPointerOffset));
                    // Same relocation rule as for the global surface.
                    if (constantSurface->is32BitAllocation) {
                        *reinterpret_cast<uint32_t *>(pPtr) += static_cast<uint32_t>(constantSurface->getGpuAddressToPatch());
                    } else {
                        *reinterpret_cast<uintptr_t *>(pPtr) += reinterpret_cast<uintptr_t>(constantSurface->getUnderlyingBuffer());
                    }
                } else {
                    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. Unhandled Data parameter: %d\n", pPatch->Token);
                }
                DBG_LOG(LogPatchTokens,
                        "\n .CONSTANT_POINTER_PROGRAM_BINARY_INFO", pPatch->Token,
                        "\n .Size", pPatch->Size,
                        "\n .ConstantBufferIndex", patch.ConstantBufferIndex,
                        "\n .ConstantPointerOffset", patch.ConstantPointerOffset,
                        "\n .BufferType", patch.BufferType,
                        "\n .BufferIndex", patch.BufferIndex);
            }
            break;

        default:
            // Unknown tokens are tolerated only when explicitly whitelisted.
            if (false == isSafeToSkipUnhandledToken(pPatch->Token)) {
                retVal = CL_INVALID_BINARY;
            }
            printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, " Program::parseProgramScopePatchList. Unknown Patch Token: %d\n", pPatch->Token);
            DBG_LOG(LogPatchTokens,
                    "\n .Program Unknown Patch Token", pPatch->Token,
                    "\n .Size", pPatch->Size);
            break;
        }

        if (retVal != CL_SUCCESS) {
            break;
        }
        pCurPatchListPtr = ptrOffset(pCurPatchListPtr, pPatch->Size);
    }

    return retVal;
}
// Parses the device binary held in genBinary/genBinarySize.
//
// Any KernelInfo objects from a previous parse are deleted first. The binary
// is then walked in order: program binary header, program-scope patch list
// (parsed by parseProgramScopePatchList when its size is non-zero), followed
// by NumberOfKernels kernel blobs handed one by one to processKernel.
//
// Returns CL_SUCCESS, CL_INVALID_BINARY when the binary is missing, too small
// to contain a header, or has a header rejected by validateGenBinaryHeader,
// or the first error reported by the patch-list / kernel parsing.
cl_int Program::processGenBinary() {
    // Drop kernel descriptions left over from an earlier parse.
    for (auto &i : kernelInfoArray) {
        delete i;
    }
    kernelInfoArray.clear();

    // The header is read directly out of the buffer, so the buffer has to be
    // at least as large as the header itself (this also rejects size 0).
    if (!genBinary || genBinarySize < sizeof(SProgramBinaryHeader)) {
        return CL_INVALID_BINARY;
    }

    auto pCurBinaryPtr = genBinary;
    auto pGenBinaryHeader = reinterpret_cast<const SProgramBinaryHeader *>(pCurBinaryPtr);
    if (!validateGenBinaryHeader(pGenBinaryHeader)) {
        return CL_INVALID_BINARY;
    }

    // The program-scope patch list starts right behind the header.
    pCurBinaryPtr = ptrOffset(pCurBinaryPtr, sizeof(SProgramBinaryHeader));
    programScopePatchList = pCurBinaryPtr;
    programScopePatchListSize = pGenBinaryHeader->PatchListSize;

    cl_int retVal = CL_SUCCESS;
    if (programScopePatchListSize != 0u) {
        retVal = parseProgramScopePatchList();
    }

    // Kernel blobs follow the patch list; stop at the first failing kernel.
    pCurBinaryPtr = ptrOffset(pCurBinaryPtr, pGenBinaryHeader->PatchListSize);
    auto numKernels = pGenBinaryHeader->NumberOfKernels;
    for (uint32_t i = 0; i < numKernels && retVal == CL_SUCCESS; i++) {
        size_t bytesProcessed = processKernel(pCurBinaryPtr, retVal);
        pCurBinaryPtr = ptrOffset(pCurBinaryPtr, bytesProcessed);
    }
    return retVal;
}
// Returns the familyEnabled entry for the given core family.
// NOTE(review): `device` indexes familyEnabled without a range check here;
// validateGenBinaryHeader passes a value read from the binary header, so
// confirm that out-of-range values cannot reach this lookup.
bool Program::validateGenBinaryDevice(GFXCORE_FAMILY device) const {
    return familyEnabled[device];
}
// A header is accepted only if its magic is MAGIC_CL, its version equals
// CURRENT_ICBE_VERSION and its device family passes validateGenBinaryDevice.
// The checks run in that order and stop at the first mismatch.
bool Program::validateGenBinaryHeader(const iOpenCL::SProgramBinaryHeader *pGenBinaryHeader) const {
    if (pGenBinaryHeader->Magic != MAGIC_CL) {
        return false;
    }
    if (pGenBinaryHeader->Version != CURRENT_ICBE_VERSION) {
        return false;
    }
    return validateGenBinaryDevice(static_cast<GFXCORE_FAMILY>(pGenBinaryHeader->Device));
}
} // namespace OCLRT

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "program.h"
namespace OCLRT {
// Returns true when the buffer is non-null, longer than one 32-bit word and
// its first word equals 0x07230203 or the byte-swapped form 0x03022307.
bool Program::isValidSpirvBinary(
    const void *pBinary,
    size_t binarySize) {
    if (pBinary == nullptr || binarySize <= sizeof(uint32_t)) {
        return false;
    }
    const uint32_t magicWord[2] = {0x03022307, 0x07230203};
    for (const uint32_t &candidate : magicWord) {
        if (memcmp(pBinary, &candidate, sizeof(uint32_t)) == 0) {
            return true;
        }
    }
    return false;
}
// Stores an intermediate-representation binary as the program "source":
// the bytes are copied into sourceCode, the program is marked as
// CL_PROGRAM_BINARY_TYPE_INTERMEDIATE with build status CL_BUILD_NONE, and
// the isSpirV flag is recorded. Always returns CL_SUCCESS.
cl_int Program::processSpirBinary(
    const void *pBinary,
    size_t binarySize,
    bool isSpirV) {
    sourceCode.assign(static_cast<const char *>(pBinary), binarySize);
    this->isSpirV = isSpirV;
    programBinaryType = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE;
    buildStatus = CL_BUILD_NONE;
    return CL_SUCCESS;
}
}

443
runtime/program/program.cpp Normal file
View File

@ -0,0 +1,443 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "program.h"
#include "elf/writer.h"
#include "runtime/context/context.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/helpers/string.h"
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/compiler_interface/compiler_interface.h"
#include <sstream>
namespace OCLRT {
// Build-option prefixes searched for in the options string by
// updateNonUniformFlag().
const std::string Program::clOptNameClVer{"-cl-std=CL"};
const std::string Program::clOptNameUniformWgs{"-cl-uniform-work-group-size"};
// Context-less program: delegates to the main constructor with a null
// context, then overrides the device count that constructor set to 1.
Program::Program() : Program(nullptr) {
    this->numDevices = 0;
}
// Creates an empty program bound to `context` (may be null).
//
// Non-built-in programs take an internal reference on the context. All
// binary/surface members start out empty, and internalOptions is seeded from
// the first device of the context: OpenCL version, 32-bit addressing and
// stateless-access requirements, plus the buffer-offset debug option.
Program::Program(Context *context, bool isBuiltIn) : context(context), isBuiltIn(isBuiltIn) {
    if (this->context && !this->isBuiltIn) {
        this->context->incRefInternal();
    }

    blockKernelManager = new BlockKernelManager();
    pDevice = context ? context->getDevice(0) : nullptr;
    numDevices = 1;

    // No binaries attached yet.
    elfBinary = nullptr;
    elfBinarySize = 0;
    genBinary = nullptr;
    genBinarySize = 0;
    llvmBinary = nullptr;
    llvmBinarySize = 0;
    debugData = nullptr;
    debugDataSize = 0;

    // Build state.
    buildStatus = CL_BUILD_NONE;
    programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
    isCreatedFromBinary = false;
    isProgramBinaryResolved = false;

    // Program-scope data.
    constantSurface = nullptr;
    globalSurface = nullptr;
    globalVarTotalSize = 0;
    programScopePatchListSize = 0;
    programScopePatchList = nullptr;

    programOptionVersion = 12u;
    allowNonUniform = false;

    if (pDevice) {
        char deviceVersion[32] = {};
        pDevice->getDeviceInfo(CL_DEVICE_VERSION, 32, deviceVersion, nullptr);

        // First matching version substring wins, checked newest first.
        const char *const versionTags[] = {"2.1", "2.0", "1.2"};
        const char *const versionOptions[] = {"-ocl-version=210 ", "-ocl-version=200 ", "-ocl-version=120 "};
        for (size_t idx = 0; idx < 3; ++idx) {
            if (strstr(deviceVersion, versionTags[idx])) {
                internalOptions = versionOptions[idx];
                break;
            }
        }

        const bool force32BitAddressess = pDevice->getDeviceInfo().force32BitAddressess;
        if (force32BitAddressess) {
            internalOptions += "-m32 ";
        }

        pDevice->increaseProgramCount();

        // Forced 32-bit addressing implies the stateless requirement as well.
        const bool statelessRequested = is32bit | DebugManager.flags.DisableStatelessToStatefulOptimization.get();
        if (statelessRequested || force32BitAddressess) {
            internalOptions += "-cl-intel-greater-than-4GB-buffer-required ";
        }
    }

    if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get()) {
        internalOptions += "-cl-intel-has-buffer-offset-arg ";
    }
}
// Releases everything the program owns: the internal context reference
// (non-built-in programs only), the stored binaries, the kernel infos, the
// block-kernel private surfaces and manager, and finally the program-scope
// constant and global surfaces.
Program::~Program() {
    if (context && !isBuiltIn) {
        context->decRefInternal();
    }

    delete[] genBinary;
    genBinary = nullptr;
    delete[] llvmBinary;
    llvmBinary = nullptr;
    delete[] debugData;
    debugData = nullptr;
    delete[] elfBinary;
    elfBinary = nullptr;
    elfBinarySize = 0;

    for (auto *kernelInfo : kernelInfoArray) {
        delete kernelInfo;
    }

    // Private surfaces must be released while the manager still exists.
    freeBlockPrivateSurfaces();
    delete blockKernelManager;

    // Constant surface first, then global surface.
    GraphicsAllocation **programSurfaces[] = {&constantSurface, &globalSurface};
    for (GraphicsAllocation **ppSurface : programSurfaces) {
        if (*ppSurface) {
            auto memoryManager = pDevice->getMemoryManager();
            memoryManager->freeGraphicsMemory(*ppSurface);
            *ppSurface = nullptr;
        }
    }
}
// Initializes the program from a user-supplied binary.
//
// LLVM and SPIR-V binaries are stored as intermediate representation via
// processSpirBinary. Anything else is treated as an ELF binary; if that fails
// and the binary reports a compiler version other than CURRENT_ICBE_VERSION,
// a rebuild from the embedded LLVM is attempted. The ELF error is returned
// unless that rebuild succeeds.
cl_int Program::createProgramFromBinary(
    const void *pBinary,
    size_t binarySize) {
    if (Program::isValidLlvmBinary(pBinary, binarySize)) {
        return processSpirBinary(pBinary, binarySize, false);
    }
    if (Program::isValidSpirvBinary(pBinary, binarySize)) {
        return processSpirBinary(pBinary, binarySize, true);
    }

    uint32_t binaryVersion = iOpenCL::CURRENT_ICBE_VERSION;
    cl_int retVal = processElfBinary(pBinary, binarySize, binaryVersion);
    if (retVal == CL_SUCCESS) {
        isCreatedFromBinary = true;
        return retVal;
    }

    const bool compilerVersionMismatch = (binaryVersion != iOpenCL::CURRENT_ICBE_VERSION);
    if (compilerVersionMismatch) {
        // The binary was produced by a different compiler version; try to
        // recompile from its LLVM section. On failure the ELF error stands.
        if (rebuildProgramFromLLVM() == CL_SUCCESS) {
            retVal = CL_SUCCESS;
        }
    }
    return retVal;
}
// Rebuilds the device binary from the stored LLVM binary.
//
// The LLVM binary is wrapped into an ELF container with a single
// SH_TYPE_OPENCL_LLVM_BINARY section, passed to the compiler interface's
// link(), and the result is parsed with processGenBinary(). On success the
// program is marked as an executable created from a resolved binary.
//
// Returns CL_INVALID_PROGRAM when no valid LLVM binary is stored,
// CL_OUT_OF_HOST_MEMORY when the ELF writer or the compiler interface is
// unavailable, or the error reported by link() / processGenBinary().
cl_int Program::rebuildProgramFromLLVM() {
    cl_int retVal = CL_SUCCESS;
    // Initialized so that a resolveBinary() call that does not report a size
    // cannot lead to an allocation with an indeterminate length.
    size_t dataSize = 0;
    char *pData = nullptr;
    CLElfLib::CElfWriter *pElfWriter = nullptr;
    do {
        if (!Program::isValidLlvmBinary(llvmBinary, llvmBinarySize)) {
            retVal = CL_INVALID_PROGRAM;
            break;
        }
        pElfWriter = CLElfLib::CElfWriter::create(CLElfLib::EH_TYPE_OPENCL_OBJECTS, CLElfLib::EH_MACHINE_NONE, 0);
        // Do not dereference a writer that could not be created.
        if (nullptr == pElfWriter) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }
        CLElfLib::SSectionNode sectionNode;
        sectionNode.Name = "";
        sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY;
        sectionNode.Flags = 0;
        sectionNode.pData = llvmBinary;
        sectionNode.DataSize = static_cast<unsigned int>(llvmBinarySize);
        pElfWriter->addSection(&sectionNode);
        // First call queries the required size, second call fills the buffer.
        pElfWriter->resolveBinary(nullptr, dataSize);
        pData = new char[dataSize];
        pElfWriter->resolveBinary(pData, dataSize);
        CompilerInterface *pCompilerInterface = getCompilerInterface();
        if (nullptr == pCompilerInterface) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }
        TranslationArgs inputArgs = {};
        inputArgs.pInput = pData;
        inputArgs.InputSize = static_cast<unsigned int>(dataSize);
        inputArgs.pOptions = options.c_str();
        inputArgs.OptionsSize = static_cast<unsigned int>(options.length());
        inputArgs.pInternalOptions = internalOptions.c_str();
        inputArgs.InternalOptionsSize = static_cast<unsigned int>(internalOptions.length());
        inputArgs.pTracingOptions = nullptr;
        inputArgs.TracingOptionsCount = 0;
        retVal = pCompilerInterface->link(*this, inputArgs);
        if (retVal != CL_SUCCESS) {
            break;
        }
        retVal = processGenBinary();
        if (retVal != CL_SUCCESS) {
            break;
        }
        programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
        isCreatedFromBinary = true;
        isProgramBinaryResolved = true;
    } while (false);
    // Cleanup runs on every path, including the early exits above.
    CLElfLib::CElfWriter::destroy(pElfWriter);
    delete[] pData;
    return retVal;
}
// Copies the Version field of the given binary header into binaryVersion.
// binaryVersion is left untouched when pSectionData is null.
void Program::getProgramCompilerVersion(
    SProgramBinaryHeader *pSectionData,
    uint32_t &binaryVersion) const {
    if (pSectionData == nullptr) {
        return;
    }
    binaryVersion = pSectionData->Version;
}
// Returns true when the buffer contains the LLVM bitcode magic "BC\xc0\xde".
//
// The buffer must be non-null and longer than the magic plus one byte. As
// with the previous strstr-based check, the search covers the bytes up to the
// first NUL byte, but it is now bounded by binarySize: the input is a binary
// blob that is not guaranteed to be NUL-terminated, so an unbounded string
// search could read past the end of the buffer.
bool Program::isValidLlvmBinary(
    const void *pBinary,
    size_t binarySize) {
    const char *pLlvmMagic = "BC\xc0\xde";
    const size_t magicSize = strlen(pLlvmMagic);
    if (!pBinary || (binarySize <= (magicSize + 1))) {
        return false;
    }
    const char *pBytes = static_cast<const char *>(pBinary);
    for (size_t offset = 0; offset + magicSize <= binarySize; ++offset) {
        if (pBytes[offset] == '\0') {
            // End of the string view of the buffer; nothing beyond it counts.
            break;
        }
        if (memcmp(pBytes + offset, pLlvmMagic, magicSize) == 0) {
            return true;
        }
    }
    return false;
}
// Replaces the stored source with a copy of the given C string.
void Program::setSource(char *pSourceString) {
    sourceCode.assign(pSourceString);
}
// Exposes the stored source text.
// On success pBinary points at sourceCode's internal buffer and dataSize is
// its length. With no source stored, pBinary is null, dataSize is 0 and
// CL_INVALID_PROGRAM is returned.
cl_int Program::getSource(char *&pBinary, unsigned int &dataSize) const {
    if (sourceCode.empty()) {
        pBinary = nullptr;
        dataSize = 0;
        return CL_INVALID_PROGRAM;
    }
    pBinary = const_cast<char *>(sourceCode.c_str());
    dataSize = static_cast<unsigned int>(sourceCode.size());
    return CL_SUCCESS;
}
void Program::storeGenBinary(
const void *pSrc,
const size_t srcSize) {
storeBinary(genBinary, genBinarySize, pSrc, srcSize);
}
void Program::storeLlvmBinary(
const void *pSrc,
const size_t srcSize) {
storeBinary(llvmBinary, llvmBinarySize, pSrc, srcSize);
}
void Program::storeDebugData(
const void *pSrc,
const size_t srcSize) {
storeBinary(debugData, debugDataSize, pSrc, srcSize);
}
// Replaces the buffer owned through pDst with a fresh copy of pSrc.
//
// pDst    - owning pointer to update; its previous buffer is released.
// dstSize - receives the size of the new buffer.
// pSrc    - bytes to copy; expected to be non-null (debug break otherwise).
// srcSize - number of bytes to copy; expected to be > 0 (debug break otherwise).
void Program::storeBinary(
    char *&pDst,
    size_t &dstSize,
    const void *pSrc,
    const size_t srcSize) {
    dstSize = 0;
    DEBUG_BREAK_IF(!(pSrc && srcSize > 0));
    delete[] pDst;
    // Keep the owner consistent if the allocation below throws: a dangling
    // pointer here would be deleted a second time later.
    pDst = nullptr;
    pDst = new char[srcSize];
    // Store the full size; narrowing it to a 32-bit type would silently
    // truncate the recorded length of very large binaries.
    dstSize = srcSize;
    memcpy_s(pDst, dstSize, pSrc, srcSize);
}
// Adds a message to the build log kept for pDevice.
// Null or empty messages are ignored. One trailing NUL counted in
// errorStringSize is dropped. The first message for a device starts its log;
// later messages are appended after a newline.
void Program::updateBuildLog(const Device *pDevice, const char *pErrorString,
                             size_t errorStringSize) {
    const bool nothingToLog = (pErrorString == nullptr) || (errorStringSize == 0) || (pErrorString[0] == '\0');
    if (nothingToLog) {
        return;
    }

    const bool endsWithNul = (pErrorString[errorStringSize - 1] == '\0');
    const size_t messageLength = endsWithNul ? errorStringSize - 1 : errorStringSize;
    const char *pMessageEnd = pErrorString + messageLength;

    auto logEntry = buildLog.find(pDevice);
    if (logEntry != buildLog.end()) {
        logEntry->second.append("\n");
        logEntry->second.append(pErrorString, pMessageEnd);
    } else {
        buildLog[pDevice].assign(pErrorString, pMessageEnd);
    }
}
// Returns the build log text stored for pDevice, or nullptr when the device
// has no log entry. The pointer refers to the string held in buildLog.
const char *Program::getBuildLog(const Device *pDevice) const {
    const auto logEntry = buildLog.find(pDevice);
    return (logEntry == buildLog.end()) ? nullptr : logEntry->second.c_str();
}
// Returns whatever CompilerInterface::getInstance() provides; callers in this
// file treat a null result as an error.
CompilerInterface *Program::getCompilerInterface() const {
    CompilerInterface *pCompilerInterface = CompilerInterface::getInstance();
    return pCompilerInterface;
}
void Program::separateBlockKernels() {
if ((0 == parentKernelInfoArray.size()) && (0 == subgroupKernelInfoArray.size())) {
return;
}
auto allKernelInfos(kernelInfoArray);
kernelInfoArray.clear();
for (auto &i : allKernelInfos) {
auto end = i->name.rfind("_dispatch_");
if (end != std::string::npos) {
bool baseKernelFound = false;
std::string baseKernelName(i->name, 0, end);
for (auto &j : parentKernelInfoArray) {
if (j->name.compare(baseKernelName) == 0) {
baseKernelFound = true;
break;
}
}
if (!baseKernelFound) {
for (auto &j : subgroupKernelInfoArray) {
if (j->name.compare(baseKernelName) == 0) {
baseKernelFound = true;
break;
}
}
}
if (baseKernelFound) {
//Parent or subgroup kernel found -> child kernel
blockKernelManager->addBlockKernelInfo(i);
} else {
kernelInfoArray.push_back(i);
}
} else {
//Regular kernel found
kernelInfoArray.push_back(i);
}
}
allKernelInfos.clear();
}
// Allocates a private surface for every block kernel that declares a
// non-zero per-thread private memory size and does not have one yet. The
// surface size is the per-thread size multiplied by
// computeUnitsUsedForScratch * max SIMD size of the kernel.
void Program::allocateBlockPrivateSurfaces() {
    const size_t blockCount = blockKernelManager->getCount();
    for (uint32_t i = 0; i < blockCount; i++) {
        const KernelInfo *info = blockKernelManager->getBlockKernelInfo(i);
        const auto &privateSurfacePatch = info->patchInfo.pAllocateStatelessPrivateSurface;
        if (!privateSurfacePatch) {
            continue;
        }

        size_t privateSize = privateSurfacePatch->PerThreadPrivateMemorySize;
        const bool needsSurface = (privateSize > 0) && (blockKernelManager->getPrivateSurface(i) == nullptr);
        if (!needsSurface) {
            continue;
        }

        privateSize *= getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * info->getMaxSimdSize();
        auto *privateSurface = getDevice(0).getMemoryManager()->createGraphicsAllocationWithRequiredBitness(privateSize, nullptr);
        blockKernelManager->pushPrivateSurface(privateSurface, i);
    }
}
// Releases the private surface of every block kernel that has one. The slot
// in blockKernelManager is reset to nullptr before the memory is freed.
void Program::freeBlockPrivateSurfaces() {
    const size_t blockCount = blockKernelManager->getCount();
    for (uint32_t i = 0; i < blockCount; i++) {
        auto *privateSurface = blockKernelManager->getPrivateSurface(i);
        if (privateSurface == nullptr) {
            continue;
        }
        blockKernelManager->pushPrivateSurface(nullptr, i);
        getDevice(0).getMemoryManager()->freeGraphicsMemory(privateSurface);
    }
}
// Derives programOptionVersion and allowNonUniform from the build options.
//
// The value following "-cl-std=CL" (e.g. 1.2, 2.0, 2.1) is parsed as
// major.minor and stored as major * 10 + minor; without that option the
// version defaults to 12 (OpenCL 1.2). Non-uniform work-groups are allowed
// when the version is at least 20 and "-cl-uniform-work-group-size" is not
// present. This function only ever raises allowNonUniform; it never clears it.
void Program::updateNonUniformFlag() {
    auto pos = options.find(clOptNameClVer);
    if (pos == std::string::npos) {
        programOptionVersion = 12u; //Default is 1.2
    } else {
        std::stringstream ss{options.c_str() + pos + clOptNameClVer.size()};
        // Zero-initialized: once an extraction fails, the following ones
        // leave their targets untouched, which would otherwise mean reading
        // indeterminate values for a malformed version string.
        uint32_t majorV = 0u;
        uint32_t minorV = 0u;
        char dot = '\0';
        ss >> majorV;
        ss >> dot;
        ss >> minorV;
        programOptionVersion = majorV * 10u + minorV;
    }
    if (programOptionVersion >= 20u && options.find(clOptNameUniformWgs) == std::string::npos) {
        allowNonUniform = true;
    }
}
// Sets allowNonUniform to true only if every input program allows
// non-uniform work-groups (trivially true for an empty input list).
void Program::updateNonUniformFlag(const Program **inputPrograms, size_t numInputPrograms) {
    bool everyInputAllows = true;
    for (cl_uint i = 0; i < numInputPrograms; i++) {
        if (everyInputAllows && !inputPrograms[i]->getAllowNonUniform()) {
            everyInputAllows = false;
        }
    }
    this->allowNonUniform = everyInputAllows;
}
} // namespace OCLRT

330
runtime/program/program.h Normal file
View File

@ -0,0 +1,330 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "block_kernel_manager.h"
#include "elf/reader.h"
#include "kernel_info.h"
#include "runtime/api/cl_types.h"
#include "runtime/device/device.h"
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/stdio.h"
#include "runtime/helpers/string_helpers.h"
#include "igfxfmid.h"
#include "patch_list.h"
#include <vector>
#include <string>
#include <map>
// Rounds `a` up to the next multiple of `b`; values that are already a
// multiple are returned unchanged. Both arguments are evaluated more than
// once, so they must be free of side effects.
#define OCLRT_ALIGN(a, b) ((((a) % (b)) == 0) ? (a) : ((a) - ((a) % (b)) + (b)))
namespace OCLRT {
class Context;
class CompilerInterface;
// Associates the API handle type _cl_program with the runtime class Program
// through the DerivedType member type.
template <>
struct OpenCLObjectMapper<_cl_program> {
typedef class Program DerivedType;
};
// Free-function form of the unhandled-token check; defined outside this header.
// NOTE(review): presumably returns true when a patch token the parser does not
// handle may be ignored without invalidating the binary — confirm against the
// definition.
bool isSafeToSkipUnhandledToken(unsigned int token);
// Runtime object behind the _cl_program handle (derives from
// BaseObject<_cl_program>). It holds the program's source text, its binaries
// (ELF, gen, LLVM, debug data), the kernel descriptions parsed from the gen
// binary, program-scope constant/global surfaces, build options and per-device
// build logs. Copying is disabled.
class Program : public BaseObject<_cl_program> {
public:
static const cl_ulong objectMagic = 0x5651C89100AAACFELL;
// Create program from binary
template <typename T = Program>
static T *create(
cl_context context,
cl_uint numDevices,
const cl_device_id *deviceList,
const size_t *lengths,
const unsigned char **binaries,
cl_int *binaryStatus,
cl_int &errcodeRet);
// Create program from source
template <typename T = Program>
static T *create(
cl_context context,
cl_uint count,
const char **strings,
const size_t *lengths,
cl_int &errcodeRet);
// Create program from a single null-terminated string for the given device
template <typename T = Program>
static T *create(
const char *nullTerminatedString,
Context *context,
Device &device,
bool isBuiltIn,
cl_int *errcodeRet);
// Create program from an already compiled gen binary.
// The binary is copied with storeGenBinary() and the new program is marked
// as an executable, resolved binary with build status CL_BUILD_SUCCESS.
// Returns nullptr and reports CL_INVALID_VALUE when binary is null or size
// is zero; errcodeRet is optional.
template <typename T = Program>
static T *createFromGenBinary(
Context *context,
const void *binary,
size_t size,
bool isBuiltIn,
cl_int *errcodeRet) {
cl_int retVal = CL_SUCCESS;
T *program = nullptr;
if ((binary == nullptr) || (size == 0)) {
retVal = CL_INVALID_VALUE;
}
if (CL_SUCCESS == retVal) {
program = new T(context, isBuiltIn);
program->numDevices = 1;
program->storeGenBinary(binary, size);
program->isCreatedFromBinary = true;
program->programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
program->isProgramBinaryResolved = true;
program->buildStatus = CL_BUILD_SUCCESS;
}
if (errcodeRet) {
*errcodeRet = retVal;
}
return program;
}
// Create program from an intermediate language buffer
template <typename T = Program>
static T *createFromIL(Context *context,
const void *il,
size_t length,
cl_int &errcodeRet);
Program(Context *context, bool isBuiltIn = false);
~Program() override;
Program(const Program &) = delete;
Program &operator=(const Program &) = delete;
// Build / compile / link entry points
cl_int build(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions,
void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
void *userData, bool enableCaching);
cl_int build(const cl_device_id device, const char *buildOptions, bool enableCaching,
std::unordered_map<std::string, BuiltinDispatchInfoBuilder *> &builtinsMap);
cl_int build(const char *pKernelData, size_t kernelDataSize);
MOCKABLE_VIRTUAL cl_int processGenBinary();
cl_int compile(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions,
cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames,
void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
void *userData);
cl_int link(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions,
cl_uint numInputPrograms, const cl_program *inputPrograms,
void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
void *userData);
// Kernel and program queries
size_t getNumKernels() const;
const KernelInfo *getKernelInfo(const char *kernelName) const;
const KernelInfo *getKernelInfo(size_t ordinal) const;
cl_int getInfo(cl_program_info paramName, size_t paramValueSize,
void *paramValue, size_t *paramValueSizeRet);
cl_int getBuildInfo(cl_device_id device, cl_program_build_info paramName,
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
// Dereferences the context pointer; use getContextPtr() when it may be null.
Context &getContext() const {
return *context;
}
Context *getContextPtr() const {
return context;
}
// deviceOrdinal is ignored: the single device stored in pDevice is returned.
const Device &getDevice(cl_uint deviceOrdinal) const {
return *pDevice;
}
// Always reports one device, independent of the numDevices member.
cl_uint getNumDevices() const {
return 1;
}
MOCKABLE_VIRTUAL cl_int processElfBinary(const void *pBinary, size_t binarySize, uint32_t &binaryVersion);
cl_int processSpirBinary(const void *pBinary, size_t binarySize, bool isSpirV);
void setSource(char *pSourceString);
cl_int getSource(char *&pBinary, unsigned int &dataSize) const;
void storeGenBinary(const void *pSrc, const size_t srcSize);
// Returns the stored gen binary and writes its size to the out-parameter.
char *getGenBinary(size_t &genBinarySize) const {
genBinarySize = this->genBinarySize;
return this->genBinary;
}
void storeLlvmBinary(const void *pSrc, const size_t srcSize);
void storeDebugData(const void *pSrc, const size_t srcSize);
void updateBuildLog(const Device *pDevice, const char *pErrorString, const size_t errorStringSize);
const char *getBuildLog(const Device *pDevice) const;
cl_uint getProgramBinaryType() const {
return programBinaryType;
}
bool getIsSpirV() const {
return isSpirV;
}
size_t getProgramScopePatchListSize() const {
return programScopePatchListSize;
}
GraphicsAllocation *getConstantSurface() const {
return constantSurface;
}
GraphicsAllocation *getGlobalSurface() const {
return globalSurface;
}
BlockKernelManager *getBlockKernelManager() const {
return blockKernelManager;
}
void allocateBlockPrivateSurfaces();
void freeBlockPrivateSurfaces();
const std::string &getOptions() const { return options; }
const std::string &getInternalOptions() const { return internalOptions; }
bool getAllowNonUniform() const {
return allowNonUniform;
}
bool getIsBuiltIn() const {
return isBuiltIn;
}
uint32_t getProgramOptionVersion() const {
return programOptionVersion;
}
// Format sniffing helpers for user-supplied binaries
static bool isValidLlvmBinary(const void *pBinary, size_t binarySize);
static bool isValidSpirvBinary(const void *pBinary, size_t binarySize);
protected:
// Context-less constructor, available to derived classes and friends only.
Program();
MOCKABLE_VIRTUAL bool isSafeToSkipUnhandledToken(unsigned int token) const;
MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize);
bool optionsAreNew(const char *options) const;
cl_int processElfHeader(const CLElfLib::SElf64Header *pElfHeader,
cl_program_binary_type &binaryType, uint32_t &numSections);
void getProgramCompilerVersion(SProgramBinaryHeader *pSectionData, uint32_t &binaryVersion) const;
cl_int resolveProgramBinary();
cl_int parseProgramScopePatchList();
MOCKABLE_VIRTUAL cl_int rebuildProgramFromLLVM();
cl_int parsePatchList(KernelInfo &pKernelInfo);
size_t processKernel(const void *pKernelBlob, cl_int &retVal);
// Shared implementation of storeGenBinary/storeLlvmBinary/storeDebugData.
void storeBinary(char *&pDst, size_t &dstSize, const void *pSrc, const size_t srcSize);
bool validateGenBinaryDevice(GFXCORE_FAMILY device) const;
bool validateGenBinaryHeader(const iOpenCL::SProgramBinaryHeader *pGenBinaryHeader) const;
std::string getKernelNamesString() const;
MOCKABLE_VIRTUAL CompilerInterface *getCompilerInterface() const;
void separateBlockKernels();
void updateNonUniformFlag();
void updateNonUniformFlag(const Program **inputProgram, size_t numInputPrograms);
// Option-name prefixes looked up in the build options string
static const std::string clOptNameClVer;
static const std::string clOptNameUniformWgs;
// clang-format off
cl_program_binary_type programBinaryType;
bool isSpirV = false;
// Binaries below are heap arrays owned by the program (released with delete[] in the destructor)
char* elfBinary;
size_t elfBinarySize;
char* genBinary;
size_t genBinarySize;
char* llvmBinary;
size_t llvmBinarySize;
char* debugData;
size_t debugDataSize;
std::vector<KernelInfo*> kernelInfoArray;
std::vector<KernelInfo*> parentKernelInfoArray;
std::vector<KernelInfo*> subgroupKernelInfoArray;
BlockKernelManager * blockKernelManager;
// Points into genBinary, directly behind the program binary header (set by processGenBinary)
const void* programScopePatchList;
size_t programScopePatchListSize;
GraphicsAllocation* constantSurface;
GraphicsAllocation* globalSurface;
size_t globalVarTotalSize;
cl_build_status buildStatus;
bool isCreatedFromBinary;
bool isProgramBinaryResolved;
// Source text; also holds the raw bytes of an LLVM/SPIR-V binary (see processSpirBinary)
std::string sourceCode;
std::string options;
std::string internalOptions;
std::string hashFileName;
std::string hashFilePath;
// OpenCL version from the build options encoded as major * 10 + minor (12 by default)
uint32_t programOptionVersion;
bool allowNonUniform;
std::map<const Device*, std::string> buildLog;
Context* context;
Device* pDevice;
cl_uint numDevices;
bool isBuiltIn;
friend class OfflineCompiler;
// clang-format on
};
} // namespace OCLRT