mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Initial commit
Change-Id: I4bf1707bd3dfeadf2c17b0a7daff372b1925ebbd
This commit is contained in:
39
runtime/program/.clang-tidy
Normal file
39
runtime/program/.clang-tidy
Normal file
@ -0,0 +1,39 @@
|
||||
---
|
||||
Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,readability-identifier-naming,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,-clang-analyzer-optin.performance.Padding'
|
||||
# -clang-analyzer-core.CallAndMessage
|
||||
# WarningsAsErrors: '.*'
|
||||
HeaderFilterRegex: 'runtime/'
|
||||
AnalyzeTemporaryDtors: false
|
||||
CheckOptions:
|
||||
- key: google-readability-braces-around-statements.ShortStatementLines
|
||||
value: '1'
|
||||
- key: google-readability-function-size.StatementThreshold
|
||||
value: '800'
|
||||
- key: google-readability-namespace-comments.ShortNamespaceLines
|
||||
value: '10'
|
||||
- key: google-readability-namespace-comments.SpacesBeforeComments
|
||||
value: '2'
|
||||
- key: readability-identifier-naming.MethodCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.ParameterCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.ClassMemberCase
|
||||
value: camelBack
|
||||
- key: readability-identifier-naming.ClassMethodCase
|
||||
value: camelBack
|
||||
- key: modernize-loop-convert.MaxCopySize
|
||||
value: '16'
|
||||
- key: modernize-loop-convert.MinConfidence
|
||||
value: reasonable
|
||||
- key: modernize-loop-convert.NamingStyle
|
||||
value: CamelCase
|
||||
- key: modernize-pass-by-value.IncludeStyle
|
||||
value: llvm
|
||||
- key: modernize-replace-auto-ptr.IncludeStyle
|
||||
value: llvm
|
||||
- key: modernize-use-nullptr.NullMacros
|
||||
value: 'NULL'
|
||||
- key: modernize-use-default-member-init.UseAssignment
|
||||
value: '1'
|
||||
...
|
||||
|
63
runtime/program/block_kernel_manager.cpp
Normal file
63
runtime/program/block_kernel_manager.cpp
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "block_kernel_manager.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Appends blockKernelInfo to the list of block kernels kept by this manager
// (the destructor deletes every stored entry) and latches blockUsesPrintf as
// soon as one added block has a non-null
// patchInfo.pAllocateStatelessPrintfSurface.
void BlockKernelManager::addBlockKernelInfo(KernelInfo *blockKernelInfo) {
    blockKernelInfoArray.push_back(blockKernelInfo);

    const bool hasPrintfSurface = (nullptr != blockKernelInfo->patchInfo.pAllocateStatelessPrintfSurface);
    if (hasPrintfSurface) {
        blockUsesPrintf = true;
    }
}
|
||||
|
||||
// Returns the block kernel stored at position ordinal.
// An out-of-range ordinal is only flagged through DEBUG_BREAK_IF; the index is
// not validated in any other way before the array is accessed.
const KernelInfo *BlockKernelManager::getBlockKernelInfo(size_t ordinal) {
    const auto &registeredBlocks = blockKernelInfoArray;
    DEBUG_BREAK_IF(!(ordinal < registeredBlocks.size()));
    return registeredBlocks[ordinal];
}
|
||||
|
||||
// Deletes every KernelInfo stored in blockKernelInfoArray.
// The private surface array is left alone: its entries are not freed here.
BlockKernelManager::~BlockKernelManager() {
    for (KernelInfo *ownedBlockInfo : blockKernelInfoArray) {
        delete ownedBlockInfo;
    }
}
|
||||
// Stores allocation as the private surface of the block kernel at ordinal.
//
// The surface array is grown on demand to one slot per registered block
// kernel. Only the newly appended slots are initialized to nullptr; slots that
// were populated before keep their value. (Resetting every slot whenever the
// array grew would silently drop surfaces pushed earlier.)
//
// allocation - surface to remember; it is stored as-is, not copied.
// ordinal    - index of the block kernel the surface belongs to; must be
//              smaller than the number of registered block kernels.
void BlockKernelManager::pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal) {
    if (blockPrivateSurfaceArray.size() < blockKernelInfoArray.size()) {
        // resize() leaves existing elements untouched and fills the tail.
        blockPrivateSurfaceArray.resize(blockKernelInfoArray.size(), nullptr);
    }

    DEBUG_BREAK_IF(ordinal >= blockPrivateSurfaceArray.size());
    if (ordinal >= blockPrivateSurfaceArray.size()) {
        // Never write past the end of the array, even when the debug break
        // above is compiled out.
        return;
    }

    blockPrivateSurfaceArray[ordinal] = allocation;
}
|
||||
|
||||
// Returns the private surface stored for the block kernel at ordinal.
// If the queried ordinal is out of bounds, nullptr is returned; this happens
// when no private surface has been pushed for that position.
GraphicsAllocation *BlockKernelManager::getPrivateSurface(size_t ordinal) {
    const bool slotExists = ordinal < blockPrivateSurfaceArray.size();
    return slotExists ? blockPrivateSurfaceArray[ordinal] : nullptr;
}
|
||||
} // namespace OCLRT
|
52
runtime/program/block_kernel_manager.h
Normal file
52
runtime/program/block_kernel_manager.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "kernel_info.h"
|
||||
#include "runtime/api/cl_types.h"
|
||||
#include <vector>
|
||||
|
||||
namespace OCLRT {
|
||||
class GraphicsAllocation;
|
||||
|
||||
class BlockKernelManager {
|
||||
public:
|
||||
BlockKernelManager() = default;
|
||||
virtual ~BlockKernelManager();
|
||||
void addBlockKernelInfo(KernelInfo *);
|
||||
const KernelInfo *getBlockKernelInfo(size_t ordinal);
|
||||
size_t getCount() const {
|
||||
return blockKernelInfoArray.size();
|
||||
}
|
||||
bool getIfBlockUsesPrintf() const {
|
||||
return blockUsesPrintf;
|
||||
}
|
||||
|
||||
void pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal);
|
||||
GraphicsAllocation *getPrivateSurface(size_t ordinal);
|
||||
|
||||
protected:
|
||||
bool blockUsesPrintf = false;
|
||||
std::vector<KernelInfo *> blockKernelInfoArray;
|
||||
std::vector<GraphicsAllocation *> blockPrivateSurfaceArray;
|
||||
};
|
||||
} // namespace OCLRT
|
162
runtime/program/build.cpp
Normal file
162
runtime/program/build.cpp
Normal file
@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "runtime/compiler_interface/compiler_interface.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
#include "runtime/platform/platform.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "program.h"
|
||||
#include <cstring>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Builds the program.
//
// Argument validation:
//   - deviceList and numDevices must be both set or both empty,
//   - userData without funcNotify is rejected,
//   - a non-null deviceList must start with a valid device object,
//   - a build that is already in progress is rejected.
// For programs not created from a binary the source is handed to the
// compiler interface; the "-cl-intel-gtpin-rera" option, when present in
// buildOptions, is moved from options to internalOptions first.
// Afterwards the gen binary is processed and block kernels are separated.
//
// buildStatus and programBinaryType are updated from the final result on
// every path (including validation failures), and funcNotify - when non-null -
// is always invoked before returning.
//
// Returns CL_SUCCESS or the first error encountered.
cl_int Program::build(
    cl_uint numDevices,
    const cl_device_id *deviceList,
    const char *buildOptions,
    void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
    void *userData,
    bool enableCaching) {
    cl_int retVal = CL_SUCCESS;

    do {
        const bool listMissing = (deviceList == nullptr);
        const bool countIsZero = (numDevices == 0);
        if (listMissing != countIsZero) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        if ((userData != nullptr) && (funcNotify == nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // if a device_list is specified, make sure it points to our device
        // NOTE: a null device_list is ok - it means "all devices"
        if (!listMissing && (validateObject(*deviceList) != CL_SUCCESS)) {
            retVal = CL_INVALID_DEVICE;
            break;
        }

        // check to see if a previous build request is in progress
        if (buildStatus == CL_BUILD_IN_PROGRESS) {
            retVal = CL_INVALID_OPERATION;
            break;
        }

        if (!isCreatedFromBinary) {
            buildStatus = CL_BUILD_IN_PROGRESS;

            if (buildOptions != nullptr) {
                options = buildOptions;
            } else {
                options = "";
            }

            const std::string reraOption = "-cl-intel-gtpin-rera";
            const size_t reraPos = options.find(reraOption);
            if (reraPos != std::string::npos) {
                // build option "-cl-intel-gtpin-rera" is present, move it to internalOptions
                options.erase(reraPos, reraOption.length());
                internalOptions.append(reraOption);
                internalOptions.append(" ");
            }

            CompilerInterface *pCompilerInterface = getCompilerInterface();
            if (pCompilerInterface == nullptr) {
                retVal = CL_OUT_OF_HOST_MEMORY;
                break;
            }

            // an empty source string cannot be built
            if (sourceCode.c_str()[0] == '\0') {
                retVal = CL_INVALID_PROGRAM;
                break;
            }

            internalOptions.append(platform()->getCompilerExtensions());

            TranslationArgs inputArgs = {};
            inputArgs.pInput = const_cast<char *>(sourceCode.c_str());
            inputArgs.InputSize = static_cast<uint32_t>(sourceCode.size());
            inputArgs.pOptions = options.c_str();
            inputArgs.OptionsSize = static_cast<uint32_t>(options.length());
            inputArgs.pInternalOptions = internalOptions.c_str();
            inputArgs.InternalOptionsSize = static_cast<uint32_t>(internalOptions.length());
            inputArgs.pTracingOptions = nullptr;
            inputArgs.TracingOptionsCount = 0;
            DBG_LOG(LogApiCalls,
                    "Build Options", inputArgs.pOptions,
                    "\nBuild Internal Options", inputArgs.pInternalOptions);

            retVal = pCompilerInterface->build(*this, inputArgs, enableCaching);
            if (retVal != CL_SUCCESS) {
                break;
            }
        }

        updateNonUniformFlag();

        retVal = processGenBinary();
        if (retVal != CL_SUCCESS) {
            break;
        }

        separateBlockKernels();
    } while (false);

    if (retVal == CL_SUCCESS) {
        buildStatus = CL_BUILD_SUCCESS;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
    } else {
        buildStatus = CL_BUILD_ERROR;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
    }

    if (funcNotify != nullptr) {
        funcNotify(this, userData);
    }

    return retVal;
}
|
||||
|
||||
// Builds the program for a single device (no notification callback) and, on
// success, attaches a dispatch builder to every kernel whose name has an entry
// in builtinsMap. Kernels without an entry are left unchanged.
// Returns the result of the underlying build.
cl_int Program::build(const cl_device_id device, const char *buildOptions, bool enableCaching,
                      std::unordered_map<std::string, BuiltinDispatchInfoBuilder *> &builtinsMap) {
    const auto buildResult = this->build(1, &device, buildOptions, nullptr, nullptr, enableCaching);

    if (buildResult == CL_SUCCESS) {
        for (auto &kernelInfo : this->kernelInfoArray) {
            const auto builderEntry = builtinsMap.find(kernelInfo->name);
            if (builderEntry != builtinsMap.end()) {
                kernelInfo->builtinDispatchBuilder = builderEntry->second;
            }
        }
    }

    return buildResult;
}
|
||||
|
||||
// Forwards pKernelData to processKernel() and returns the status it reports.
// kernelDataSize is not used by this overload.
cl_int Program::build(
    const char *pKernelData,
    size_t kernelDataSize) {
    cl_int processingStatus = CL_SUCCESS;

    processKernel(pKernelData, processingStatus);

    return processingStatus;
}
|
||||
}
|
188
runtime/program/compile.cpp
Normal file
188
runtime/program/compile.cpp
Normal file
@ -0,0 +1,188 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "elf/writer.h"
|
||||
#include "runtime/compiler_interface/compiler_interface.h"
|
||||
#include "runtime/platform/platform.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "program.h"
|
||||
#include <cstring>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Compiles the program source together with the supplied header programs.
//
// Argument validation:
//   - deviceList and numDevices must be both set or both empty,
//   - inputHeaders / headerIncludeNames must both be null when
//     numInputHeaders is 0 and both be non-null otherwise,
//   - userData without funcNotify is rejected,
//   - a non-null deviceList must start with a valid device object,
//   - a build that is already in progress is rejected.
// The main source ("CLMain" section) and the source of every header program
// are packed into an ELF container, which is then passed to the compiler
// interface. The "-cl-intel-gtpin-rera" option, when present in buildOptions,
// is moved from options to internalOptions first.
//
// On every path buildStatus and programBinaryType are updated from the final
// result, the ELF writer and the temporary buffer are released,
// internalOptions is cleared, and funcNotify - when non-null - is invoked.
//
// Returns CL_SUCCESS or the first error encountered.
cl_int Program::compile(
    cl_uint numDevices,
    const cl_device_id *deviceList,
    const char *buildOptions,
    cl_uint numInputHeaders,
    const cl_program *inputHeaders,
    const char **headerIncludeNames,
    void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
    void *userData) {
    cl_int retVal = CL_SUCCESS;
    CLElfLib::CElfWriter *pElfWriter = nullptr;
    size_t compileDataSize = 0;
    char *pCompileData = nullptr;

    do {
        if ((deviceList == nullptr) != (numDevices == 0)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        if (numInputHeaders == 0) {
            if ((headerIncludeNames != nullptr) || (inputHeaders != nullptr)) {
                retVal = CL_INVALID_VALUE;
                break;
            }
        } else {
            if ((headerIncludeNames == nullptr) || (inputHeaders == nullptr)) {
                retVal = CL_INVALID_VALUE;
                break;
            }
        }

        if ((funcNotify == nullptr) &&
            (userData != nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // if a device_list is specified, make sure it points to our device
        // NOTE: a null device_list is ok - it means "all devices"
        if ((deviceList != nullptr) && validateObject(*deviceList) != CL_SUCCESS) {
            retVal = CL_INVALID_DEVICE;
            break;
        }

        if (buildStatus == CL_BUILD_IN_PROGRESS) {
            retVal = CL_INVALID_OPERATION;
            break;
        }

        buildStatus = CL_BUILD_IN_PROGRESS;

        options = (buildOptions != nullptr) ? buildOptions : "";
        const std::string reraStr = "-cl-intel-gtpin-rera";
        const size_t pos = options.find(reraStr);
        if (pos != std::string::npos) {
            // compile option "-cl-intel-gtpin-rera" is present, move it to internalOptions
            options.erase(pos, reraStr.length());
            internalOptions.append(reraStr);
            internalOptions.append(" ");
        }

        // create ELF writer to process all sources to be compiled
        pElfWriter = CLElfLib::CElfWriter::create(CLElfLib::EH_TYPE_OPENCL_SOURCE, CLElfLib::EH_MACHINE_NONE, 0);
        UNRECOVERABLE_IF(pElfWriter == nullptr);

        CLElfLib::SSectionNode sectionNode;

        // create main section
        sectionNode.Name = "CLMain";
        sectionNode.pData = const_cast<char *>(sourceCode.c_str());
        // size up to and including the first terminating NUL
        sectionNode.DataSize = static_cast<unsigned int>(strlen(sourceCode.c_str()) + 1);
        sectionNode.Flags = 0;
        sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_SOURCE;

        // add main program's source
        pElfWriter->addSection(&sectionNode);

        for (cl_uint i = 0; i < numInputHeaders; i++) {
            cl_program program = inputHeaders[i];
            if (program == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }
            Program *pHeaderProgObj = castToObject<Program>(program);
            if (pHeaderProgObj == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }
            sectionNode.Name = headerIncludeNames[i];
            sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_HEADER;
            sectionNode.Flags = 0;
            // collect required data from the header
            retVal = pHeaderProgObj->getSource(sectionNode.pData, sectionNode.DataSize);
            if (retVal != CL_SUCCESS) {
                break;
            }
            pElfWriter->addSection(&sectionNode);
        }
        // a failure inside the header loop only left the loop; leave the
        // whole compile sequence as well
        if (retVal != CL_SUCCESS) {
            break;
        }

        // first call queries the required size, second call fills the buffer
        pElfWriter->resolveBinary(nullptr, compileDataSize);
        pCompileData = new char[compileDataSize];
        pElfWriter->resolveBinary(pCompileData, compileDataSize);

        CompilerInterface *pCompilerInterface = getCompilerInterface();
        if (!pCompilerInterface) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        TranslationArgs inputArgs = {};

        // set parameters for compilation
        internalOptions.append(platform()->getCompilerExtensions());
        inputArgs.pInput = pCompileData;
        inputArgs.InputSize = static_cast<uint32_t>(compileDataSize);
        inputArgs.pOptions = options.c_str();
        inputArgs.OptionsSize = static_cast<uint32_t>(options.length());
        inputArgs.pInternalOptions = internalOptions.c_str();
        inputArgs.InternalOptionsSize = static_cast<uint32_t>(internalOptions.length());
        inputArgs.pTracingOptions = nullptr;
        inputArgs.TracingOptionsCount = 0;

        retVal = pCompilerInterface->compile(*this, inputArgs);
        if (retVal != CL_SUCCESS) {
            break;
        }
        updateNonUniformFlag();
    } while (false);

    if (retVal != CL_SUCCESS) {
        buildStatus = CL_BUILD_ERROR;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
    } else {
        buildStatus = CL_BUILD_SUCCESS;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
    }

    CLElfLib::CElfWriter::destroy(pElfWriter);
    delete[] pCompileData;
    internalOptions.clear();

    if (funcNotify != nullptr) {
        (*funcNotify)(this, userData);
    }

    return retVal;
}
|
||||
} // namespace OCLRT
|
31
runtime/program/create.cpp
Normal file
31
runtime/program/create.cpp
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/program/create.inl"
|
||||
#include "runtime/program/program.h"
|
||||
|
||||
namespace OCLRT {
|
||||
// Explicit instantiations of the Program factory templates for T = Program.

// create() from device binaries
template Program *Program::create<Program>(cl_context,
                                           cl_uint,
                                           const cl_device_id *,
                                           const size_t *,
                                           const unsigned char **,
                                           cl_int *,
                                           cl_int &);

// create() from an array of source strings
template Program *Program::create<Program>(cl_context,
                                           cl_uint,
                                           const char **,
                                           const size_t *,
                                           cl_int &);

// create() from a single null-terminated source string
template Program *Program::create<Program>(const char *,
                                           Context *,
                                           Device &,
                                           bool,
                                           cl_int *);

// createFromIL()
template Program *Program::createFromIL<Program>(Context *,
                                                 const void *,
                                                 size_t,
                                                 cl_int &);
|
||||
}
|
144
runtime/program/create.inl
Normal file
144
runtime/program/create.inl
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/program/program.h"
|
||||
#include "runtime/context/context.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename T>
|
||||
T *Program::create(
|
||||
cl_context context,
|
||||
cl_uint numDevices,
|
||||
const cl_device_id *deviceList,
|
||||
const size_t *lengths,
|
||||
const unsigned char **binaries,
|
||||
cl_int *binaryStatus,
|
||||
cl_int &errcodeRet) {
|
||||
auto pContext = castToObject<Context>(context);
|
||||
DEBUG_BREAK_IF(!pContext);
|
||||
|
||||
auto program = new T(pContext);
|
||||
|
||||
auto retVal = program->createProgramFromBinary(binaries[0], lengths[0]);
|
||||
|
||||
if (binaryStatus) {
|
||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
||||
*binaryStatus = CL_SUCCESS;
|
||||
}
|
||||
|
||||
if (retVal != CL_SUCCESS) {
|
||||
delete program;
|
||||
program = nullptr;
|
||||
}
|
||||
|
||||
errcodeRet = retVal;
|
||||
return program;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T *Program::create(
|
||||
cl_context context,
|
||||
cl_uint count,
|
||||
const char **strings,
|
||||
const size_t *lengths,
|
||||
cl_int &errcodeRet) {
|
||||
std::string combinedString;
|
||||
size_t combinedStringSize = 0;
|
||||
T *program = nullptr;
|
||||
auto pContext = castToObject<Context>(context);
|
||||
DEBUG_BREAK_IF(!pContext);
|
||||
|
||||
auto retVal = createCombinedString(
|
||||
combinedString,
|
||||
combinedStringSize,
|
||||
count,
|
||||
strings,
|
||||
lengths);
|
||||
|
||||
if (CL_SUCCESS == retVal) {
|
||||
program = new T(pContext);
|
||||
program->sourceCode.swap(combinedString);
|
||||
}
|
||||
|
||||
errcodeRet = retVal;
|
||||
return program;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T *Program::create(
|
||||
const char *nullTerminatedString,
|
||||
Context *context,
|
||||
Device &device,
|
||||
bool isBuiltIn,
|
||||
cl_int *errcodeRet) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
T *program = nullptr;
|
||||
|
||||
if (nullTerminatedString == nullptr) {
|
||||
retVal = CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
if (retVal == CL_SUCCESS) {
|
||||
program = new T();
|
||||
program->setSource((char *)nullTerminatedString);
|
||||
program->context = context;
|
||||
program->isBuiltIn = isBuiltIn;
|
||||
if (program->context && !program->isBuiltIn) {
|
||||
program->context->incRefInternal();
|
||||
}
|
||||
program->pDevice = &device;
|
||||
program->numDevices = 1;
|
||||
if (is32bit || DebugManager.flags.DisableStatelessToStatefulOptimization.get()) {
|
||||
program->internalOptions += "-cl-intel-greater-than-4GB-buffer-required";
|
||||
}
|
||||
}
|
||||
|
||||
if (errcodeRet) {
|
||||
*errcodeRet = retVal;
|
||||
}
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T *Program::createFromIL(Context *ctx,
|
||||
const void *il,
|
||||
size_t length,
|
||||
cl_int &errcodeRet) {
|
||||
errcodeRet = CL_SUCCESS;
|
||||
|
||||
if ((il == nullptr) || (length == 0)) {
|
||||
errcodeRet = CL_INVALID_BINARY;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
T *program = new T(ctx, false);
|
||||
errcodeRet = program->createProgramFromBinary(il, length);
|
||||
if (errcodeRet != CL_SUCCESS) {
|
||||
delete program;
|
||||
program = nullptr;
|
||||
}
|
||||
|
||||
return program;
|
||||
}
|
||||
} // namespace OCLRT
|
31
runtime/program/evaluate_unhandled_token.cpp
Normal file
31
runtime/program/evaluate_unhandled_token.cpp
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/program/program.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Tells whether an unhandled token may be skipped.
// This implementation answers "no" for every token value; the argument is not
// inspected.
bool Program::isSafeToSkipUnhandledToken(unsigned int token) const {
    constexpr bool skippable = false;
    return skippable;
}
|
||||
|
||||
} // namespace OCLRT
|
224
runtime/program/get_info.cpp
Normal file
224
runtime/program/get_info.cpp
Normal file
@ -0,0 +1,224 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/get_info.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "program.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Answers a program info query.
//
// paramName         - which property to return.
// paramValueSize    - size of the buffer behind paramValue.
// paramValue        - destination buffer. For CL_PROGRAM_BINARIES and
//                     CL_PROGRAM_DEBUG_INFO_INTEL it is an array of pointers
//                     and the data is copied to the buffer its first entry
//                     points to.
// paramValueSizeRet - optional; receives the size reported for the query.
//
// For every query pSrc/srcSize describe the bytes handed to ::getInfo() and
// retSize is the size reported back to the caller.
//
// Returns CL_SUCCESS, CL_INVALID_VALUE for unknown queries or too small
// pointer arrays, CL_INVALID_PROGRAM_EXECUTABLE for kernel queries on a
// program that is not built successfully, or the status of ::getInfo().
cl_int Program::getInfo(cl_program_info paramName, size_t paramValueSize,
                        void *paramValue, size_t *paramValueSizeRet) {
    cl_int retVal = CL_SUCCESS;
    const void *pSrc = nullptr;
    size_t srcSize = 0;
    size_t retSize = 0;

    // Locals that may back pSrc; they have to stay alive until the copy at
    // the end of the function.
    std::string kernelNamesString;
    cl_device_id deviceId = pDevice;
    cl_uint refCount = 0;
    size_t numKernels = 0;
    cl_context clContext = context;

    switch (paramName) {
    case CL_PROGRAM_CONTEXT:
        pSrc = &clContext;
        retSize = srcSize = sizeof(clContext);
        break;

    case CL_PROGRAM_BINARIES:
        resolveProgramBinary();
        pSrc = elfBinary;
        retSize = sizeof(void **);
        srcSize = elfBinarySize;
        if (paramValue != nullptr) {
            if (paramValueSize < retSize) {
                retVal = CL_INVALID_VALUE;
                break;
            }
            // copy into the buffer the caller's first array entry points to
            paramValueSize = srcSize;
            paramValue = *static_cast<void **>(paramValue);
        }
        break;

    case CL_PROGRAM_BINARY_SIZES:
        resolveProgramBinary();
        pSrc = &elfBinarySize;
        // size of the value that is copied, not of a pointer to it
        retSize = srcSize = sizeof(elfBinarySize);
        break;

    case CL_PROGRAM_KERNEL_NAMES:
        kernelNamesString = getKernelNamesString();
        pSrc = kernelNamesString.c_str();
        retSize = srcSize = kernelNamesString.length() + 1;

        if (buildStatus != CL_BUILD_SUCCESS) {
            retVal = CL_INVALID_PROGRAM_EXECUTABLE;
        }
        break;

    case CL_PROGRAM_NUM_KERNELS:
        numKernels = kernelInfoArray.size();
        pSrc = &numKernels;
        retSize = srcSize = sizeof(numKernels);

        if (buildStatus != CL_BUILD_SUCCESS) {
            retVal = CL_INVALID_PROGRAM_EXECUTABLE;
        }
        break;

    case CL_PROGRAM_NUM_DEVICES:
        pSrc = &numDevices;
        retSize = srcSize = sizeof(cl_uint);
        break;

    case CL_PROGRAM_DEVICES:
        pSrc = &deviceId;
        retSize = srcSize = sizeof(cl_device_id);
        break;

    case CL_PROGRAM_REFERENCE_COUNT:
        refCount = static_cast<cl_uint>(this->getReference());
        retSize = srcSize = sizeof(refCount);
        pSrc = &refCount;
        break;

    case CL_PROGRAM_SOURCE:
        pSrc = sourceCode.c_str();
        // length up to and including the first terminating NUL
        retSize = srcSize = strlen(sourceCode.c_str()) + 1;
        break;

    case CL_PROGRAM_IL:
        pSrc = sourceCode.data();
        retSize = srcSize = sourceCode.size();
        if (!Program::isValidSpirvBinary(pSrc, srcSize)) {
            // not IL: report an empty result instead of an error
            if (paramValueSizeRet) {
                *paramValueSizeRet = 0;
            }
            return CL_SUCCESS;
        }
        break;

    case CL_PROGRAM_DEBUG_INFO_SIZES_INTEL:
        resolveProgramBinary();
        retSize = srcSize = sizeof(debugDataSize);
        pSrc = &debugDataSize;
        break;

    case CL_PROGRAM_DEBUG_INFO_INTEL:
        resolveProgramBinary();
        pSrc = debugData;
        retSize = numDevices * sizeof(void **);
        srcSize = debugDataSize;
        if (paramValue != nullptr) {
            if (paramValueSize < retSize) {
                retVal = CL_INVALID_VALUE;
                break;
            }
            // copy into the buffer the caller's first array entry points to
            paramValueSize = srcSize;
            paramValue = *static_cast<void **>(paramValue);
        }
        break;

    default:
        retVal = CL_INVALID_VALUE;
        break;
    }

    if (retVal == CL_SUCCESS) {
        retVal = ::getInfo(paramValue, paramValueSize, pSrc, srcSize);
    }
    if (paramValueSizeRet) {
        *paramValueSizeRet = retSize;
    }
    return retVal;
}
|
||||
|
||||
// Answers a program build info query for the given device.
//
// device must be the device this program holds in pDevice and must pass
// validateObjects(); otherwise CL_INVALID_DEVICE is returned and
// paramValueSizeRet is left untouched.
//
// paramValueSizeRet - optional; receives the size of the queried value (0 for
//                     unknown queries).
//
// Returns CL_SUCCESS, CL_INVALID_DEVICE, CL_INVALID_VALUE for unknown queries,
// or the status of ::getInfo().
cl_int Program::getBuildInfo(cl_device_id device, cl_program_build_info paramName,
                             size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const {
    const cl_device_id programDevice = pDevice;
    if (device != programDevice) {
        return CL_INVALID_DEVICE;
    }
    if (validateObjects(device) != CL_SUCCESS) {
        return CL_INVALID_DEVICE;
    }

    auto pDev = castToObject<Device>(device);

    cl_int retVal = CL_SUCCESS;
    const void *pSrc = nullptr;
    // number of bytes to copy; also the size reported to the caller
    size_t valueSize = 0;

    switch (paramName) {
    case CL_PROGRAM_BUILD_STATUS:
        pSrc = &buildStatus;
        valueSize = sizeof(cl_build_status);
        break;

    case CL_PROGRAM_BUILD_OPTIONS:
        pSrc = options.c_str();
        valueSize = strlen(options.c_str()) + 1;
        break;

    case CL_PROGRAM_BUILD_LOG: {
        const char *pBuildLog = getBuildLog(pDev);

        if (pBuildLog == nullptr) {
            // no log available: report an empty string
            pSrc = "";
            valueSize = 1;
        } else {
            pSrc = pBuildLog;
            valueSize = strlen(pBuildLog) + 1;
        }
    } break;

    case CL_PROGRAM_BINARY_TYPE:
        pSrc = &programBinaryType;
        valueSize = sizeof(cl_program_binary_type);
        break;

    case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE:
        pSrc = &globalVarTotalSize;
        valueSize = sizeof(size_t);
        break;

    default:
        retVal = CL_INVALID_VALUE;
        break;
    }

    if (retVal == CL_SUCCESS) {
        retVal = ::getInfo(paramValue, paramValueSize, pSrc, valueSize);
    }

    if (paramValueSizeRet) {
        *paramValueSizeRet = valueSize;
    }

    return retVal;
}
|
||||
} // namespace OCLRT
|
51
runtime/program/heap_info.h
Normal file
51
runtime/program/heap_info.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include "patch_info.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
struct HeapInfo {
|
||||
const SKernelBinaryHeaderCommon *pKernelHeader;
|
||||
const void *pKernelHeap;
|
||||
const void *pGsh;
|
||||
const void *pDsh;
|
||||
void *pSsh;
|
||||
const void *pPatchList;
|
||||
const void *pBlob;
|
||||
size_t blobSize;
|
||||
|
||||
HeapInfo() {
|
||||
pKernelHeader = nullptr;
|
||||
pKernelHeap = nullptr;
|
||||
pGsh = nullptr;
|
||||
pDsh = nullptr;
|
||||
pSsh = nullptr;
|
||||
pPatchList = nullptr;
|
||||
pBlob = nullptr;
|
||||
blobSize = 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OCLRT
|
78
runtime/program/kernel_arg_info.h
Normal file
78
runtime/program/kernel_arg_info.h
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "config.h"
#include "CL/cl.h"
#include <cstdint>
#include <string>
#include <vector>
|
||||
|
||||
// One patch record for a kernel argument. KernelInfo::storeKernelArgPatchInfo
// fills the three fields from its dataOffset, dataSize and sourceOffset
// parameters respectively. All fields default to zero.
struct KernelArgPatchInfo {
    uint32_t crossthreadOffset = 0u; // receives the "dataOffset" value
    uint32_t size = 0u;              // receives the "dataSize" value
    uint32_t sourceOffset = 0u;      // receives the "sourceOffset" value
};
|
||||
|
||||
struct KernelArgInfo {
|
||||
static constexpr uint32_t undefinedOffset = (uint32_t)-1;
|
||||
|
||||
std::string name;
|
||||
std::string typeStr;
|
||||
std::string accessQualifierStr;
|
||||
std::string addressQualifierStr;
|
||||
std::string typeQualifierStr;
|
||||
uint32_t offsetHeap = 0;
|
||||
std::vector<KernelArgPatchInfo> kernelArgPatchInfoVector;
|
||||
uint32_t slmAlignment = 0;
|
||||
bool isImage = false;
|
||||
bool isMediaImage = false;
|
||||
bool isMediaBlockImage = false;
|
||||
bool isSampler = false;
|
||||
bool isAccelerator = false;
|
||||
bool isDeviceQueue = false;
|
||||
bool isBuffer = false;
|
||||
uint32_t samplerArgumentType = 0;
|
||||
uint32_t offsetImgWidth = undefinedOffset;
|
||||
uint32_t offsetImgHeight = undefinedOffset;
|
||||
uint32_t offsetImgDepth = undefinedOffset;
|
||||
uint32_t offsetChannelDataType = undefinedOffset;
|
||||
uint32_t offsetChannelOrder = undefinedOffset;
|
||||
uint32_t offsetArraySize = undefinedOffset;
|
||||
uint32_t offsetNumSamples = undefinedOffset;
|
||||
uint32_t offsetSamplerSnapWa = undefinedOffset;
|
||||
uint32_t offsetSamplerAddressingMode = undefinedOffset;
|
||||
uint32_t offsetSamplerNormalizedCoords = undefinedOffset;
|
||||
uint32_t offsetVmeMbBlockType = undefinedOffset;
|
||||
uint32_t offsetVmeSubpixelMode = undefinedOffset;
|
||||
uint32_t offsetVmeSadAdjustMode = undefinedOffset;
|
||||
uint32_t offsetVmeSearchPathType = undefinedOffset;
|
||||
uint32_t offsetObjectId = undefinedOffset;
|
||||
uint32_t offsetBufferOffset = undefinedOffset;
|
||||
|
||||
bool needPatch = false;
|
||||
|
||||
cl_kernel_arg_access_qualifier accessQualifier = CL_KERNEL_ARG_ACCESS_NONE;
|
||||
cl_kernel_arg_address_qualifier addressQualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
|
||||
cl_kernel_arg_type_qualifier typeQualifier = CL_KERNEL_ARG_TYPE_NONE;
|
||||
|
||||
KernelArgInfo() = default;
|
||||
};
|
499
runtime/program/kernel_info.cpp
Normal file
499
runtime/program/kernel_info.cpp
Normal file
@ -0,0 +1,499 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/sampler/sampler.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <sstream>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Out-of-class definitions of the WorkloadInfo sentinels; both hold the
// all-ones 32-bit value.
const uint32_t WorkloadInfo::undefinedOffset = static_cast<uint32_t>(-1);
const uint32_t WorkloadInfo::invalidParentEvent = static_cast<uint32_t>(-1);
|
||||
|
||||
std::unordered_map<std::string, uint32_t> accessQualifierMap = {
|
||||
{"", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"NONE", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"__read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"__write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE},
|
||||
{"__read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, uint32_t> addressQualifierMap = {
|
||||
{"", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"__global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"__local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
||||
{"__private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
{"__constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
||||
{"not_specified", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
};
|
||||
|
||||
// Pairs a type-qualifier keyword with the bit value it stands for; used as
// the element type of the typeQualifiers table.
struct KernelArgumentType {
    // Keyword that resolveKernelInfo searches for inside an argument's
    // type-qualifier string.
    const char *argTypeQualifier;

    // Value OR-ed into the argument's typeQualifier when the keyword is found.
    uint64_t argTypeQualifierValue;
};
|
||||
|
||||
// Keywords recognised inside an argument's type-qualifier string and the
// CL_KERNEL_ARG_TYPE_* bit each one contributes. resolveKernelInfo tests every
// entry with strstr, so several bits can be set for one argument.
constexpr KernelArgumentType typeQualifiers[] = {
    KernelArgumentType{"const", CL_KERNEL_ARG_TYPE_CONST},
    KernelArgumentType{"volatile", CL_KERNEL_ARG_TYPE_VOLATILE},
    KernelArgumentType{"restrict", CL_KERNEL_ARG_TYPE_RESTRICT},
    KernelArgumentType{"pipe", CL_KERNEL_ARG_TYPE_PIPE},
};
|
||||
|
||||
std::map<std::string, size_t> typeSizeMap = {
|
||||
{"char", sizeof(cl_char)},
|
||||
{"char2", sizeof(cl_char2)},
|
||||
{"char3", sizeof(cl_char3)},
|
||||
{"char4", sizeof(cl_char4)},
|
||||
{"char8", sizeof(cl_char8)},
|
||||
{"char16", sizeof(cl_char16)},
|
||||
|
||||
{"uchar", sizeof(cl_uchar)},
|
||||
{"uchar2", sizeof(cl_uchar2)},
|
||||
{"uchar3", sizeof(cl_uchar3)},
|
||||
{"uchar4", sizeof(cl_uchar4)},
|
||||
{"uchar8", sizeof(cl_uchar8)},
|
||||
{"uchar16", sizeof(cl_uchar16)},
|
||||
|
||||
{"short", sizeof(cl_short)},
|
||||
{"short2", sizeof(cl_short2)},
|
||||
{"short3", sizeof(cl_short3)},
|
||||
{"short4", sizeof(cl_short4)},
|
||||
{"short8", sizeof(cl_short8)},
|
||||
{"short16", sizeof(cl_short16)},
|
||||
|
||||
{"ushort", sizeof(cl_ushort)},
|
||||
{"ushort2", sizeof(cl_ushort2)},
|
||||
{"ushort3", sizeof(cl_ushort3)},
|
||||
{"ushort4", sizeof(cl_ushort4)},
|
||||
{"ushort8", sizeof(cl_ushort8)},
|
||||
{"ushort16", sizeof(cl_ushort16)},
|
||||
|
||||
{"int", sizeof(cl_int)},
|
||||
{"int2", sizeof(cl_int2)},
|
||||
{"int3", sizeof(cl_int3)},
|
||||
{"int4", sizeof(cl_int4)},
|
||||
{"int8", sizeof(cl_int8)},
|
||||
{"int16", sizeof(cl_int16)},
|
||||
|
||||
{"uint", sizeof(cl_uint)},
|
||||
{"uint2", sizeof(cl_uint2)},
|
||||
{"uint3", sizeof(cl_uint3)},
|
||||
{"uint4", sizeof(cl_uint4)},
|
||||
{"uint8", sizeof(cl_uint8)},
|
||||
{"uint16", sizeof(cl_uint16)},
|
||||
|
||||
{"long", sizeof(cl_long)},
|
||||
{"long2", sizeof(cl_long2)},
|
||||
{"long3", sizeof(cl_long3)},
|
||||
{"long4", sizeof(cl_long4)},
|
||||
{"long8", sizeof(cl_long8)},
|
||||
{"long16", sizeof(cl_long16)},
|
||||
|
||||
{"ulong", sizeof(cl_ulong)},
|
||||
{"ulong2", sizeof(cl_ulong2)},
|
||||
{"ulong3", sizeof(cl_ulong3)},
|
||||
{"ulong4", sizeof(cl_ulong4)},
|
||||
{"ulong8", sizeof(cl_ulong8)},
|
||||
{"ulong16", sizeof(cl_ulong16)},
|
||||
|
||||
{"half", sizeof(cl_half)},
|
||||
|
||||
{"float", sizeof(cl_float)},
|
||||
{"float2", sizeof(cl_float2)},
|
||||
{"float3", sizeof(cl_float3)},
|
||||
{"float4", sizeof(cl_float4)},
|
||||
{"float8", sizeof(cl_float8)},
|
||||
{"float16", sizeof(cl_float16)},
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
{"half2", sizeof(cl_half2)},
|
||||
{"half3", sizeof(cl_half3)},
|
||||
{"half4", sizeof(cl_half4)},
|
||||
{"half8", sizeof(cl_half8)},
|
||||
{"half16", sizeof(cl_half16)},
|
||||
#endif
|
||||
|
||||
{"double", sizeof(cl_double)},
|
||||
{"double2", sizeof(cl_double2)},
|
||||
{"double3", sizeof(cl_double3)},
|
||||
{"double4", sizeof(cl_double4)},
|
||||
{"double8", sizeof(cl_double8)},
|
||||
{"double16", sizeof(cl_double16)},
|
||||
};
|
||||
// Builds a WorkSizeInfo from explicit values: each argument is copied into the
// member of the same name (yTiledSurface goes to yTiledSurfaces), then
// minWorkGroupSize is derived by setMinWorkGroupSize().
WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, uint32_t hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface)
    : maxWorkGroupSize(maxWorkGroupSize),
      hasBarriers(hasBarriers),
      simdSize(simdSize),
      slmTotalSize(slmTotalSize),
      coreFamily(coreFamily),
      numThreadsPerSlice(numThreadsPerSlice),
      localMemSize(localMemSize),
      imgUsed(imgUsed),
      yTiledSurfaces(yTiledSurface) {
    // minWorkGroupSize is intentionally not in the initializer list; it is
    // computed here from the members set above.
    setMinWorkGroupSize();
}
|
||||
// Builds a WorkSizeInfo from the kernel referenced by dispatchInfo: limits are
// read from the kernel's device, barrier/SIMD data from its KernelInfo and SLM
// usage from the kernel itself. Image usage is detected by setIfUseImg() and
// minWorkGroupSize is derived by setMinWorkGroupSize().
WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
    auto pKernel = dispatchInfo.getKernel();
    auto &&device = pKernel->getDevice();
    auto &&deviceInfo = device.getDeviceInfo();
    auto &&kernelInfo = pKernel->getKernelInfo();

    this->maxWorkGroupSize = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize);
    this->hasBarriers = static_cast<uint32_t>(kernelInfo.patchInfo.executionEnvironment->HasBarriers);
    this->simdSize = static_cast<uint32_t>(kernelInfo.getMaxSimdSize());
    this->slmTotalSize = static_cast<uint32_t>(pKernel->slmTotalSize);
    this->coreFamily = device.getHardwareInfo().pPlatform->eRenderCoreFamily;
    // The member is called "per slice" but the value read is maxNumEUsPerSubSlice.
    this->numThreadsPerSlice = static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice);
    this->localMemSize = static_cast<uint32_t>(deviceInfo.localMemSize);

    setIfUseImg(pKernel);
    setMinWorkGroupSize();
}
|
||||
// Sets imgUsed and yTiledSurfaces to true when at least one argument of
// pKernel is flagged isImage; otherwise leaves both members untouched.
void WorkSizeInfo::setIfUseImg(Kernel *pKernel) {
    const auto argCount = pKernel->getKernelArgsNumber();
    auto &&kernelInfo = pKernel->getKernelInfo();

    for (auto argIndex = 0u; argIndex < argCount; ++argIndex) {
        if (!kernelInfo.kernelArgInfo[argIndex].isImage) {
            continue;
        }
        imgUsed = true;
        yTiledSurfaces = true;
    }
}
|
||||
void WorkSizeInfo::setMinWorkGroupSize() {
|
||||
minWorkGroupSize = 0;
|
||||
if (hasBarriers > 0) {
|
||||
uint32_t maxBarriersPerHSlice = (coreFamily >= IGFX_GEN9_CORE) ? 32 : 16;
|
||||
minWorkGroupSize = numThreadsPerSlice * simdSize / maxBarriersPerHSlice;
|
||||
}
|
||||
if (slmTotalSize > 0) {
|
||||
minWorkGroupSize = std::max(maxWorkGroupSize / ((localMemSize / slmTotalSize)), minWorkGroupSize);
|
||||
}
|
||||
}
|
||||
void WorkSizeInfo::checkRatio(const size_t workItems[3]) {
|
||||
if (slmTotalSize > 0) {
|
||||
useRatio = true;
|
||||
targetRatio = log((float)workItems[0]) - log((float)workItems[1]);
|
||||
useStrictRatio = false;
|
||||
} else if (yTiledSurfaces == true) {
|
||||
useRatio = true;
|
||||
targetRatio = YTilingRatioValue;
|
||||
useStrictRatio = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates a default-constructed KernelInfo with new and returns the raw
// pointer; the caller receives ownership.
KernelInfo *KernelInfo::create() {
    KernelInfo *instance = new KernelInfo();
    return instance;
}
|
||||
|
||||
// Frees what this object allocated with new[]: each printf string copy held
// in patchInfo.stringDataMap and the crossThreadData array.
KernelInfo::~KernelInfo() {
    kernelArgInfo.clear();

    auto &printfStrings = patchInfo.stringDataMap;
    for (auto entry = printfStrings.begin(); entry != printfStrings.end(); ++entry) {
        delete[] entry->second.pStringData;
    }
    printfStrings.clear();

    delete[] crossThreadData;
}
|
||||
|
||||
// Reads the strings that follow an SPatchKernelArgumentInfo token and stores
// them in kernelArgInfo[ArgumentNumber].
//
// The strings are laid out back to back directly after the token structure:
// address qualifier, access qualifier, argument name, type name, type
// qualifier. Each *Size field of the token gives the distance to the next
// string. The stored type name is the text up to the first ';'.
//
// pkernelArgInfo - token to read; the pointer is also appended to
//                  patchInfo.kernelArgumentInfo
// Returns CL_INVALID_BINARY when pkernelArgInfo is null, CL_SUCCESS otherwise.
cl_int KernelInfo::storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo) {
    if (pkernelArgInfo == nullptr) {
        return CL_INVALID_BINARY;
    }

    uint32_t argNum = pkernelArgInfo->ArgumentNumber;
    auto pCurArgAttrib = ptrOffset(
        reinterpret_cast<const char *>(pkernelArgInfo),
        sizeof(SPatchKernelArgumentInfo));

    // Grow kernelArgInfo first so that indexing with argNum is valid.
    resizeKernelArgInfoAndRegisterParameter(argNum);

    kernelArgInfo[argNum].addressQualifierStr = pCurArgAttrib;
    pCurArgAttrib += pkernelArgInfo->AddressQualifierSize;

    kernelArgInfo[argNum].accessQualifierStr = pCurArgAttrib;
    pCurArgAttrib += pkernelArgInfo->AccessQualifierSize;

    kernelArgInfo[argNum].name = pCurArgAttrib;
    pCurArgAttrib += pkernelArgInfo->ArgumentNameSize;

    {
        auto typeEnd = strchr(pCurArgAttrib, ';');
        DEBUG_BREAK_IF(typeEnd == nullptr);

        if (typeEnd != nullptr) {
            kernelArgInfo[argNum].typeStr.assign(pCurArgAttrib, typeEnd - pCurArgAttrib);
        } else {
            // No ';' separator: the debug break above is the only diagnostic,
            // so in release builds fall back to the whole NUL-terminated string
            // instead of doing pointer arithmetic on nullptr.
            kernelArgInfo[argNum].typeStr = pCurArgAttrib;
        }
        pCurArgAttrib += pkernelArgInfo->TypeNameSize;
    }

    kernelArgInfo[argNum].typeQualifierStr = pCurArgAttrib;

    patchInfo.kernelArgumentInfo.push_back(pkernelArgInfo);

    return CL_SUCCESS;
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(
|
||||
const SPatchDataParameterBuffer *pDataParameterKernelArg) {
|
||||
uint32_t argNum = pDataParameterKernelArg->ArgumentNumber;
|
||||
uint32_t dataSize = pDataParameterKernelArg->DataSize;
|
||||
uint32_t offset = pDataParameterKernelArg->Offset;
|
||||
uint32_t sourceOffset = pDataParameterKernelArg->SourceOffset;
|
||||
|
||||
storeKernelArgPatchInfo(argNum, dataSize, offset, sourceOffset, 0);
|
||||
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(
|
||||
const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg) {
|
||||
uint32_t argNum = pStatelessGlobalKernelArg->ArgumentNumber;
|
||||
uint32_t offsetSSH = pStatelessGlobalKernelArg->SurfaceStateHeapOffset;
|
||||
|
||||
usesSsh |= true;
|
||||
storeKernelArgPatchInfo(argNum, pStatelessGlobalKernelArg->DataParamSize, pStatelessGlobalKernelArg->DataParamOffset, 0, offsetSSH);
|
||||
kernelArgInfo[argNum].isBuffer = true;
|
||||
patchInfo.statelessGlobalMemObjKernelArgs.push_back(pStatelessGlobalKernelArg);
|
||||
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(
|
||||
const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg) {
|
||||
uint32_t argNum = pImageMemObjKernelArg->ArgumentNumber;
|
||||
uint32_t offsetSurfaceState = pImageMemObjKernelArg->Offset;
|
||||
|
||||
usesSsh |= true;
|
||||
storeKernelArgPatchInfo(argNum, 0, 0, 0, offsetSurfaceState);
|
||||
kernelArgInfo[argNum].isImage = true;
|
||||
|
||||
if (pImageMemObjKernelArg->Type == iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA) {
|
||||
kernelArgInfo[argNum].isMediaImage = true;
|
||||
}
|
||||
|
||||
if (pImageMemObjKernelArg->Type == iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA_BLOCK) {
|
||||
kernelArgInfo[argNum].isMediaBlockImage = true;
|
||||
}
|
||||
|
||||
kernelArgInfo[argNum].accessQualifier = pImageMemObjKernelArg->Writeable
|
||||
? CL_KERNEL_ARG_ACCESS_READ_WRITE
|
||||
: CL_KERNEL_ARG_ACCESS_READ_ONLY;
|
||||
patchInfo.imageMemObjKernelArgs.push_back(pImageMemObjKernelArg);
|
||||
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(
|
||||
const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjKernelArg) {
|
||||
uint32_t argNum = pGlobalMemObjKernelArg->ArgumentNumber;
|
||||
uint32_t offsetSurfaceState = pGlobalMemObjKernelArg->Offset;
|
||||
|
||||
usesSsh |= true;
|
||||
storeKernelArgPatchInfo(argNum, 0, 0, 0, offsetSurfaceState);
|
||||
kernelArgInfo[argNum].isBuffer = true;
|
||||
|
||||
patchInfo.globalMemObjKernelArgs.push_back(pGlobalMemObjKernelArg);
|
||||
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(
|
||||
const SPatchSamplerKernelArgument *pSamplerArgument) {
|
||||
uint32_t argNum = pSamplerArgument->ArgumentNumber;
|
||||
uint32_t offsetSurfaceState = pSamplerArgument->Offset;
|
||||
|
||||
storeKernelArgPatchInfo(argNum, 0, 0, 0, offsetSurfaceState);
|
||||
kernelArgInfo[argNum].samplerArgumentType = pSamplerArgument->Type;
|
||||
|
||||
if (this->name == "ve_enhance_intel" ||
|
||||
this->name == "ve_dn_enhance_intel" ||
|
||||
this->name == "ve_dn_di_enhance_intel") {
|
||||
kernelArgInfo[argNum].isAccelerator = true;
|
||||
kernelArgInfo[argNum].samplerArgumentType = iOpenCL::SAMPLER_OBJECT_VE;
|
||||
} else if (pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_TEXTURE) {
|
||||
DEBUG_BREAK_IF(pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VME &&
|
||||
pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VE &&
|
||||
pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VD);
|
||||
kernelArgInfo[argNum].isAccelerator = true;
|
||||
isVmeWorkload = true;
|
||||
} else {
|
||||
kernelArgInfo[argNum].isSampler = true;
|
||||
}
|
||||
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(
|
||||
const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg) {
|
||||
uint32_t argNum = pStatelessConstMemObjKernelArg->ArgumentNumber;
|
||||
uint32_t offsetSSH = pStatelessConstMemObjKernelArg->SurfaceStateHeapOffset;
|
||||
|
||||
usesSsh |= true;
|
||||
storeKernelArgPatchInfo(argNum, pStatelessConstMemObjKernelArg->DataParamSize, pStatelessConstMemObjKernelArg->DataParamOffset, 0, offsetSSH);
|
||||
kernelArgInfo[argNum].isBuffer = true;
|
||||
patchInfo.statelessGlobalMemObjKernelArgs.push_back(reinterpret_cast<const SPatchStatelessGlobalMemoryObjectKernelArgument *>(pStatelessConstMemObjKernelArg));
|
||||
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg) {
|
||||
uint32_t argNum = pStatelessDeviceQueueKernelArg->ArgumentNumber;
|
||||
|
||||
resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelArgInfo[argNum].isDeviceQueue = true;
|
||||
|
||||
storeKernelArgPatchInfo(argNum, pStatelessDeviceQueueKernelArg->DataParamSize, pStatelessDeviceQueueKernelArg->DataParamOffset, 0, pStatelessDeviceQueueKernelArg->SurfaceStateHeapOffset);
|
||||
}
|
||||
|
||||
void KernelInfo::storePatchToken(
|
||||
const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg) {
|
||||
usesSsh |= true;
|
||||
patchInfo.pAllocateStatelessPrivateSurface = pStatelessPrivateSurfaceArg;
|
||||
}
|
||||
|
||||
// Keeps the stateless constant memory surface (with initialization) token in
// patchInfo and marks the kernel as using the SSH.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg) {
    patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = pStatelessConstantMemorySurfaceWithInitializationArg;
    usesSsh |= true;
}
|
||||
|
||||
// Keeps the stateless global memory surface (with initialization) token in
// patchInfo and marks the kernel as using the SSH.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg) {
    patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = pStatelessGlobalMemorySurfaceWithInitializationArg;
    usesSsh |= true;
}
|
||||
|
||||
// Keeps the stateless printf surface token in patchInfo and marks the kernel
// as using the SSH.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg) {
    patchInfo.pAllocateStatelessPrintfSurface = pStatelessPrintfSurfaceArg;
    usesSsh |= true;
}
|
||||
|
||||
// Keeps the stateless event pool surface token in patchInfo and marks the
// kernel as using the SSH.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg) {
    patchInfo.pAllocateStatelessEventPoolSurface = pStatelessEventPoolSurfaceArg;
    usesSsh |= true;
}
|
||||
|
||||
// Keeps the stateless default device queue surface token in patchInfo and
// marks the kernel as using the SSH.
void KernelInfo::storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg) {
    patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = pStatelessDefaultDeviceQueueSurfaceArg;
    usesSsh |= true;
}
|
||||
|
||||
void KernelInfo::storePatchToken(const SPatchString *pStringArg) {
|
||||
uint32_t stringIndex = pStringArg->Index;
|
||||
PrintfStringInfo printfStringInfo;
|
||||
printfStringInfo.SizeInBytes = pStringArg->StringSize;
|
||||
if (printfStringInfo.SizeInBytes) {
|
||||
printfStringInfo.pStringData = new char[printfStringInfo.SizeInBytes];
|
||||
if (printfStringInfo.pStringData != nullptr) {
|
||||
memcpy_s(printfStringInfo.pStringData, printfStringInfo.SizeInBytes, (cl_char *)pStringArg + sizeof(SPatchString), printfStringInfo.SizeInBytes);
|
||||
patchInfo.stringDataMap.insert(std::pair<uint32_t, PrintfStringInfo>(stringIndex, printfStringInfo));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KernelInfo::storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo) {
|
||||
attributes = reinterpret_cast<const char *>(pKernelAttributesInfo) + sizeof(SPatchKernelAttributesInfo);
|
||||
|
||||
auto start = attributes.find("intel_reqd_sub_group_size(");
|
||||
if (start != std::string::npos) {
|
||||
start += strlen("intel_reqd_sub_group_size(");
|
||||
auto stop = attributes.find(")", start);
|
||||
std::stringstream requiredSubGroupSizeStr(attributes.substr(start, stop - start));
|
||||
requiredSubGroupSizeStr >> requiredSubGroupSize;
|
||||
}
|
||||
}
|
||||
|
||||
const char *KernelInfo::queryPrintfString(uint32_t index) const {
|
||||
auto printfInfo = patchInfo.stringDataMap.find(index);
|
||||
return printfInfo == patchInfo.stringDataMap.end() ? nullptr : printfInfo->second.pStringData;
|
||||
}
|
||||
|
||||
// Converts the textual qualifiers of every kernel argument into CL values.
//
// For each entry of kernelArgInfo, in order:
//  - accessQualifierStr is looked up in accessQualifierMap,
//  - addressQualifierStr is looked up in addressQualifierMap,
//  - every typeQualifiers keyword found (via strstr) in typeQualifierStr has
//    its bit OR-ed into typeQualifier.
//
// Returns CL_INVALID_BINARY as soon as an access or address qualifier string
// is unknown; later arguments are then left untouched. Returns CL_SUCCESS
// otherwise.
cl_int KernelInfo::resolveKernelInfo() {
    for (auto &argInfo : kernelArgInfo) {
        auto accessEntry = accessQualifierMap.find(argInfo.accessQualifierStr);
        if (accessEntry == accessQualifierMap.end()) {
            return CL_INVALID_BINARY;
        }
        argInfo.accessQualifier = accessEntry->second;

        auto addressEntry = addressQualifierMap.find(argInfo.addressQualifierStr);
        if (addressEntry == addressQualifierMap.end()) {
            return CL_INVALID_BINARY;
        }
        argInfo.addressQualifier = addressEntry->second;

        // Substring match: one qualifier string may carry several keywords.
        for (const auto &qualifier : typeQualifiers) {
            if (strstr(argInfo.typeQualifierStr.c_str(), qualifier.argTypeQualifier) != nullptr) {
                argInfo.typeQualifier |= qualifier.argTypeQualifierValue;
            }
        }
    }

    return CL_SUCCESS;
}
|
||||
|
||||
void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t dataOffset, uint32_t sourceOffset, uint32_t offsetSSH) {
|
||||
resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
kernelArgPatchInfo.crossthreadOffset = dataOffset;
|
||||
kernelArgPatchInfo.size = dataSize;
|
||||
kernelArgPatchInfo.sourceOffset = sourceOffset;
|
||||
|
||||
kernelArgInfo[argNum].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);
|
||||
kernelArgInfo[argNum].offsetHeap = offsetSSH;
|
||||
}
|
||||
|
||||
size_t KernelInfo::getSamplerStateArrayCount() const {
|
||||
size_t count = patchInfo.samplerStateArray ? (size_t)patchInfo.samplerStateArray->Count : 0;
|
||||
return count;
|
||||
}
|
||||
size_t KernelInfo::getSamplerStateArraySize(const HardwareInfo &hwInfo) const {
|
||||
size_t samplerStateArraySize = getSamplerStateArrayCount() * Sampler::getSamplerStateSize(hwInfo);
|
||||
return samplerStateArraySize;
|
||||
}
|
||||
|
||||
size_t KernelInfo::getBorderColorStateSize() const {
|
||||
size_t borderColorSize = 0;
|
||||
if (patchInfo.samplerStateArray) {
|
||||
borderColorSize = patchInfo.samplerStateArray->Offset - patchInfo.samplerStateArray->BorderColorOffset;
|
||||
}
|
||||
return borderColorSize;
|
||||
}
|
||||
|
||||
size_t KernelInfo::getBorderColorOffset() const {
|
||||
size_t borderColorOffset = 0;
|
||||
if (patchInfo.samplerStateArray) {
|
||||
borderColorOffset = patchInfo.samplerStateArray->BorderColorOffset;
|
||||
}
|
||||
return borderColorOffset;
|
||||
}
|
||||
|
||||
uint32_t KernelInfo::getConstantBufferSize() const {
|
||||
return patchInfo.dataParameterStream ? patchInfo.dataParameterStream->DataParameterStreamSize : 0;
|
||||
}
|
||||
} // namespace OCLRT
|
239
runtime/program/kernel_info.h
Normal file
239
runtime/program/kernel_info.h
Normal file
@ -0,0 +1,239 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "config.h"
|
||||
#include "CL/cl.h"
|
||||
#include "heap_info.h"
|
||||
#include "kernel_arg_info.h"
|
||||
#include "patch_info.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
|
||||
namespace OCLRT {
|
||||
class BuiltinDispatchInfoBuilder;
|
||||
class Device;
|
||||
class Kernel;
|
||||
struct KernelInfo;
|
||||
struct KernelArgumentType;
|
||||
|
||||
extern std::unordered_map<std::string, uint32_t> accessQualifierMap;
|
||||
extern std::unordered_map<std::string, uint32_t> addressQualifierMap;
|
||||
extern std::map<std::string, size_t> typeSizeMap;
|
||||
|
||||
// Set of 32-bit offsets describing where workload parameters live. Every
// offset starts as undefinedOffset; slmStaticSize starts at 0.
struct WorkloadInfo {
    // Three entries each, one per array index 0..2.
    uint32_t globalWorkOffsetOffsets[3];
    uint32_t globalWorkSizeOffsets[3];
    uint32_t localWorkSizeOffsets[3];
    uint32_t localWorkSizeOffsets2[3];
    uint32_t enqueuedLocalWorkSizeOffsets[3];
    uint32_t numWorkGroupsOffset[3];

    uint32_t maxWorkGroupSizeOffset;
    uint32_t workDimOffset;
    uint32_t slmStaticSize = 0;
    uint32_t simdSizeOffset;
    uint32_t parentEventOffset;
    uint32_t prefferedWkgMultipleOffset;

    // Sentinels; their values are defined in kernel_info.cpp.
    static const uint32_t undefinedOffset;
    static const uint32_t invalidParentEvent;

    WorkloadInfo() {
        for (auto &offset : globalWorkOffsetOffsets) {
            offset = undefinedOffset;
        }
        for (auto &offset : globalWorkSizeOffsets) {
            offset = undefinedOffset;
        }
        for (auto &offset : localWorkSizeOffsets) {
            offset = undefinedOffset;
        }
        for (auto &offset : localWorkSizeOffsets2) {
            offset = undefinedOffset;
        }
        for (auto &offset : enqueuedLocalWorkSizeOffsets) {
            offset = undefinedOffset;
        }
        for (auto &offset : numWorkGroupsOffset) {
            offset = undefinedOffset;
        }

        maxWorkGroupSizeOffset = undefinedOffset;
        workDimOffset = undefinedOffset;
        simdSizeOffset = undefinedOffset;
        parentEventOffset = undefinedOffset;
        prefferedWkgMultipleOffset = undefinedOffset;
    }
};
|
||||
|
||||
// Numerically equal to ln(4) (= 2 * ln 2).
// NOTE(review): the name suggests a target ratio used when work-group sizes are
// chosen for Y-tiled surfaces; the consumer is not visible here - confirm.
static const float YTilingRatioValue = 1.3862943611198906188344642429164f;
|
||||
|
||||
struct WorkSizeInfo {
|
||||
|
||||
uint32_t maxWorkGroupSize;
|
||||
uint32_t minWorkGroupSize;
|
||||
uint32_t hasBarriers;
|
||||
uint32_t simdSize;
|
||||
uint32_t slmTotalSize;
|
||||
GFXCORE_FAMILY coreFamily;
|
||||
uint32_t numThreadsPerSlice;
|
||||
uint32_t localMemSize;
|
||||
bool imgUsed = false;
|
||||
bool yTiledSurfaces = false;
|
||||
bool useRatio = false;
|
||||
bool useStrictRatio = false;
|
||||
float targetRatio = 0;
|
||||
|
||||
WorkSizeInfo(uint32_t maxWorkGroupSize, uint32_t hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface);
|
||||
WorkSizeInfo(const DispatchInfo &dispatchInfo);
|
||||
void setIfUseImg(Kernel *pKernel);
|
||||
void setMinWorkGroupSize();
|
||||
void checkRatio(const size_t workItems[3]);
|
||||
};
|
||||
|
||||
struct KernelInfo {
|
||||
public:
|
||||
static KernelInfo *create();
|
||||
KernelInfo() {
|
||||
heapInfo = {};
|
||||
patchInfo = {};
|
||||
workloadInfo = {};
|
||||
kernelArgInfo = {};
|
||||
kernelNonArgInfo = {};
|
||||
childrenKernelsIdOffset = {};
|
||||
reqdWorkGroupSize[0] = WorkloadInfo::undefinedOffset;
|
||||
reqdWorkGroupSize[1] = WorkloadInfo::undefinedOffset;
|
||||
reqdWorkGroupSize[2] = WorkloadInfo::undefinedOffset;
|
||||
}
|
||||
|
||||
KernelInfo(const KernelInfo &) = delete;
|
||||
KernelInfo &operator=(const KernelInfo &) = delete;
|
||||
|
||||
~KernelInfo();
|
||||
|
||||
cl_int storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo);
|
||||
void storeKernelArgument(const SPatchDataParameterBuffer *pDataParameterKernelArg);
|
||||
void storeKernelArgument(const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg);
|
||||
void storeKernelArgument(const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg);
|
||||
void storeKernelArgument(const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjKernelArg);
|
||||
void storeKernelArgument(const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg);
|
||||
void storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg);
|
||||
void storeKernelArgument(const SPatchSamplerKernelArgument *pSamplerKernelArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg);
|
||||
void storePatchToken(const SPatchString *pStringArg);
|
||||
void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo);
|
||||
cl_int resolveKernelInfo();
|
||||
void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) {
|
||||
if (kernelArgInfo.size() <= argCount) {
|
||||
kernelArgInfo.resize(argCount + 1);
|
||||
}
|
||||
if (!kernelArgInfo[argCount].needPatch) {
|
||||
kernelArgInfo[argCount].needPatch = true;
|
||||
argumentsToPatchNum++;
|
||||
}
|
||||
}
|
||||
|
||||
void storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t crossthreadOffset, uint32_t sourceOffset, uint32_t offsetSSH);
|
||||
|
||||
const char *queryPrintfString(uint32_t index) const;
|
||||
|
||||
size_t getSamplerStateArrayCount() const;
|
||||
size_t getSamplerStateArraySize(const HardwareInfo &hwInfo) const;
|
||||
size_t getBorderColorStateSize() const;
|
||||
size_t getBorderColorOffset() const;
|
||||
unsigned int getMaxSimdSize() const {
|
||||
const auto executionEnvironment = patchInfo.executionEnvironment;
|
||||
if (executionEnvironment == nullptr) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (executionEnvironment->CompiledSIMD32) {
|
||||
return 32;
|
||||
}
|
||||
|
||||
if (executionEnvironment->CompiledSIMD16) {
|
||||
return 16;
|
||||
}
|
||||
|
||||
return 8;
|
||||
}
|
||||
bool hasDeviceEnqueue() const {
|
||||
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->HasDeviceEnqueue : false;
|
||||
}
|
||||
bool requiresSubgroupIndependentForwardProgress() const {
|
||||
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false;
|
||||
}
|
||||
size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
|
||||
auto requiredWorkGroupSizeX = patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
|
||||
auto requiredWorkGroupSizeY = patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
|
||||
auto requiredWorkGroupSizeZ = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
|
||||
size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
|
||||
if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
|
||||
maxRequiredWorkGroupSize = maxWorkGroupSize;
|
||||
}
|
||||
return maxRequiredWorkGroupSize;
|
||||
}
|
||||
|
||||
uint32_t getConstantBufferSize() const;
|
||||
int32_t getArgNumByName(const char *name) const {
|
||||
int32_t argNum = 0;
|
||||
for (auto &arg : kernelArgInfo) {
|
||||
if (arg.name == name) {
|
||||
return argNum;
|
||||
}
|
||||
++argNum;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::string name;
|
||||
std::string attributes;
|
||||
HeapInfo heapInfo;
|
||||
PatchInfo patchInfo;
|
||||
std::vector<KernelArgInfo> kernelArgInfo;
|
||||
std::vector<KernelArgInfo> kernelNonArgInfo;
|
||||
WorkloadInfo workloadInfo;
|
||||
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
|
||||
bool usesSsh = false;
|
||||
bool requiresSshForBuffers = false;
|
||||
bool isValid = false;
|
||||
bool isVmeWorkload = false;
|
||||
char *crossThreadData = nullptr;
|
||||
size_t reqdWorkGroupSize[3];
|
||||
size_t requiredSubGroupSize = 0;
|
||||
uint32_t gpuPointerSize = 0;
|
||||
const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
|
||||
uint32_t argumentsToPatchNum = 0;
|
||||
uint32_t systemKernelOffset = 0;
|
||||
};
|
||||
} // namespace OCLRT
|
179
runtime/program/link.cpp
Normal file
179
runtime/program/link.cpp
Normal file
@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "runtime/compiler_interface/compiler_interface.h"
|
||||
#include "runtime/platform/platform.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "program.h"
|
||||
#include "elf/writer.h"
|
||||
#include <cstring>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Links the given input programs into this program.
//
// Each input's intermediate binary (SPIR-V or LLVM) is added as a section of a
// temporary ELF container. The container is then passed to the compiler
// interface: to link() followed by processGenBinary() for an executable, or to
// createLibrary() when the build options contain "-create-library".
//
// Returns CL_SUCCESS, or:
//   CL_INVALID_VALUE       - inconsistent deviceList/numDevices, no input
//                            programs, or userData given without funcNotify
//   CL_INVALID_DEVICE      - the first entry of deviceList fails validation
//   CL_INVALID_OPERATION   - a build of this program is already in progress
//   CL_INVALID_PROGRAM     - an input program is null, is not a Program, or
//                            has no intermediate binary
//   CL_OUT_OF_HOST_MEMORY  - the ELF writer or the compiler interface is unavailable
//   or whatever the compiler interface / processGenBinary() reports.
//
// On every exit path buildStatus is updated, internalOptions is cleared and
// funcNotify (if given) is invoked with this program.
cl_int Program::link(
    cl_uint numDevices,
    const cl_device_id *deviceList,
    const char *buildOptions,
    cl_uint numInputPrograms,
    const cl_program *inputPrograms,
    void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
    void *userData) {
    cl_int retVal = CL_SUCCESS;
    cl_program program;
    CLElfLib::CElfWriter *pElfWriter = nullptr;
    Program *pInputProgObj;
    size_t dataSize;
    char *pData = nullptr;
    bool isCreateLibrary;
    CLElfLib::SSectionNode sectionNode;

    // Single-pass loop: `break` jumps to the common cleanup below.
    do {
        if (((deviceList == nullptr) && (numDevices != 0)) ||
            ((deviceList != nullptr) && (numDevices == 0))) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        if ((numInputPrograms == 0) || (inputPrograms == nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        if ((funcNotify == nullptr) &&
            (userData != nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        // Only the first device of the list is validated here.
        if ((deviceList != nullptr) && validateObject(*deviceList) != CL_SUCCESS) {
            retVal = CL_INVALID_DEVICE;
            break;
        }

        if (buildStatus == CL_BUILD_IN_PROGRESS) {
            retVal = CL_INVALID_OPERATION;
            break;
        }

        options = (buildOptions != nullptr) ? buildOptions : "";

        isCreateLibrary = (strstr(options.c_str(), "-create-library") != nullptr);

        buildStatus = CL_BUILD_IN_PROGRESS;

        pElfWriter = CLElfLib::CElfWriter::create(CLElfLib::EH_TYPE_OPENCL_OBJECTS, CLElfLib::EH_MACHINE_NONE, 0);
        // Defensive: the writer is dereferenced below.
        // NOTE(review): assumes create() can return nullptr on failure - confirm.
        if (pElfWriter == nullptr) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        StackVec<const Program *, 16> inputProgramsInternal;
        for (cl_uint i = 0; i < numInputPrograms; i++) {
            program = inputPrograms[i];
            if (program == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }
            pInputProgObj = castToObject<Program>(program);
            if (pInputProgObj == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }
            inputProgramsInternal.push_back(pInputProgObj);
            if ((pInputProgObj->llvmBinary == nullptr) || (pInputProgObj->llvmBinarySize == 0)) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }

            // One ELF section per input program, typed by its intermediate representation.
            sectionNode.Name = "";
            if (pInputProgObj->getIsSpirV()) {
                sectionNode.Type = CLElfLib::SH_TYPE_SPIRV;
            } else {
                sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY;
            }
            sectionNode.Flags = 0;
            sectionNode.pData = pInputProgObj->llvmBinary;
            sectionNode.DataSize = static_cast<unsigned int>(pInputProgObj->llvmBinarySize);

            pElfWriter->addSection(&sectionNode);
        }
        // The inner `break`s only leave the for loop; propagate the failure.
        if (retVal != CL_SUCCESS) {
            break;
        }

        // First call obtains the required size, second call fills the buffer.
        pElfWriter->resolveBinary(nullptr, dataSize);
        pData = new char[dataSize];
        pElfWriter->resolveBinary(pData, dataSize);

        CompilerInterface *pCompilerInterface = getCompilerInterface();
        if (!pCompilerInterface) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        TranslationArgs inputArgs = {};

        inputArgs.pInput = pData;
        inputArgs.InputSize = static_cast<uint32_t>(dataSize);
        inputArgs.pOptions = options.c_str();
        inputArgs.OptionsSize = static_cast<uint32_t>(options.length());
        inputArgs.pInternalOptions = internalOptions.c_str();
        inputArgs.InternalOptionsSize = static_cast<uint32_t>(internalOptions.length());
        inputArgs.pTracingOptions = nullptr;
        inputArgs.TracingOptionsCount = 0;

        if (!isCreateLibrary) {
            retVal = pCompilerInterface->link(*this, inputArgs);
            if (retVal != CL_SUCCESS) {
                break;
            }

            retVal = processGenBinary();
            if (retVal != CL_SUCCESS) {
                break;
            }
            programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
        } else {
            retVal = pCompilerInterface->createLibrary(*this, inputArgs);
            if (retVal != CL_SUCCESS) {
                break;
            }
            programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
        }
        updateNonUniformFlag(&*inputProgramsInternal.begin(), inputProgramsInternal.size());
        separateBlockKernels();
    } while (false);

    if (retVal != CL_SUCCESS) {
        buildStatus = CL_BUILD_ERROR;
        programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
    } else {
        buildStatus = CL_BUILD_SUCCESS;
    }

    // Common cleanup; destroy() is also reached with a null writer on the early-exit paths.
    CLElfLib::CElfWriter::destroy(pElfWriter);
    delete[] pData;
    internalOptions.clear();

    if (funcNotify != nullptr) {
        (*funcNotify)(this, userData);
    }

    return retVal;
}
|
||||
} // namespace OCLRT
|
96
runtime/program/patch_info.h
Normal file
96
runtime/program/patch_info.h
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "patch_list.h"
|
||||
#include "patch_g7.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
namespace OCLRT {
|
||||
using iOpenCL::SPatchMediaInterfaceDescriptorLoad;
|
||||
using iOpenCL::SPatchAllocateLocalSurface;
|
||||
using iOpenCL::SPatchMediaVFEState;
|
||||
using iOpenCL::SPatchInterfaceDescriptorData;
|
||||
using iOpenCL::SPatchSamplerStateArray;
|
||||
using iOpenCL::SPatchBindingTableState;
|
||||
using iOpenCL::SPatchDataParameterBuffer;
|
||||
using iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument;
|
||||
using iOpenCL::SPatchGlobalMemoryObjectKernelArgument;
|
||||
using iOpenCL::SPatchStatelessConstantMemoryObjectKernelArgument;
|
||||
using iOpenCL::SPatchStatelessDeviceQueueKernelArgument;
|
||||
using iOpenCL::SPatchImageMemoryObjectKernelArgument;
|
||||
using iOpenCL::SPatchSamplerKernelArgument;
|
||||
using iOpenCL::SPatchDataParameterStream;
|
||||
using iOpenCL::SPatchThreadPayload;
|
||||
using iOpenCL::SPatchExecutionEnvironment;
|
||||
using iOpenCL::SPatchKernelAttributesInfo;
|
||||
using iOpenCL::SPatchKernelArgumentInfo;
|
||||
using iOpenCL::SKernelBinaryHeaderCommon;
|
||||
using iOpenCL::SProgramBinaryHeader;
|
||||
using iOpenCL::SPatchAllocateStatelessPrivateSurface;
|
||||
using iOpenCL::SPatchAllocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
using iOpenCL::SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
using iOpenCL::SPatchAllocateStatelessPrintfSurface;
|
||||
using iOpenCL::SPatchAllocateStatelessEventPoolSurface;
|
||||
using iOpenCL::SPatchAllocateStatelessDefaultDeviceQueueSurface;
|
||||
using iOpenCL::SPatchString;
|
||||
using iOpenCL::SPatchGtpinFreeGRFInfo;
|
||||
using iOpenCL::SPatchStateSIP;
|
||||
|
||||
// A printf string and its size in bytes; the mapped type of PatchInfo::stringDataMap.
struct TagPrintfStringInfo {
    size_t SizeInBytes;
    char *pStringData;
};
using PrintfStringInfo = TagPrintfStringInfo;
using PPrintfStringInfo = TagPrintfStringInfo *;
|
||||
|
||||
struct PatchInfo {
|
||||
const SPatchMediaInterfaceDescriptorLoad *interfaceDescriptorDataLoad = nullptr;
|
||||
const SPatchAllocateLocalSurface *localsurface = nullptr;
|
||||
const SPatchMediaVFEState *mediavfestate = nullptr;
|
||||
const SPatchInterfaceDescriptorData *interfaceDescriptorData = nullptr;
|
||||
const SPatchSamplerStateArray *samplerStateArray = nullptr;
|
||||
const SPatchBindingTableState *bindingTableState = nullptr;
|
||||
::std::vector<const SPatchDataParameterBuffer *> dataParameterBuffers;
|
||||
::std::vector<const SPatchStatelessGlobalMemoryObjectKernelArgument *>
|
||||
statelessGlobalMemObjKernelArgs;
|
||||
::std::vector<const SPatchImageMemoryObjectKernelArgument *>
|
||||
imageMemObjKernelArgs;
|
||||
::std::vector<const SPatchGlobalMemoryObjectKernelArgument *>
|
||||
globalMemObjKernelArgs;
|
||||
const SPatchDataParameterStream *dataParameterStream = nullptr;
|
||||
const SPatchThreadPayload *threadPayload = nullptr;
|
||||
const SPatchExecutionEnvironment *executionEnvironment = nullptr;
|
||||
const SPatchKernelAttributesInfo *pKernelAttributesInfo = nullptr;
|
||||
const SPatchAllocateStatelessPrivateSurface *pAllocateStatelessPrivateSurface = nullptr;
|
||||
const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr;
|
||||
const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr;
|
||||
const SPatchAllocateStatelessPrintfSurface *pAllocateStatelessPrintfSurface = nullptr;
|
||||
const SPatchAllocateStatelessEventPoolSurface *pAllocateStatelessEventPoolSurface = nullptr;
|
||||
const SPatchAllocateStatelessDefaultDeviceQueueSurface *pAllocateStatelessDefaultDeviceQueueSurface = nullptr;
|
||||
::std::map<uint32_t, PrintfStringInfo> stringDataMap;
|
||||
::std::vector<const SPatchKernelArgumentInfo *> kernelArgumentInfo;
|
||||
|
||||
PatchInfo() {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OCLRT
|
189
runtime/program/print_formatter.cpp
Normal file
189
runtime/program/print_formatter.cpp
Normal file
@ -0,0 +1,189 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "print_formatter.h"
|
||||
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
PrintFormatter::PrintFormatter(Kernel &kernelArg, GraphicsAllocation &dataArg) : kernel(kernelArg),
|
||||
data(dataArg),
|
||||
buffer(nullptr),
|
||||
bufferSize(0),
|
||||
offset(0) {
|
||||
}
|
||||
|
||||
void PrintFormatter::printKernelOutput(const std::function<void(char *)> &print) {
|
||||
offset = 0;
|
||||
buffer = reinterpret_cast<uint8_t *>(data.getUnderlyingBuffer());
|
||||
|
||||
// first 4 bytes of the buffer store it's own size
|
||||
// before reading it size needs to be set to 4 because read() checks bounds and would fail if bufferSize was 0
|
||||
bufferSize = 4;
|
||||
read(&bufferSize);
|
||||
|
||||
uint32_t stringIndex = 0;
|
||||
|
||||
while (offset + 4 <= bufferSize) {
|
||||
read(&stringIndex);
|
||||
const char *formatString = kernel.getKernelInfo().queryPrintfString(stringIndex);
|
||||
if (formatString != nullptr) {
|
||||
printString(formatString, print);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PrintFormatter::printString(const char *formatString, const std::function<void(char *)> &print) {
|
||||
size_t length = strnlen_s(formatString, maxPrintfOutputLength);
|
||||
char output[maxPrintfOutputLength];
|
||||
|
||||
size_t cursor = 0;
|
||||
for (size_t i = 0; i <= length; i++) {
|
||||
if (formatString[i] == '\\')
|
||||
output[cursor++] = escapeChar(formatString[++i]);
|
||||
else if (formatString[i] == '%') {
|
||||
size_t end = i;
|
||||
if (end + 1 <= length && formatString[end + 1] == '%') {
|
||||
output[cursor++] = '%';
|
||||
continue;
|
||||
}
|
||||
|
||||
while (isConversionSpecifier(formatString[end++]) == false && end < length)
|
||||
;
|
||||
char dataFormat[maxPrintfOutputLength];
|
||||
|
||||
memcpy_s(dataFormat, maxPrintfOutputLength, formatString + i, end - i);
|
||||
dataFormat[end - i] = '\0';
|
||||
|
||||
if (formatString[end - 1] == 's')
|
||||
cursor += printStringToken(output + cursor, maxPrintfOutputLength - cursor, dataFormat);
|
||||
else
|
||||
cursor += printToken(output + cursor, maxPrintfOutputLength - cursor, dataFormat);
|
||||
|
||||
i = end - 1;
|
||||
} else {
|
||||
output[cursor++] = formatString[i];
|
||||
}
|
||||
}
|
||||
|
||||
print(output);
|
||||
}
|
||||
|
||||
// Copies `format` to `stripped` with the vector specifier removed:
// 'v' plus one following character ("v2", "v3", "v4", "v8"), or 'v' plus two
// following characters when the first is '1' ("v16").
// `stripped` must be able to hold strlen(format) + 1 characters.
void PrintFormatter::stripVectorFormat(const char *format, char *stripped) {
    while (*format != '\0') {
        if (*format != 'v') {
            *stripped++ = *format++;
            continue;
        }

        // format[1] is safe to inspect: at worst it is the terminator.
        size_t toSkip = (*(format + 1) != '1') ? 2 : 3;
        // Skip the specifier but never step over the terminator of a truncated spec.
        while (toSkip > 0 && *format != '\0') {
            ++format;
            --toSkip;
        }
    }
    *stripped = '\0';
}
|
||||
|
||||
// Removes an "hl" length modifier that directly precedes the final character
// of `format` (for example "%hld" becomes "%d"). Strings of three characters
// or fewer are left untouched.
void PrintFormatter::stripVectorTypeConversion(char *format) {
    const size_t length = strlen(format);
    if (length <= 3) {
        return;
    }

    char *tail = format + (length - 3); // the last three characters
    if (tail[0] != 'h' || tail[1] != 'l') {
        return;
    }

    tail[0] = tail[2];
    tail[1] = '\0';
}
|
||||
|
||||
size_t PrintFormatter::printToken(char *output, size_t size, const char *formatString) {
|
||||
PRINTF_DATA_TYPE type(PRINTF_DATA_TYPE::INVALID);
|
||||
read(&type);
|
||||
|
||||
switch (type) {
|
||||
case PRINTF_DATA_TYPE::BYTE:
|
||||
return typedPrintToken<int8_t>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::SHORT:
|
||||
return typedPrintToken<int16_t>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::INT:
|
||||
return typedPrintToken<int>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::FLOAT:
|
||||
return typedPrintToken<float>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::LONG:
|
||||
return typedPrintToken<int64_t>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::POINTER:
|
||||
return typedPrintToken<void *>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::DOUBLE:
|
||||
return typedPrintToken<double>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::VECTOR_BYTE:
|
||||
return typedPrintVectorToken<int8_t>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::VECTOR_SHORT:
|
||||
return typedPrintVectorToken<int16_t>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::VECTOR_INT:
|
||||
return typedPrintVectorToken<int>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::VECTOR_LONG:
|
||||
return typedPrintVectorToken<int64_t>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::VECTOR_FLOAT:
|
||||
return typedPrintVectorToken<float>(output, size, formatString);
|
||||
case PRINTF_DATA_TYPE::VECTOR_DOUBLE:
|
||||
return typedPrintVectorToken<double>(output, size, formatString);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Maps the character following a backslash to the character it stands for.
// Only 'n' (newline) is translated; any other character is returned unchanged.
char PrintFormatter::escapeChar(char escape) {
    if (escape == 'n') {
        return '\n';
    }
    return escape;
}
|
||||
|
||||
// Returns true when `c` is one of the printf conversion specifiers
// d i o u x X a A e E f F g G s c p.
bool PrintFormatter::isConversionSpecifier(char c) {
    static const char specifiers[] = {'d', 'i', 'o', 'u', 'x', 'X', 'a', 'A', 'e',
                                      'E', 'f', 'F', 'g', 'G', 's', 'c', 'p'};
    for (const char candidate : specifiers) {
        if (candidate == c) {
            return true;
        }
    }
    return false;
}
|
||||
} // namespace OCLRT
|
141
runtime/program/print_formatter.h
Normal file
141
runtime/program/print_formatter.h
Normal file
@ -0,0 +1,141 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/os_interface/print.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
|
||||
extern int memcpy_s(void *dst, size_t destSize, const void *src, size_t count);
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Tag stored in the printf buffer in front of each argument, identifying the
// type of the value that follows. The numeric values are spelled out because
// they are compared against raw integers read from the buffer.
enum class PRINTF_DATA_TYPE : int {
    INVALID = 0,
    BYTE = 1,
    SHORT = 2,
    INT = 3,
    FLOAT = 4,
    STRING = 5,
    LONG = 6,
    POINTER = 7,
    DOUBLE = 8,
    VECTOR_BYTE = 9,
    VECTOR_SHORT = 10,
    VECTOR_INT = 11,
    VECTOR_LONG = 12,
    VECTOR_FLOAT = 13,
    VECTOR_DOUBLE = 14
};
|
||||
|
||||
class PrintFormatter {
|
||||
public:
|
||||
PrintFormatter(Kernel &kernelArg, GraphicsAllocation &dataArg);
|
||||
void printKernelOutput(const std::function<void(char *)> &print = [](char *str) { printToSTDOUT(str); });
|
||||
|
||||
static const size_t maxPrintfOutputLength = 1024;
|
||||
|
||||
protected:
|
||||
void printString(const char *formatString, const std::function<void(char *)> &print);
|
||||
size_t printToken(char *output, size_t size, const char *formatString);
|
||||
|
||||
char escapeChar(char escape);
|
||||
bool isConversionSpecifier(char c);
|
||||
void stripVectorFormat(const char *format, char *stripped);
|
||||
void stripVectorTypeConversion(char *format);
|
||||
|
||||
template <class T>
|
||||
bool read(T *value) {
|
||||
if (offset + sizeof(T) <= bufferSize) {
|
||||
auto srcPtr = reinterpret_cast<T *>(buffer + offset);
|
||||
|
||||
if (isAligned(srcPtr)) {
|
||||
*value = *srcPtr;
|
||||
} else {
|
||||
memcpy_s(value, bufferSize - offset, srcPtr, sizeof(T));
|
||||
}
|
||||
offset += sizeof(T);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
size_t typedPrintToken(char *output, size_t size, const char *formatString) {
|
||||
T value = {0};
|
||||
read(&value);
|
||||
return simple_sprintf(output, size, formatString, value);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
size_t typedPrintVectorToken(char *output, size_t size, const char *formatString) {
|
||||
T value = {0};
|
||||
int valueCount = 0;
|
||||
read(&valueCount);
|
||||
|
||||
size_t charactersPrinted = 0;
|
||||
char strippedFormat[1024];
|
||||
|
||||
stripVectorFormat(formatString, strippedFormat);
|
||||
stripVectorTypeConversion(strippedFormat);
|
||||
|
||||
for (int i = 0; i < valueCount; i++) {
|
||||
read(&value);
|
||||
charactersPrinted += simple_sprintf(output + charactersPrinted, size - charactersPrinted, strippedFormat, value);
|
||||
if (i < valueCount - 1)
|
||||
charactersPrinted += simple_sprintf(output + charactersPrinted, size - charactersPrinted, "%c", ',');
|
||||
}
|
||||
|
||||
if (sizeof(T) < 4) {
|
||||
offset += (4 - sizeof(T)) * valueCount;
|
||||
}
|
||||
|
||||
return charactersPrinted;
|
||||
}
|
||||
|
||||
size_t printStringToken(char *output, size_t size, const char *formatString) {
|
||||
int index = 0;
|
||||
int type = 0;
|
||||
// additional read to discard the data type
|
||||
read(&type);
|
||||
read(&index);
|
||||
if (type == static_cast<int>(PRINTF_DATA_TYPE::STRING))
|
||||
return simple_sprintf(output, size, formatString, kernel.getKernelInfo().queryPrintfString(index));
|
||||
else
|
||||
return simple_sprintf(output, size, formatString, 0);
|
||||
}
|
||||
|
||||
Kernel &kernel;
|
||||
GraphicsAllocation &data;
|
||||
|
||||
uint8_t *buffer; // buffer extracted from the kernel, contains values to be printed
|
||||
uint32_t bufferSize; // size of the data contained in the buffer
|
||||
uint32_t offset; // current position in currently parsed buffer
|
||||
};
|
||||
};
|
81
runtime/program/printf_handler.cpp
Normal file
81
runtime/program/printf_handler.cpp
Normal file
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "printf_handler.h"
|
||||
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/program/print_formatter.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Binds the handler to the device it will use for printf surface management.
// No surface is allocated here; that happens in prepareDispatch().
PrintfHandler::PrintfHandler(Device &deviceArg)
    : device(deviceArg) {
}
|
||||
|
||||
// Hands the printf surface back to the device's memory manager.
// NOTE(review): printfSurface is still nullptr if prepareDispatch() never
// allocated one; this relies on freeGraphicsMemory() accepting nullptr.
PrintfHandler::~PrintfHandler() {
    auto *memoryManager = device.getMemoryManager();
    memoryManager->freeGraphicsMemory(printfSurface);
}
|
||||
|
||||
// Factory: returns a newly allocated PrintfHandler when the dispatch needs a
// printf surface, nullptr otherwise. The caller owns the returned object.
//
// A handler is needed when the dispatch uses a stateless printf surface, or,
// failing that, when the first dispatched kernel reports
// checkIfIsParentKernelAndBlocksUsesPrintf(). The second check is evaluated
// only if the first one is false.
PrintfHandler *PrintfHandler::create(const MultiDispatchInfo &multiDispatchInfo, Device &device) {
    bool printfRequired = multiDispatchInfo.usesStatelessPrintfSurface();
    if (!printfRequired) {
        printfRequired = multiDispatchInfo.begin()->getKernel()->checkIfIsParentKernelAndBlocksUsesPrintf();
    }

    return printfRequired ? new PrintfHandler(device) : nullptr;
}
|
||||
|
||||
void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) {
|
||||
auto printfSurfaceSize = device.getDeviceInfo().printfBufferSize;
|
||||
if (printfSurfaceSize == 0) {
|
||||
return;
|
||||
}
|
||||
kernel = multiDispatchInfo.begin()->getKernel();
|
||||
|
||||
printfSurface = device.getMemoryManager()->createGraphicsAllocationWithRequiredBitness(printfSurfaceSize, nullptr);
|
||||
|
||||
*reinterpret_cast<uint32_t *>(printfSurface->getUnderlyingBuffer()) = printfSurfaceInitialDataSize;
|
||||
|
||||
auto printfPatchAddress = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getCrossThreadData()),
|
||||
kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset);
|
||||
|
||||
patchWithRequiredSize(printfPatchAddress, kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap()),
|
||||
kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset);
|
||||
void *addressToPatch = printfSurface->getUnderlyingBuffer();
|
||||
size_t sizeToPatch = printfSurface->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&kernel->getContext(), surfaceState, sizeToPatch, addressToPatch, printfSurface);
|
||||
}
|
||||
}
|
||||
|
||||
// Asks the command stream receiver to make the printf surface resident.
// Dereferences printfSurface, so a surface must have been allocated first.
void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
    GraphicsAllocation &surface = *printfSurface;
    commandStreamReceiver.makeResident(surface);
}
|
||||
|
||||
void PrintfHandler::printEnqueueOutput() {
|
||||
PrintFormatter printFormatter(*kernel, *printfSurface);
|
||||
printFormatter.printKernelOutput();
|
||||
}
|
||||
} // namespace OCLRT
|
54
runtime/program/printf_handler.h
Normal file
54
runtime/program/printf_handler.h
Normal file
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
struct MultiDispatchInfo;
|
||||
|
||||
class PrintfHandler {
|
||||
public:
|
||||
static PrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, Device &deviceArg);
|
||||
|
||||
~PrintfHandler();
|
||||
|
||||
void prepareDispatch(const MultiDispatchInfo &multiDispatchInfo);
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
void printEnqueueOutput();
|
||||
|
||||
GraphicsAllocation *getSurface() {
|
||||
return printfSurface;
|
||||
}
|
||||
|
||||
protected:
|
||||
PrintfHandler(Device &device);
|
||||
|
||||
static const uint32_t printfSurfaceInitialDataSize = sizeof(uint32_t);
|
||||
Device &device;
|
||||
Kernel *kernel = nullptr;
|
||||
GraphicsAllocation *printfSurface = nullptr;
|
||||
};
|
||||
} // namespace OCLRT
|
266
runtime/program/process_elf_binary.cpp
Normal file
266
runtime/program/process_elf_binary.cpp
Normal file
@ -0,0 +1,266 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "elf/reader.h"
|
||||
#include "elf/writer.h"
|
||||
#include "program.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Populates this Program from an OpenCL ELF container.
//
// pBinary / binarySize  - the ELF image supplied by the caller.
// binaryVersion         - set to iOpenCL::CURRENT_ICBE_VERSION on entry; when a
//                         device binary section is rejected it is passed to
//                         getProgramCompilerVersion() (presumably so the
//                         caller can report the mismatching version).
//
// Steps: validate the ELF, keep a private copy in elfBinary, map the ELF type
// to programBinaryType, then walk every section (section 0 is skipped):
//   SPIRV / LLVM binary -> storeLlvmBinary (SPIRV also sets isSpirV)
//   device binary       -> validated, then storeGenBinary
//   options             -> copied into `options`
//   string table        -> ignored
//   anything else       -> CL_INVALID_BINARY
//
// Returns CL_SUCCESS, CL_INVALID_BINARY or CL_OUT_OF_HOST_MEMORY. On success
// the program is marked resolved and built, with an empty build log.
cl_int Program::processElfBinary(
    const void *pBinary,
    size_t binarySize,
    uint32_t &binaryVersion) {
    cl_int retVal = CL_SUCCESS;
    CLElfLib::CElfReader *pElfReader = nullptr;
    const CLElfLib::SElf64Header *pElfHeader = nullptr;
    char *pSectionData = nullptr;
    size_t sectionDataSize = 0;

    binaryVersion = iOpenCL::CURRENT_ICBE_VERSION;

    if (CLElfLib::CElfReader::isValidElf64(pBinary, binarySize) == false) {
        retVal = CL_INVALID_BINARY;
    }

    if (retVal == CL_SUCCESS) {
        // Replace any previously stored container. The pointer is cleared
        // right after the delete so it never dangles if the allocation below
        // fails (same pattern as resolveProgramBinary).
        delete[] elfBinary;
        elfBinary = nullptr;
        elfBinarySize = 0;

        elfBinary = new char[binarySize];

        elfBinarySize = binarySize;
        memcpy_s(elfBinary, elfBinarySize, pBinary, binarySize);
    }

    if (retVal == CL_SUCCESS) {
        pElfReader = CLElfLib::CElfReader::create(
            static_cast<const char *>(pBinary),
            binarySize);

        if (pElfReader == nullptr) {
            retVal = CL_OUT_OF_HOST_MEMORY;
        }
    }

    if (retVal == CL_SUCCESS) {
        pElfHeader = pElfReader->getElfHeader();

        switch (pElfHeader->Type) {
        case CLElfLib::EH_TYPE_OPENCL_EXECUTABLE:
            programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
            break;

        case CLElfLib::EH_TYPE_OPENCL_LIBRARY:
            programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
            break;

        case CLElfLib::EH_TYPE_OPENCL_OBJECTS:
            programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
            break;

        default:
            retVal = CL_INVALID_BINARY;
        }
    }

    if (retVal == CL_SUCCESS) {
        // section 0 is always null
        for (uint32_t i = 1; i < pElfHeader->NumSectionHeaderEntries; i++) {
            const CLElfLib::SElf64SectionHeader *pSectionHeader = pElfReader->getSectionHeader(i);

            pSectionData = nullptr;
            sectionDataSize = 0;

            switch (pSectionHeader->Type) {
            case CLElfLib::SH_TYPE_SPIRV:
                isSpirV = true;
                // FALLTHROUGH
            case CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY:
                pElfReader->getSectionData(i, pSectionData, sectionDataSize);
                if (pSectionData && sectionDataSize) {
                    storeLlvmBinary(pSectionData, sectionDataSize);
                }
                break;

            case CLElfLib::SH_TYPE_OPENCL_DEV_BINARY:
                pElfReader->getSectionData(i, pSectionData, sectionDataSize);
                if (pSectionData && sectionDataSize && validateGenBinaryHeader(reinterpret_cast<SProgramBinaryHeader *>(pSectionData))) {
                    storeGenBinary(pSectionData, sectionDataSize);
                    isCreatedFromBinary = true;
                } else {
                    // NOTE(review): pSectionData may be nullptr on this path;
                    // confirm getProgramCompilerVersion tolerates that.
                    getProgramCompilerVersion(reinterpret_cast<SProgramBinaryHeader *>(pSectionData), binaryVersion);
                    retVal = CL_INVALID_BINARY;
                }
                break;

            case CLElfLib::SH_TYPE_OPENCL_OPTIONS:
                pElfReader->getSectionData(i, pSectionData, sectionDataSize);
                if (pSectionData && sectionDataSize) {
                    // The section comes from a caller-supplied binary and is
                    // not guaranteed to be NUL-terminated: copy up to the
                    // first NUL but never beyond the section itself.
                    size_t optionsLength = 0;
                    while (optionsLength < sectionDataSize && pSectionData[optionsLength] != '\0') {
                        ++optionsLength;
                    }
                    options.assign(pSectionData, optionsLength);
                }
                break;

            case CLElfLib::SH_TYPE_STR_TBL:
                // We can skip the string table
                break;

            default:
                retVal = CL_INVALID_BINARY;
            }

            if (retVal != CL_SUCCESS) {
                break;
            }
        }
    }

    if (retVal == CL_SUCCESS) {
        isProgramBinaryResolved = true;
        buildStatus = CL_BUILD_SUCCESS;

        // Create an empty build log since program is effectively built
        updateBuildLog(pDevice, "", 1);
    }

    CLElfLib::CElfReader::destroy(pElfReader);
    return retVal;
}
|
||||
|
||||
// Rebuilds elfBinary / elfBinarySize from the program's current components.
//
// Does nothing (returns CL_SUCCESS) when the binary is already resolved.
// Otherwise the old ELF image is discarded and a new one is assembled with
// CLElfLib::CElfWriter from, in order: the build options string, the LLVM or
// SPIR-V component, the device binary (if genBinary is set) and the device
// debug data (if debugData is set).
//
// Returns CL_INVALID_BINARY when the component required by programBinaryType
// is missing, when programBinaryType is unknown, or when the writer reports a
// failure; CL_OUT_OF_HOST_MEMORY when the writer cannot be created.
cl_int Program::resolveProgramBinary() {
    cl_int retVal = CL_SUCCESS;
    CLElfLib::E_EH_TYPE headerType;
    CLElfLib::CElfWriter *pElfWriter = nullptr;

    if (isProgramBinaryResolved) {
        return retVal;
    }

    delete[] elfBinary;
    elfBinary = nullptr;
    elfBinarySize = 0;

    // Pick the ELF type and check that the component it relies on is present.
    switch (programBinaryType) {
    case CL_PROGRAM_BINARY_TYPE_EXECUTABLE:
        headerType = CLElfLib::EH_TYPE_OPENCL_EXECUTABLE;

        if (!genBinary || !genBinarySize) {
            retVal = CL_INVALID_BINARY;
        }
        break;

    case CL_PROGRAM_BINARY_TYPE_LIBRARY:
        headerType = CLElfLib::EH_TYPE_OPENCL_LIBRARY;

        if (!llvmBinary || !llvmBinarySize) {
            retVal = CL_INVALID_BINARY;
        }
        break;

    case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT:
        headerType = CLElfLib::EH_TYPE_OPENCL_OBJECTS;

        if (!llvmBinary || !llvmBinarySize) {
            retVal = CL_INVALID_BINARY;
        }
        break;

    default:
        retVal = CL_INVALID_BINARY;
    }

    if (retVal != CL_SUCCESS) {
        return retVal;
    }

    pElfWriter = CLElfLib::CElfWriter::create(headerType, CLElfLib::EH_MACHINE_NONE, 0);

    if (pElfWriter) {
        CLElfLib::SSectionNode sectionNode;

        // Always add the options string
        sectionNode.Name = "BuildOptions";
        sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_OPTIONS;
        sectionNode.pData = const_cast<char *>(options.c_str());
        sectionNode.DataSize = static_cast<uint32_t>(strlen(options.c_str()) + 1);

        auto elfRetVal = pElfWriter->addSection(&sectionNode);

        if (elfRetVal) {
            // Add the LLVM component; libraries and objects differ only in
            // the section name.
            sectionNode.Type = getIsSpirV() ? CLElfLib::SH_TYPE_SPIRV
                                            : CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY;
            sectionNode.Name = (headerType == CLElfLib::EH_TYPE_OPENCL_LIBRARY)
                                   ? "Intel(R) OpenCL LLVM Archive"
                                   : "Intel(R) OpenCL LLVM Object";
            sectionNode.pData = (char *)llvmBinary;
            sectionNode.DataSize = static_cast<uint32_t>(llvmBinarySize);
            elfRetVal = pElfWriter->addSection(&sectionNode);
        }

        // Add the device binary if it exists
        if (elfRetVal && genBinary) {
            sectionNode.Name = "Intel(R) OpenCL Device Binary";
            sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_DEV_BINARY;
            sectionNode.pData = (char *)genBinary;
            sectionNode.DataSize = static_cast<uint32_t>(genBinarySize);

            elfRetVal = pElfWriter->addSection(&sectionNode);
        }

        // Add the device debug data if it exists
        if (elfRetVal && (debugData != nullptr)) {
            sectionNode.Name = "Intel(R) OpenCL Device Debug";
            sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_DEV_DEBUG;
            sectionNode.pData = debugData;
            sectionNode.DataSize = static_cast<uint32_t>(debugDataSize);
            elfRetVal = pElfWriter->addSection(&sectionNode);
        }

        // First call runs with elfBinary == nullptr (it was reset above) and
        // is followed by allocating elfBinarySize bytes, i.e. it is used to
        // obtain the required size; the second call fills the buffer.
        if (elfRetVal) {
            elfRetVal = pElfWriter->resolveBinary(elfBinary, elfBinarySize);
        }

        if (elfRetVal) {
            elfBinary = new char[elfBinarySize];

            elfRetVal = pElfWriter->resolveBinary(elfBinary, elfBinarySize);
        }

        if (elfRetVal) {
            isProgramBinaryResolved = true;
        } else {
            retVal = CL_INVALID_BINARY;
        }
    } else {
        retVal = CL_OUT_OF_HOST_MEMORY;
    }

    CLElfLib::CElfWriter::destroy(pElfWriter);

    return retVal;
}
|
||||
}
|
976
runtime/program/process_gen_binary.cpp
Normal file
976
runtime/program/process_gen_binary.cpp
Normal file
@ -0,0 +1,976 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/helpers/hash.h"
|
||||
#include "runtime/helpers/ptr_math.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "patch_list.h"
|
||||
#include "patch_shared.h"
|
||||
#include "program.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace iOpenCL;
|
||||
|
||||
namespace OCLRT {
|
||||
extern bool familyEnabled[];
|
||||
|
||||
// Looks a kernel up by name.
// Returns the first entry of kernelInfoArray whose name compares equal (as a
// C string) to kernelName, or nullptr when kernelName is nullptr or no entry
// matches.
const KernelInfo *Program::getKernelInfo(
    const char *kernelName) const {
    if (nullptr == kernelName) {
        return nullptr;
    }

    for (const KernelInfo *candidate : kernelInfoArray) {
        if (strcmp(candidate->name.c_str(), kernelName) == 0) {
            return candidate;
        }
    }

    return nullptr;
}
|
||||
|
||||
size_t Program::getNumKernels() const {
|
||||
return kernelInfoArray.size();
|
||||
}
|
||||
|
||||
// Returns the kernel info stored at position `ordinal`.
// An out-of-range ordinal only triggers DEBUG_BREAK_IF; the array is indexed
// regardless, so callers must pass ordinal < getNumKernels().
const KernelInfo *Program::getKernelInfo(size_t ordinal) const {
    DEBUG_BREAK_IF(ordinal >= kernelInfoArray.size());

    const KernelInfo *pSelected = kernelInfoArray[ordinal];
    return pSelected;
}
|
||||
|
||||
// Joins the names of all kernels in kernelInfoArray, in array order, with ';'
// between consecutive names (no leading or trailing separator).
// Returns an empty string when there are no kernels.
std::string Program::getKernelNamesString() const {
    std::string joinedNames;
    bool needsSeparator = false;

    for (const auto *info : kernelInfoArray) {
        if (needsSeparator) {
            joinedNames += ";";
        }
        joinedNames += info->name;
        needsSeparator = true;
    }

    return joinedNames;
}
|
||||
|
||||
size_t Program::processKernel(
|
||||
const void *pKernelBlob,
|
||||
cl_int &retVal) {
|
||||
size_t sizeProcessed = 0;
|
||||
|
||||
do {
|
||||
auto pKernelInfo = KernelInfo::create();
|
||||
if (!pKernelInfo) {
|
||||
retVal = CL_OUT_OF_HOST_MEMORY;
|
||||
break;
|
||||
}
|
||||
|
||||
auto pCurKernelPtr = pKernelBlob;
|
||||
pKernelInfo->heapInfo.pBlob = pKernelBlob;
|
||||
|
||||
pKernelInfo->heapInfo.pKernelHeader = reinterpret_cast<const SKernelBinaryHeaderCommon *>(pCurKernelPtr);
|
||||
pCurKernelPtr = ptrOffset(pCurKernelPtr, sizeof(SKernelBinaryHeaderCommon));
|
||||
|
||||
std::string readName{reinterpret_cast<const char *>(pCurKernelPtr), pKernelInfo->heapInfo.pKernelHeader->KernelNameSize};
|
||||
pKernelInfo->name = readName.c_str();
|
||||
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->KernelNameSize);
|
||||
|
||||
pKernelInfo->heapInfo.pKernelHeap = pCurKernelPtr;
|
||||
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->KernelHeapSize);
|
||||
|
||||
pKernelInfo->heapInfo.pGsh = pCurKernelPtr;
|
||||
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->GeneralStateHeapSize);
|
||||
|
||||
pKernelInfo->heapInfo.pDsh = pCurKernelPtr;
|
||||
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->DynamicStateHeapSize);
|
||||
|
||||
pKernelInfo->heapInfo.pSsh = const_cast<void *>(pCurKernelPtr);
|
||||
pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize);
|
||||
|
||||
pKernelInfo->heapInfo.pPatchList = pCurKernelPtr;
|
||||
|
||||
retVal = parsePatchList(*pKernelInfo);
|
||||
if (retVal != CL_SUCCESS) {
|
||||
delete pKernelInfo;
|
||||
|
||||
sizeProcessed = ptrDiff(pCurKernelPtr, pKernelBlob);
|
||||
break;
|
||||
}
|
||||
|
||||
auto pKernelHeader = pKernelInfo->heapInfo.pKernelHeader;
|
||||
auto pKernel = ptrOffset(pKernelBlob, sizeof(SKernelBinaryHeaderCommon));
|
||||
|
||||
if (genBinary)
|
||||
pKernelInfo->gpuPointerSize = reinterpret_cast<const SProgramBinaryHeader *>(genBinary)->GPUPointerSizeInBytes;
|
||||
|
||||
uint32_t kernelSize =
|
||||
pKernelHeader->DynamicStateHeapSize +
|
||||
pKernelHeader->GeneralStateHeapSize +
|
||||
pKernelHeader->KernelHeapSize +
|
||||
pKernelHeader->KernelNameSize +
|
||||
pKernelHeader->PatchListSize +
|
||||
pKernelHeader->SurfaceStateHeapSize;
|
||||
|
||||
pKernelInfo->heapInfo.blobSize = kernelSize + sizeof(SKernelBinaryHeaderCommon);
|
||||
|
||||
uint32_t kernelCheckSum = pKernelInfo->heapInfo.pKernelHeader->CheckSum;
|
||||
|
||||
uint64_t hashValue = Hash::hash(reinterpret_cast<const char *>(pKernel), kernelSize);
|
||||
|
||||
uint32_t calcCheckSum = hashValue & 0xFFFFFFFF;
|
||||
pKernelInfo->isValid = (calcCheckSum == kernelCheckSum);
|
||||
|
||||
retVal = CL_SUCCESS;
|
||||
sizeProcessed = sizeof(SKernelBinaryHeaderCommon) + kernelSize;
|
||||
kernelInfoArray.push_back(pKernelInfo);
|
||||
if (pKernelInfo->hasDeviceEnqueue()) {
|
||||
parentKernelInfoArray.push_back(pKernelInfo);
|
||||
}
|
||||
if (pKernelInfo->requiresSubgroupIndependentForwardProgress()) {
|
||||
subgroupKernelInfoArray.push_back(pKernelInfo);
|
||||
}
|
||||
} while (false);
|
||||
|
||||
return sizeProcessed;
|
||||
}
|
||||
|
||||
cl_int Program::parsePatchList(KernelInfo &kernelInfo) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
|
||||
auto pPatchList = kernelInfo.heapInfo.pPatchList;
|
||||
auto patchListSize = kernelInfo.heapInfo.pKernelHeader->PatchListSize;
|
||||
auto pCurPatchListPtr = pPatchList;
|
||||
uint32_t PrivateMemoryStatelessSizeOffset = 0xFFffFFff;
|
||||
uint32_t LocalMemoryStatelessWindowSizeOffset = 0xFFffFFff;
|
||||
uint32_t LocalMemoryStatelessWindowStartAddressOffset = 0xFFffFFff;
|
||||
|
||||
//Speed up containers by giving some pre-allocated storage
|
||||
kernelInfo.kernelArgInfo.reserve(10);
|
||||
kernelInfo.patchInfo.kernelArgumentInfo.reserve(10);
|
||||
kernelInfo.patchInfo.dataParameterBuffers.reserve(20);
|
||||
std::stringstream PatchTokens;
|
||||
|
||||
DBG_LOG(LogPatchTokens, "\nPATCH_TOKENs for kernel", kernelInfo.name);
|
||||
|
||||
while (ptrDiff(pCurPatchListPtr, pPatchList) < patchListSize) {
|
||||
uint32_t index = 0;
|
||||
uint32_t argNum = 0;
|
||||
auto pPatch = reinterpret_cast<const SPatchItemHeader *>(pCurPatchListPtr);
|
||||
const SPatchDataParameterBuffer *pDataParameterBuffer = nullptr;
|
||||
|
||||
switch (pPatch->Token) {
|
||||
case PATCH_TOKEN_SAMPLER_STATE_ARRAY:
|
||||
kernelInfo.patchInfo.samplerStateArray =
|
||||
reinterpret_cast<const SPatchSamplerStateArray *>(pPatch);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.SAMPLER_STATE_ARRAY", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .Offset", kernelInfo.patchInfo.samplerStateArray->Offset,
|
||||
"\n .Count", kernelInfo.patchInfo.samplerStateArray->Count,
|
||||
"\n .BorderColorOffset", kernelInfo.patchInfo.samplerStateArray->BorderColorOffset);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_BINDING_TABLE_STATE:
|
||||
kernelInfo.patchInfo.bindingTableState =
|
||||
reinterpret_cast<const SPatchBindingTableState *>(pPatch);
|
||||
kernelInfo.usesSsh = (kernelInfo.patchInfo.bindingTableState->Count > 0);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.BINDING_TABLE_STATE", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .Offset", kernelInfo.patchInfo.bindingTableState->Offset,
|
||||
"\n .Count", kernelInfo.patchInfo.bindingTableState->Count,
|
||||
"\n .SurfaceStateOffset", kernelInfo.patchInfo.bindingTableState->SurfaceStateOffset);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE:
|
||||
kernelInfo.patchInfo.localsurface =
|
||||
reinterpret_cast<const SPatchAllocateLocalSurface *>(pPatch);
|
||||
kernelInfo.workloadInfo.slmStaticSize = kernelInfo.patchInfo.localsurface->TotalInlineLocalMemorySize;
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.ALLOCATE_LOCAL_SURFACE", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .TotalInlineLocalMemorySize", kernelInfo.patchInfo.localsurface->TotalInlineLocalMemorySize);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_MEDIA_VFE_STATE:
|
||||
kernelInfo.patchInfo.mediavfestate =
|
||||
reinterpret_cast<const SPatchMediaVFEState *>(pPatch);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.MEDIA_VFE_STATE", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ScratchSpaceOffset", kernelInfo.patchInfo.mediavfestate->ScratchSpaceOffset,
|
||||
"\n .PerThreadScratchSpace", kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_DATA_PARAMETER_BUFFER:
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.DATA_PARAMETER_BUFFER", pPatch->Token,
|
||||
"\n .Size", pPatch->Size);
|
||||
|
||||
pDataParameterBuffer = reinterpret_cast<const SPatchDataParameterBuffer *>(pPatch);
|
||||
kernelInfo.patchInfo.dataParameterBuffers.push_back(
|
||||
pDataParameterBuffer);
|
||||
argNum = pDataParameterBuffer->ArgumentNumber;
|
||||
switch (pDataParameterBuffer->Type) {
|
||||
case DATA_PARAMETER_KERNEL_ARGUMENT:
|
||||
kernelInfo.storeKernelArgument(pDataParameterBuffer);
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "KERNEL_ARGUMENT");
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_LOCAL_WORK_SIZE: {
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_WORK_SIZE");
|
||||
index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
|
||||
if (kernelInfo.workloadInfo.localWorkSizeOffsets[2] == WorkloadInfo::undefinedOffset) {
|
||||
kernelInfo.workloadInfo.localWorkSizeOffsets[index] =
|
||||
pDataParameterBuffer->Offset;
|
||||
} else {
|
||||
kernelInfo.workloadInfo.localWorkSizeOffsets2[index] =
|
||||
pDataParameterBuffer->Offset;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DATA_PARAMETER_GLOBAL_WORK_OFFSET:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "GLOBAL_WORK_OFFSET");
|
||||
index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
|
||||
kernelInfo.workloadInfo.globalWorkOffsetOffsets[index] =
|
||||
pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "ENQUEUED_LOCAL_WORK_SIZE");
|
||||
index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
|
||||
kernelInfo.workloadInfo.enqueuedLocalWorkSizeOffsets[index] =
|
||||
pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_GLOBAL_WORK_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "GLOBAL_WORK_SIZE");
|
||||
index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
|
||||
kernelInfo.workloadInfo.globalWorkSizeOffsets[index] =
|
||||
pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_NUM_WORK_GROUPS:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "NUM_WORK_GROUPS");
|
||||
index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t);
|
||||
kernelInfo.workloadInfo.numWorkGroupsOffset[index] =
|
||||
pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_MAX_WORKGROUP_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "MAX_WORKGROUP_SIZE");
|
||||
kernelInfo.workloadInfo.maxWorkGroupSizeOffset = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_WORK_DIMENSIONS:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "WORK_DIMENSIONS");
|
||||
kernelInfo.workloadInfo.workDimOffset = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES: {
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
kernelArgPatchInfo.size = pDataParameterBuffer->DataSize;
|
||||
kernelArgPatchInfo.crossthreadOffset = pDataParameterBuffer->Offset;
|
||||
|
||||
kernelInfo.kernelArgInfo[argNum].slmAlignment = pDataParameterBuffer->SourceOffset;
|
||||
kernelInfo.kernelArgInfo[argNum].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);
|
||||
} break;
|
||||
|
||||
case DATA_PARAMETER_IMAGE_WIDTH:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_WIDTH");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetImgWidth = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_IMAGE_HEIGHT:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_HEIGHT");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetImgHeight = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_IMAGE_DEPTH:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_DEPTH");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetImgDepth = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_COORDINATE_SNAP_WA_REQUIRED");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetSamplerSnapWa = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
case DATA_PARAMETER_SAMPLER_ADDRESS_MODE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_ADDRESS_MODE");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetSamplerAddressingMode = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
case DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_ADDRESS_MODE");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetSamplerNormalizedCoords = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
case DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_ADDRESS_MODE");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetChannelDataType = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_IMAGE_CHANNEL_ORDER:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_CHANNEL_ORDER");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetChannelOrder = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
case DATA_PARAMETER_IMAGE_ARRAY_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_ARRAY_SIZE");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetArraySize = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_OBJECT_ID:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "OBJECT_ID");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetObjectId = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_SIMD_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "SIMD_SIZE");
|
||||
kernelInfo.workloadInfo.simdSizeOffset = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_PARENT_EVENT:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "PARENT_EVENT");
|
||||
kernelInfo.workloadInfo.parentEventOffset = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "CHILD_BLOCK_SIMD_SIZE");
|
||||
kernelInfo.childrenKernelsIdOffset.push_back({argNum, pDataParameterBuffer->Offset});
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "PRIVATE_MEMORY_STATELESS_SIZE");
|
||||
PrivateMemoryStatelessSizeOffset = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_MEMORY_STATELESS_WINDOW_SIZE");
|
||||
LocalMemoryStatelessWindowSizeOffset = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS");
|
||||
LocalMemoryStatelessWindowStartAddressOffset = pDataParameterBuffer->Offset;
|
||||
pDevice->prepareSLMWindow();
|
||||
break;
|
||||
case DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "PREFERRED_WORKGROUP_MULTIPLE");
|
||||
kernelInfo.workloadInfo.prefferedWkgMultipleOffset = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
case DATA_PARAMETER_BUFFER_OFFSET:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "DATA_PARAMETER_BUFFER_OFFSET");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetBufferOffset = pDataParameterBuffer->Offset;
|
||||
break;
|
||||
case DATA_PARAMETER_NUM_HARDWARE_THREADS:
|
||||
case DATA_PARAMETER_PRINTF_SURFACE_SIZE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled", pDataParameterBuffer->Type);
|
||||
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr,
|
||||
"Program::parsePatchList.Unhandled Data parameter: %d\n", pDataParameterBuffer->Type);
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_VME_MB_BLOCK_TYPE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "VME_MB_BLOCK_TYPE");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetVmeMbBlockType = pDataParameterBuffer->Offset;
|
||||
DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
|
||||
break;
|
||||
case DATA_PARAMETER_VME_SUBPIXEL_MODE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "VME_SUBPIXEL_MODE");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetVmeSubpixelMode = pDataParameterBuffer->Offset;
|
||||
DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
|
||||
break;
|
||||
case DATA_PARAMETER_VME_SAD_ADJUST_MODE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "VME_SAD_ADJUST_MODE");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetVmeSadAdjustMode = pDataParameterBuffer->Offset;
|
||||
DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
|
||||
break;
|
||||
case DATA_PARAMETER_VME_SEARCH_PATH_TYPE:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "VME_SEARCH_PATH_TYPE");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetVmeSearchPathType = pDataParameterBuffer->Offset;
|
||||
DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
|
||||
break;
|
||||
case DATA_PARAMETER_IMAGE_NUM_SAMPLES:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_NUM_SAMPLES");
|
||||
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
kernelInfo.kernelArgInfo[argNum].offsetNumSamples = pDataParameterBuffer->Offset;
|
||||
DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t));
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS:
|
||||
case DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER:
|
||||
case DATA_PARAMETER_STAGE_IN_GRID_ORIGIN:
|
||||
case DATA_PARAMETER_STAGE_IN_GRID_SIZE:
|
||||
break;
|
||||
|
||||
case DATA_PARAMETER_LOCAL_ID:
|
||||
case DATA_PARAMETER_EXECUTION_MASK:
|
||||
case DATA_PARAMETER_VME_IMAGE_TYPE:
|
||||
case DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE:
|
||||
break;
|
||||
|
||||
default:
|
||||
DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled", pDataParameterBuffer->Type);
|
||||
DEBUG_BREAK_IF(true);
|
||||
}
|
||||
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n .ArgumentNumber", pDataParameterBuffer->ArgumentNumber,
|
||||
"\n .Offset", pDataParameterBuffer->Offset,
|
||||
"\n .DataSize", pDataParameterBuffer->DataSize,
|
||||
"\n .SourceOffset", pDataParameterBuffer->SourceOffset);
|
||||
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
|
||||
kernelInfo.patchInfo.interfaceDescriptorDataLoad =
|
||||
reinterpret_cast<const SPatchMediaInterfaceDescriptorLoad *>(pPatch);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.MEDIA_INTERFACE_DESCRIPTOR_LOAD", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .InterfaceDescriptorDataOffset", kernelInfo.patchInfo.interfaceDescriptorDataLoad->InterfaceDescriptorDataOffset);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA:
|
||||
kernelInfo.patchInfo.interfaceDescriptorData =
|
||||
reinterpret_cast<const SPatchInterfaceDescriptorData *>(pPatch);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.INTERFACE_DESCRIPTOR_DATA", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .Offset", kernelInfo.patchInfo.interfaceDescriptorData->Offset,
|
||||
"\n .SamplerStateOffset", kernelInfo.patchInfo.interfaceDescriptorData->SamplerStateOffset,
|
||||
"\n .KernelOffset", kernelInfo.patchInfo.interfaceDescriptorData->KernelOffset,
|
||||
"\n .BindingTableOffset", kernelInfo.patchInfo.interfaceDescriptorData->BindingTableOffset);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_THREAD_PAYLOAD:
|
||||
kernelInfo.patchInfo.threadPayload =
|
||||
reinterpret_cast<const SPatchThreadPayload *>(pPatch);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.THREAD_PAYLOAD", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .HeaderPresent", kernelInfo.patchInfo.threadPayload->HeaderPresent,
|
||||
"\n .LocalIDXPresent", kernelInfo.patchInfo.threadPayload->LocalIDXPresent,
|
||||
"\n .LocalIDYPresent", kernelInfo.patchInfo.threadPayload->LocalIDYPresent,
|
||||
"\n .LocalIDZPresent", kernelInfo.patchInfo.threadPayload->LocalIDZPresent,
|
||||
"\n .LocalIDFlattenedPresent", kernelInfo.patchInfo.threadPayload->LocalIDFlattenedPresent,
|
||||
"\n .IndirectPayloadStorage", kernelInfo.patchInfo.threadPayload->IndirectPayloadStorage,
|
||||
"\n .UnusedPerThreadConstantPresent", kernelInfo.patchInfo.threadPayload->UnusedPerThreadConstantPresent,
|
||||
"\n .GetLocalIDPresent", kernelInfo.patchInfo.threadPayload->GetLocalIDPresent,
|
||||
"\n .GetGroupIDPresent", kernelInfo.patchInfo.threadPayload->GetGroupIDPresent,
|
||||
"\n .GetGlobalOffsetPresent", kernelInfo.patchInfo.threadPayload->GetGlobalOffsetPresent);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_EXECUTION_ENVIRONMENT:
|
||||
kernelInfo.patchInfo.executionEnvironment =
|
||||
reinterpret_cast<const SPatchExecutionEnvironment *>(pPatch);
|
||||
if (kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX != 0) {
|
||||
kernelInfo.reqdWorkGroupSize[0] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
|
||||
kernelInfo.reqdWorkGroupSize[1] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
|
||||
kernelInfo.reqdWorkGroupSize[2] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
|
||||
DEBUG_BREAK_IF(!(kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY > 0));
|
||||
DEBUG_BREAK_IF(!(kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ > 0));
|
||||
}
|
||||
if (kernelInfo.patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers == false) {
|
||||
kernelInfo.requiresSshForBuffers = true;
|
||||
}
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.EXECUTION_ENVIRONMENT", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .RequiredWorkGroupSizeX", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX,
|
||||
"\n .RequiredWorkGroupSizeY", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY,
|
||||
"\n .RequiredWorkGroupSizeZ", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ,
|
||||
"\n .LargestCompiledSIMDSize", kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize,
|
||||
"\n .CompiledSubGroupsNumber", kernelInfo.patchInfo.executionEnvironment->CompiledSubGroupsNumber,
|
||||
"\n .HasBarriers", kernelInfo.patchInfo.executionEnvironment->HasBarriers,
|
||||
"\n .DisableMidThreadPreemption", kernelInfo.patchInfo.executionEnvironment->DisableMidThreadPreemption,
|
||||
"\n .CompiledSIMD8", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD8,
|
||||
"\n .CompiledSIMD16", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD16,
|
||||
"\n .CompiledSIMD32", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD32,
|
||||
"\n .HasDeviceEnqueue", kernelInfo.patchInfo.executionEnvironment->HasDeviceEnqueue,
|
||||
"\n .MayAccessUndeclaredResource", kernelInfo.patchInfo.executionEnvironment->MayAccessUndeclaredResource,
|
||||
"\n .UsesFencesForReadWriteImages", kernelInfo.patchInfo.executionEnvironment->UsesFencesForReadWriteImages,
|
||||
"\n .UsesStatelessSpillFill", kernelInfo.patchInfo.executionEnvironment->UsesStatelessSpillFill,
|
||||
"\n .IsCoherent", kernelInfo.patchInfo.executionEnvironment->IsCoherent,
|
||||
"\n .SubgroupIndependentForwardProgressRequired", kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_DATA_PARAMETER_STREAM:
|
||||
kernelInfo.patchInfo.dataParameterStream =
|
||||
reinterpret_cast<const SPatchDataParameterStream *>(pPatch);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.DATA_PARAMETER_STREAM", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .DataParameterStreamSize", kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_KERNEL_ARGUMENT_INFO: {
|
||||
auto pkernelArgInfo = reinterpret_cast<const SPatchKernelArgumentInfo *>(pPatch);
|
||||
kernelInfo.storeArgInfo(pkernelArgInfo);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.KERNEL_ARGUMENT_INFO", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ArgumentNumber", pkernelArgInfo->ArgumentNumber,
|
||||
"\n .AddressQualifierSize", pkernelArgInfo->AddressQualifierSize,
|
||||
"\n .AccessQualifierSize", pkernelArgInfo->AccessQualifierSize,
|
||||
"\n .ArgumentNameSize", pkernelArgInfo->ArgumentNameSize,
|
||||
"\n .TypeNameSize", pkernelArgInfo->TypeNameSize,
|
||||
"\n .TypeQualifierSize", pkernelArgInfo->TypeQualifierSize);
|
||||
break;
|
||||
}
|
||||
|
||||
case PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO:
|
||||
kernelInfo.patchInfo.pKernelAttributesInfo =
|
||||
reinterpret_cast<const SPatchKernelAttributesInfo *>(pPatch);
|
||||
kernelInfo.storePatchToken(kernelInfo.patchInfo.pKernelAttributesInfo);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.KERNEL_ATTRIBUTES_INFO", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .AttributesSize", kernelInfo.patchInfo.pKernelAttributesInfo->AttributesSize);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT: {
|
||||
const SPatchSamplerKernelArgument *pSamplerKernelObjectKernelArg = nullptr;
|
||||
|
||||
pSamplerKernelObjectKernelArg = reinterpret_cast<const SPatchSamplerKernelArgument *>(pPatch);
|
||||
kernelInfo.storeKernelArgument(pSamplerKernelObjectKernelArg);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.SAMPLER_KERNEL_ARGUMENT", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ArgumentNumber", pSamplerKernelObjectKernelArg->ArgumentNumber,
|
||||
"\n .Type", pSamplerKernelObjectKernelArg->Type,
|
||||
"\n .Offset", pSamplerKernelObjectKernelArg->Offset);
|
||||
};
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT: {
|
||||
const SPatchImageMemoryObjectKernelArgument *pImageMemObjectKernelArg = nullptr;
|
||||
|
||||
pImageMemObjectKernelArg =
|
||||
reinterpret_cast<const SPatchImageMemoryObjectKernelArgument *>(pPatch);
|
||||
kernelInfo.storeKernelArgument(pImageMemObjectKernelArg);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ArgumentNumber", pImageMemObjectKernelArg->ArgumentNumber,
|
||||
"\n .Type", pImageMemObjectKernelArg->Type,
|
||||
"\n .Offset", pImageMemObjectKernelArg->Offset,
|
||||
"\n .LocationIndex", pImageMemObjectKernelArg->LocationIndex,
|
||||
"\n .LocationIndex2", pImageMemObjectKernelArg->LocationIndex2,
|
||||
"\n .Transformable", pImageMemObjectKernelArg->Transformable);
|
||||
};
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: {
|
||||
const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjectKernelArg = nullptr;
|
||||
pGlobalMemObjectKernelArg =
|
||||
reinterpret_cast<const SPatchGlobalMemoryObjectKernelArgument *>(pPatch);
|
||||
kernelInfo.storeKernelArgument(pGlobalMemObjectKernelArg);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ArgumentNumber", pGlobalMemObjectKernelArg->ArgumentNumber,
|
||||
"\n .Offset", pGlobalMemObjectKernelArg->Offset,
|
||||
"\n .LocationIndex", pGlobalMemObjectKernelArg->LocationIndex,
|
||||
"\n .LocationIndex2", pGlobalMemObjectKernelArg->LocationIndex2);
|
||||
};
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: {
|
||||
const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalMemObjKernelArg = nullptr;
|
||||
|
||||
pStatelessGlobalMemObjKernelArg =
|
||||
reinterpret_cast<const SPatchStatelessGlobalMemoryObjectKernelArgument *>(pPatch);
|
||||
kernelInfo.storeKernelArgument(pStatelessGlobalMemObjKernelArg);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ArgumentNumber", pStatelessGlobalMemObjKernelArg->ArgumentNumber,
|
||||
"\n .SurfaceStateHeapOffset", pStatelessGlobalMemObjKernelArg->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pStatelessGlobalMemObjKernelArg->DataParamOffset,
|
||||
"\n .DataParamSize", pStatelessGlobalMemObjKernelArg->DataParamSize,
|
||||
"\n .LocationIndex", pStatelessGlobalMemObjKernelArg->LocationIndex,
|
||||
"\n .LocationIndex2", pStatelessGlobalMemObjKernelArg->LocationIndex2);
|
||||
};
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT: {
|
||||
const SPatchStatelessConstantMemoryObjectKernelArgument *pPatchToken = reinterpret_cast<const SPatchStatelessConstantMemoryObjectKernelArgument *>(pPatch);
|
||||
kernelInfo.storeKernelArgument(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ArgumentNumber", pPatchToken->ArgumentNumber,
|
||||
"\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pPatchToken->DataParamOffset,
|
||||
"\n .DataParamSize", pPatchToken->DataParamSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT: {
|
||||
const SPatchStatelessDeviceQueueKernelArgument *pPatchToken = reinterpret_cast<const SPatchStatelessDeviceQueueKernelArgument *>(pPatch);
|
||||
kernelInfo.storeKernelArgument(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ArgumentNumber", pPatchToken->ArgumentNumber,
|
||||
"\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pPatchToken->DataParamOffset,
|
||||
"\n .DataParamSize", pPatchToken->DataParamSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY: {
|
||||
const SPatchAllocateStatelessPrivateSurface *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessPrivateSurface *>(pPatch);
|
||||
kernelInfo.storePatchToken(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.ALLOCATE_STATELESS_PRIVATE_MEMORY", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pPatchToken->DataParamOffset,
|
||||
"\n .DataParamSize", pPatchToken->DataParamSize,
|
||||
"\n .PerThreadPrivateMemorySize", pPatchToken->PerThreadPrivateMemorySize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION: {
|
||||
const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *>(pPatch);
|
||||
kernelInfo.storePatchToken(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .ConstantBufferIndex", pPatchToken->ConstantBufferIndex,
|
||||
"\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pPatchToken->DataParamOffset,
|
||||
"\n .DataParamSize", pPatchToken->DataParamSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION: {
|
||||
const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *>(pPatch);
|
||||
kernelInfo.storePatchToken(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .GlobalBufferIndex", pPatchToken->GlobalBufferIndex,
|
||||
"\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pPatchToken->DataParamOffset,
|
||||
"\n .DataParamSize", pPatchToken->DataParamSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE: {
|
||||
const SPatchAllocateStatelessPrintfSurface *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessPrintfSurface *>(pPatch);
|
||||
kernelInfo.storePatchToken(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.ALLOCATE_STATELESS_PRINTF_SURFACE", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .PrintfSurfaceIndex", pPatchToken->PrintfSurfaceIndex,
|
||||
"\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pPatchToken->DataParamOffset,
|
||||
"\n .DataParamSize", pPatchToken->DataParamSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE: {
|
||||
const SPatchAllocateStatelessEventPoolSurface *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessEventPoolSurface *>(pPatch);
|
||||
kernelInfo.storePatchToken(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.ALLOCATE_STATELESS_EVENT_POOL_SURFACE", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .EventPoolSurfaceIndex", pPatchToken->EventPoolSurfaceIndex,
|
||||
"\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pPatchToken->DataParamOffset,
|
||||
"\n .DataParamSize", pPatchToken->DataParamSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE: {
|
||||
const SPatchAllocateStatelessDefaultDeviceQueueSurface *pPatchToken = reinterpret_cast<const SPatchAllocateStatelessDefaultDeviceQueueSurface *>(pPatch);
|
||||
kernelInfo.storePatchToken(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset,
|
||||
"\n .DataParamOffset", pPatchToken->DataParamOffset,
|
||||
"\n .DataParamSize", pPatchToken->DataParamSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_STRING: {
|
||||
const SPatchString *pPatchToken = reinterpret_cast<const SPatchString *>(pPatch);
|
||||
kernelInfo.storePatchToken(pPatchToken);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.STRING", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .Index", pPatchToken->Index,
|
||||
"\n .StringSize", pPatchToken->StringSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_INLINE_VME_SAMPLER_INFO:
|
||||
kernelInfo.isVmeWorkload = true;
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.INLINE_VME_SAMPLER_INFO", pPatch->Token,
|
||||
"\n .Size", pPatch->Size);
|
||||
break;
|
||||
|
||||
case PATCH_TOKEN_GTPIN_FREE_GRF_INFO: {
|
||||
const SPatchGtpinFreeGRFInfo *pPatchToken = reinterpret_cast<const SPatchGtpinFreeGRFInfo *>(pPatch);
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.PATCH_TOKEN_GTPIN_FREE_GRF_INFO", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .BufferSize", pPatchToken->BufferSize);
|
||||
} break;
|
||||
|
||||
case PATCH_TOKEN_STATE_SIP: {
|
||||
const SPatchStateSIP *pPatchToken = reinterpret_cast<const SPatchStateSIP *>(pPatch);
|
||||
kernelInfo.systemKernelOffset = pPatchToken->SystemKernelOffset;
|
||||
DBG_LOG(LogPatchTokens,
|
||||
"\n.PATCH_TOKEN_STATE_SIP", pPatch->Token,
|
||||
"\n .Size", pPatch->Size,
|
||||
"\n .SystemKernelOffset", pPatchToken->SystemKernelOffset);
|
||||
} break;
|
||||
|
||||
default:
|
||||
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, " Program::parsePatchList. Unknown Patch Token: %d\n", pPatch->Token);
|
||||
if (false == isSafeToSkipUnhandledToken(pPatch->Token)) {
|
||||
retVal = CL_INVALID_KERNEL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (retVal != CL_SUCCESS) {
|
||||
break;
|
||||
}
|
||||
pCurPatchListPtr = ptrOffset(pCurPatchListPtr, pPatch->Size);
|
||||
}
|
||||
|
||||
if (retVal == CL_SUCCESS) {
|
||||
retVal = kernelInfo.resolveKernelInfo();
|
||||
}
|
||||
|
||||
if (kernelInfo.patchInfo.dataParameterStream && kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize) {
|
||||
uint32_t crossThreadDataSize = kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize;
|
||||
kernelInfo.crossThreadData = new char[crossThreadDataSize];
|
||||
memset(kernelInfo.crossThreadData, 0x00, crossThreadDataSize);
|
||||
|
||||
if (LocalMemoryStatelessWindowStartAddressOffset != 0xFFffFFff) {
|
||||
*(uintptr_t *)&(kernelInfo.crossThreadData[LocalMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast<uintptr_t>(this->pDevice->getSLMWindowStartAddress());
|
||||
}
|
||||
|
||||
if (LocalMemoryStatelessWindowSizeOffset != 0xFFffFFff) {
|
||||
*(uint32_t *)&(kernelInfo.crossThreadData[LocalMemoryStatelessWindowSizeOffset]) = (uint32_t)this->pDevice->getDeviceInfo().localMemSize;
|
||||
}
|
||||
|
||||
if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface && (PrivateMemoryStatelessSizeOffset != 0xFFffFFff)) {
|
||||
*(uint32_t *)&(kernelInfo.crossThreadData[PrivateMemoryStatelessSizeOffset]) = kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * this->getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * kernelInfo.getMaxSimdSize();
|
||||
}
|
||||
|
||||
if (kernelInfo.workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) {
|
||||
*(uint32_t *)&(kernelInfo.crossThreadData[kernelInfo.workloadInfo.maxWorkGroupSizeOffset]) = (uint32_t)this->getDevice(0).getDeviceInfo().maxWorkGroupSize;
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
// Walks the program-scope patch list (programScopePatchList, of
// programScopePatchListSize bytes) and applies every token to the program:
//  - ALLOCATE_CONSTANT_MEMORY_SURFACE: frees any previous constantSurface,
//    allocates a new one and copies the inline data that follows the token.
//  - ALLOCATE_GLOBAL_MEMORY_SURFACE: same for globalSurface, and adds the
//    inline data size to globalVarTotalSize.
//  - GLOBAL_POINTER / CONSTANT_POINTER: adds the surface address to the
//    pointer-sized slot stored at the given offset inside that surface.
//  - anything else: accepted only if isSafeToSkipUnhandledToken() says so.
//
// Returns CL_SUCCESS when the whole list was consumed, CL_INVALID_BINARY for
// a zero-sized token or an unhandled token that must not be skipped, and
// CL_OUT_OF_HOST_MEMORY when a surface allocation fails.
cl_int Program::parseProgramScopePatchList() {
    cl_int retVal = CL_SUCCESS;

    auto pPatchList = programScopePatchList;
    auto patchListSize = programScopePatchListSize;
    auto pCurPatchListPtr = pPatchList;

    while (ptrDiff(pCurPatchListPtr, pPatchList) < patchListSize) {
        auto pPatch = reinterpret_cast<const SPatchItemHeader *>(pCurPatchListPtr);

        // The cursor is advanced by pPatch->Size at the bottom of the loop; a
        // zero-sized token would therefore be parsed forever. Treat it as a
        // malformed binary instead of hanging.
        if (pPatch->Size == 0) {
            printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, " Program::parseProgramScopePatchList. Zero-sized Patch Token: %d\n", pPatch->Token);
            retVal = CL_INVALID_BINARY;
            break;
        }

        switch (pPatch->Token) {
        case PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO: {
            auto patch = *reinterpret_cast<const SPatchAllocateConstantMemorySurfaceProgramBinaryInfo *>(pPatch);

            // Only one program-scope constant surface is kept; release the old one.
            if (constantSurface) {
                pDevice->getMemoryManager()->freeGraphicsMemory(constantSurface);
            }

            const cl_uint surfaceSize = patch.InlineDataSize;
            const cl_uint headerSize = sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo);

            constantSurface = pDevice->getMemoryManager()->createGraphicsAllocationWithRequiredBitness(surfaceSize, nullptr);
            if (constantSurface == nullptr) {
                // Allocation failed: report it rather than dereferencing nullptr below.
                retVal = CL_OUT_OF_HOST_MEMORY;
                break;
            }

            // The initialization data is stored inline, right after the token structure.
            memcpy_s(constantSurface->getUnderlyingBuffer(), surfaceSize, reinterpret_cast<const cl_char *>(pPatch) + headerSize, surfaceSize);
            // Step over the inline data here; the token itself is skipped after the switch.
            pCurPatchListPtr = ptrOffset(pCurPatchListPtr, surfaceSize);
            DBG_LOG(LogPatchTokens,
                    "\n .ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .ConstantBufferIndex", patch.ConstantBufferIndex,
                    "\n .InitializationDataSize", patch.InlineDataSize);
        } break;

        case PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO: {
            auto patch = *reinterpret_cast<const SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo *>(pPatch);

            // Only one program-scope global surface is kept; release the old one.
            if (globalSurface) {
                pDevice->getMemoryManager()->freeGraphicsMemory(globalSurface);
            }

            const cl_uint surfaceSize = patch.InlineDataSize;
            const cl_uint headerSize = sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo);
            globalVarTotalSize += static_cast<size_t>(surfaceSize);

            globalSurface = pDevice->getMemoryManager()->createGraphicsAllocationWithRequiredBitness(surfaceSize, nullptr);
            if (globalSurface == nullptr) {
                // Allocation failed: report it rather than dereferencing nullptr below.
                retVal = CL_OUT_OF_HOST_MEMORY;
                break;
            }

            // The initialization data is stored inline, right after the token structure.
            memcpy_s(globalSurface->getUnderlyingBuffer(), surfaceSize, reinterpret_cast<const cl_char *>(pPatch) + headerSize, surfaceSize);
            // Step over the inline data here; the token itself is skipped after the switch.
            pCurPatchListPtr = ptrOffset(pCurPatchListPtr, surfaceSize);
            DBG_LOG(LogPatchTokens,
                    "\n .ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO", pPatch->Token,
                    "\n .Size", pPatch->Size,
                    "\n .BufferType", patch.Type,
                    "\n .GlobalBufferIndex", patch.GlobalBufferIndex,
                    "\n .InitializationDataSize", patch.InlineDataSize);
        } break;

        case PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO:
            // Pointer tokens are silently ignored when the surface was never allocated.
            if (globalSurface != nullptr) {
                auto patch = *reinterpret_cast<const SPatchGlobalPointerProgramBinaryInfo *>(pPatch);
                if ((patch.GlobalBufferIndex == 0) && (patch.BufferIndex == 0) && (patch.BufferType == PROGRAM_SCOPE_GLOBAL_BUFFER)) {
                    // NOTE(review): GlobalPointerOffset is not checked against the
                    // surface size in this function - confirm it is validated elsewhere.
                    void *pPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(globalSurface->getUnderlyingBuffer()) + static_cast<uintptr_t>(patch.GlobalPointerOffset));
                    if (globalSurface->is32BitAllocation) {
                        *reinterpret_cast<uint32_t *>(pPtr) += static_cast<uint32_t>(globalSurface->getGpuAddressToPatch());
                    } else {
                        // NOTE(review): this branch adds the CPU buffer address while the
                        // 32-bit branch adds getGpuAddressToPatch() - confirm this is intended.
                        *reinterpret_cast<uintptr_t *>(pPtr) += reinterpret_cast<uintptr_t>(globalSurface->getUnderlyingBuffer());
                    }
                } else {
                    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. Unhandled Data parameter: %d\n", pPatch->Token);
                }
                DBG_LOG(LogPatchTokens,
                        "\n .GLOBAL_POINTER_PROGRAM_BINARY_INFO", pPatch->Token,
                        "\n .Size", pPatch->Size,
                        "\n .GlobalBufferIndex", patch.GlobalBufferIndex,
                        "\n .GlobalPointerOffset", patch.GlobalPointerOffset,
                        "\n .BufferType", patch.BufferType,
                        "\n .BufferIndex", patch.BufferIndex);
            }
            break;

        case PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO:
            // Pointer tokens are silently ignored when the surface was never allocated.
            if (constantSurface != nullptr) {
                auto patch = *reinterpret_cast<const SPatchConstantPointerProgramBinaryInfo *>(pPatch);
                if ((patch.ConstantBufferIndex == 0) && (patch.BufferIndex == 0) && (patch.BufferType == PROGRAM_SCOPE_CONSTANT_BUFFER)) {
                    // NOTE(review): ConstantPointerOffset is not checked against the
                    // surface size in this function - confirm it is validated elsewhere.
                    void *pPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(constantSurface->getUnderlyingBuffer()) + static_cast<uintptr_t>(patch.ConstantPointerOffset));
                    if (constantSurface->is32BitAllocation) {
                        *reinterpret_cast<uint32_t *>(pPtr) += static_cast<uint32_t>(constantSurface->getGpuAddressToPatch());
                    } else {
                        // NOTE(review): this branch adds the CPU buffer address while the
                        // 32-bit branch adds getGpuAddressToPatch() - confirm this is intended.
                        *reinterpret_cast<uintptr_t *>(pPtr) += reinterpret_cast<uintptr_t>(constantSurface->getUnderlyingBuffer());
                    }
                } else {
                    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. Unhandled Data parameter: %d\n", pPatch->Token);
                }
                DBG_LOG(LogPatchTokens,
                        "\n .CONSTANT_POINTER_PROGRAM_BINARY_INFO", pPatch->Token,
                        "\n .Size", pPatch->Size,
                        "\n .ConstantBufferIndex", patch.ConstantBufferIndex,
                        "\n .ConstantPointerOffset", patch.ConstantPointerOffset,
                        "\n .BufferType", patch.BufferType,
                        "\n .BufferIndex", patch.BufferIndex);
            }
            break;

        default:
            if (false == isSafeToSkipUnhandledToken(pPatch->Token)) {
                retVal = CL_INVALID_BINARY;
            }
            printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, " Program::parseProgramScopePatchList. Unknown Patch Token: %d\n", pPatch->Token);
            DBG_LOG(LogPatchTokens,
                    "\n .Program Unknown Patch Token", pPatch->Token,
                    "\n .Size", pPatch->Size);
        }

        // Stop at the first failure; the cursor is left on the offending token.
        if (retVal != CL_SUCCESS) {
            break;
        }
        pCurPatchListPtr = ptrOffset(pCurPatchListPtr, pPatch->Size);
    }

    return retVal;
}
|
||||
|
||||
// Rebuilds kernelInfoArray from the device binary stored in genBinary
// (genBinarySize bytes).
//
// Steps: delete the kernel infos of any previous run, validate the program
// binary header, parse the program-scope patch list that follows the header,
// then hand each of the NumberOfKernels kernel records to processKernel().
//
// Returns CL_SUCCESS on success. CL_INVALID_BINARY is returned when the binary
// is missing, too small to hold the header or its declared patch list, or has
// a header rejected by validateGenBinaryHeader(); errors reported by
// parseProgramScopePatchList() / processKernel() are passed through.
cl_int Program::processGenBinary() {
    cl_int retVal = CL_SUCCESS;

    // Kernel infos are owned through raw pointers; release the stale ones first.
    for (auto &i : kernelInfoArray)
        delete i;
    kernelInfoArray.clear();

    // The header is read unconditionally below, so the buffer must be able to
    // hold a complete SProgramBinaryHeader (this also covers the empty case).
    if (!genBinary || genBinarySize < sizeof(SProgramBinaryHeader)) {
        return CL_INVALID_BINARY;
    }

    auto pCurBinaryPtr = genBinary;
    auto pGenBinaryHeader = reinterpret_cast<const SProgramBinaryHeader *>(pCurBinaryPtr);
    if (!validateGenBinaryHeader(pGenBinaryHeader)) {
        return CL_INVALID_BINARY;
    }

    // Do not trust the declared patch list size: it must fit in what is left
    // of the buffer, otherwise parsing would run past the end of genBinary.
    const size_t bytesAfterHeader = genBinarySize - sizeof(SProgramBinaryHeader);
    if (pGenBinaryHeader->PatchListSize > bytesAfterHeader) {
        return CL_INVALID_BINARY;
    }

    pCurBinaryPtr = ptrOffset(pCurBinaryPtr, sizeof(SProgramBinaryHeader));
    programScopePatchList = pCurBinaryPtr;
    programScopePatchListSize = pGenBinaryHeader->PatchListSize;

    if (programScopePatchListSize != 0u) {
        retVal = parseProgramScopePatchList();
    }

    // Kernel records start right after the program-scope patch list.
    pCurBinaryPtr = ptrOffset(pCurBinaryPtr, pGenBinaryHeader->PatchListSize);

    auto numKernels = pGenBinaryHeader->NumberOfKernels;
    // The retVal check in the condition stops at the first failing step
    // (including a failed patch list parse above).
    for (uint32_t i = 0; i < numKernels && retVal == CL_SUCCESS; i++) {
        size_t bytesProcessed = processKernel(pCurBinaryPtr, retVal);
        pCurBinaryPtr = ptrOffset(pCurBinaryPtr, bytesProcessed);
    }

    return retVal;
}
|
||||
|
||||
// Tells whether a binary built for the given core family is accepted, by
// looking the family up in the familyEnabled table.
// NOTE(review): `device` indexes the table without a range check, and
// validateGenBinaryHeader() derives it straight from the binary header -
// confirm the value is bounded by the table size somewhere.
bool Program::validateGenBinaryDevice(GFXCORE_FAMILY device) const {
    return familyEnabled[device];
}
|
||||
|
||||
// Checks a program binary header in three steps, stopping at the first
// mismatch: the magic value must be MAGIC_CL, the version must equal
// CURRENT_ICBE_VERSION, and the target device family must pass
// validateGenBinaryDevice().
bool Program::validateGenBinaryHeader(const iOpenCL::SProgramBinaryHeader *pGenBinaryHeader) const {
    if (pGenBinaryHeader->Magic != MAGIC_CL) {
        return false;
    }
    if (pGenBinaryHeader->Version != CURRENT_ICBE_VERSION) {
        return false;
    }

    const auto coreFamily = static_cast<GFXCORE_FAMILY>(pGenBinaryHeader->Device);
    return validateGenBinaryDevice(coreFamily);
}
|
||||
} // namespace OCLRT
|
57
runtime/program/process_spir_binary.cpp
Normal file
57
runtime/program/process_spir_binary.cpp
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "program.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Returns true when the buffer is non-null, larger than one 32-bit word, and
// its first four bytes match one of the two accepted magic words (the same
// value in both byte orders).
bool Program::isValidSpirvBinary(const void *pBinary, size_t binarySize) {
    const uint32_t magicWord[2] = {0x03022307, 0x07230203};

    if (pBinary == nullptr || binarySize <= sizeof(uint32_t)) {
        return false;
    }

    for (const uint32_t &candidate : magicWord) {
        if (memcmp(pBinary, &candidate, sizeof(uint32_t)) == 0) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
// Stores an intermediate binary in this program: marks the binary type as
// intermediate, copies the `binarySize` bytes at `pBinary` into sourceCode,
// resets the build status and records whether the input is SPIR-V.
// Always returns CL_SUCCESS.
cl_int Program::processSpirBinary(const void *pBinary, size_t binarySize, bool isSpirV) {
    programBinaryType = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE;

    // Build the copy first, then hand it over to sourceCode in one step.
    const char *rawBytes = static_cast<const char *>(pBinary);
    sourceCode = std::string(rawBytes, binarySize);

    buildStatus = CL_BUILD_NONE;
    this->isSpirV = isSpirV;

    return CL_SUCCESS;
}
|
||||
}
|
443
runtime/program/program.cpp
Normal file
443
runtime/program/program.cpp
Normal file
@ -0,0 +1,443 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "program.h"
|
||||
#include "elf/writer.h"
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/compiler_interface/compiler_interface.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
// Option-name substrings that updateNonUniformFlag() searches for in `options`.
const std::string Program::clOptNameClVer{"-cl-std=CL"};
const std::string Program::clOptNameUniformWgs{"-cl-uniform-work-group-size"};
|
||||
|
||||
// Default constructor: delegates to the (context, isBuiltIn) constructor with
// no context, then overrides the device count it set with zero.
Program::Program() : Program(nullptr, false) {
    this->numDevices = 0;
}
|
||||
|
||||
// Constructs a program bound to `context` (may be null).
// - Takes an internal reference on the context unless the program is built-in.
// - Resets all binary buffers, surfaces and status fields to their empty state.
// - When a device is available, derives the internal compiler options from the
//   device version string, the 32-bit addressing setting and the debug flags.
Program::Program(Context *context, bool isBuiltIn) : context(context), isBuiltIn(isBuiltIn) {
    if (this->context && !this->isBuiltIn) {
        this->context->incRefInternal();
    }

    blockKernelManager = new BlockKernelManager();
    pDevice = context ? context->getDevice(0) : nullptr;
    numDevices = 1;

    // No binaries are held yet.
    elfBinary = nullptr;
    genBinary = nullptr;
    llvmBinary = nullptr;
    debugData = nullptr;
    elfBinarySize = 0;
    genBinarySize = 0;
    llvmBinarySize = 0;
    debugDataSize = 0;

    // Build / binary state.
    buildStatus = CL_BUILD_NONE;
    programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
    isCreatedFromBinary = false;
    isProgramBinaryResolved = false;

    // Program-scope data.
    constantSurface = nullptr;
    globalSurface = nullptr;
    globalVarTotalSize = 0;
    programScopePatchList = nullptr;
    programScopePatchListSize = 0;

    programOptionVersion = 12u;
    allowNonUniform = false;

    if (pDevice) {
        // Select the -ocl-version option from the device version string.
        char deviceVersion[32] = {};
        pDevice->getDeviceInfo(CL_DEVICE_VERSION, sizeof(deviceVersion), deviceVersion, nullptr);

        const char *oclVersionOption = nullptr;
        if (strstr(deviceVersion, "2.1")) {
            oclVersionOption = "-ocl-version=210 ";
        } else if (strstr(deviceVersion, "2.0")) {
            oclVersionOption = "-ocl-version=200 ";
        } else if (strstr(deviceVersion, "1.2")) {
            oclVersionOption = "-ocl-version=120 ";
        }
        if (oclVersionOption != nullptr) {
            internalOptions = oclVersionOption;
        }

        const bool force32BitAddressess = pDevice->getDeviceInfo().force32BitAddressess;
        if (force32BitAddressess) {
            internalOptions += "-m32 ";
        }

        pDevice->increaseProgramCount();

        // Stateless access is forced on 32-bit builds, by the debug flag, or
        // when the device forces 32-bit addressing.
        bool forceStateless = is32bit | DebugManager.flags.DisableStatelessToStatefulOptimization.get();
        forceStateless = forceStateless || force32BitAddressess;
        if (forceStateless) {
            internalOptions += "-cl-intel-greater-than-4GB-buffer-required ";
        }
    }

    if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get()) {
        internalOptions += "-cl-intel-has-buffer-offset-arg ";
    }
}
|
||||
|
||||
// Releases everything the program owns: the internal context reference (for
// non-built-in programs), the binary buffers, the kernel infos, the block
// private surfaces, the block kernel manager and the constant/global surfaces.
Program::~Program() {
    if (context && !isBuiltIn) {
        context->decRefInternal();
    }

    delete[] genBinary;
    genBinary = nullptr;

    delete[] llvmBinary;
    llvmBinary = nullptr;

    delete[] debugData;
    debugData = nullptr;

    delete[] elfBinary;
    elfBinary = nullptr;
    elfBinarySize = 0;

    for (KernelInfo *kernelInfo : kernelInfoArray) {
        delete kernelInfo;
    }

    // Must run before the manager itself is destroyed.
    freeBlockPrivateSurfaces();
    delete blockKernelManager;

    // Frees a surface through the device's memory manager and clears the pointer.
    auto releaseSurface = [this](GraphicsAllocation *&surface) {
        if (surface) {
            auto memoryManager = pDevice->getMemoryManager();
            memoryManager->freeGraphicsMemory(surface);
            surface = nullptr;
        }
    };
    releaseSurface(constantSurface);
    releaseSurface(globalSurface);
}
|
||||
|
||||
// Initializes the program from a user-supplied binary.
// LLVM and SPIR-V inputs are stored as intermediate binaries; anything else is
// processed as an ELF binary. When ELF processing fails and the binary reports
// a compiler version other than the current one, a rebuild from the embedded
// LLVM binary is attempted; if that rebuild also fails, the error from ELF
// processing is returned.
cl_int Program::createProgramFromBinary(const void *pBinary, size_t binarySize) {
    if (Program::isValidLlvmBinary(pBinary, binarySize)) {
        return processSpirBinary(pBinary, binarySize, false);
    }
    if (Program::isValidSpirvBinary(pBinary, binarySize)) {
        return processSpirBinary(pBinary, binarySize, true);
    }

    uint32_t binaryVersion = iOpenCL::CURRENT_ICBE_VERSION;
    cl_int retVal = processElfBinary(pBinary, binarySize, binaryVersion);

    if (retVal == CL_SUCCESS) {
        isCreatedFromBinary = true;
        return retVal;
    }

    // The binary was produced by a different compiler version: try to
    // recompile it from its LLVM section (if available).
    const bool versionMismatch = (binaryVersion != iOpenCL::CURRENT_ICBE_VERSION);
    if (versionMismatch && rebuildProgramFromLLVM() == CL_SUCCESS) {
        retVal = CL_SUCCESS;
    }

    return retVal;
}
|
||||
|
||||
// Rebuilds the device binary from the stored LLVM binary.
//
// The LLVM binary is wrapped into an ELF container with a single
// SH_TYPE_OPENCL_LLVM_BINARY section, handed to the compiler interface for
// linking, and the resulting Gen binary is processed. On success the program
// is marked as an executable created from binary with a resolved binary.
//
// Returns CL_SUCCESS on success, CL_INVALID_PROGRAM when no valid LLVM binary
// is stored, CL_OUT_OF_HOST_MEMORY when the ELF writer or the compiler
// interface is unavailable, or the error reported by link()/processGenBinary().
cl_int Program::rebuildProgramFromLLVM() {
    cl_int retVal = CL_SUCCESS;
    size_t dataSize = 0;
    char *pData = nullptr;
    CLElfLib::CElfWriter *pElfWriter = nullptr;

    do {
        if (!Program::isValidLlvmBinary(llvmBinary, llvmBinarySize)) {
            retVal = CL_INVALID_PROGRAM;
            break;
        }

        pElfWriter = CLElfLib::CElfWriter::create(CLElfLib::EH_TYPE_OPENCL_OBJECTS, CLElfLib::EH_MACHINE_NONE, 0);
        if (nullptr == pElfWriter) {
            // Do not dereference a writer that could not be created.
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        CLElfLib::SSectionNode sectionNode;
        sectionNode.Name = "";
        sectionNode.Type = CLElfLib::SH_TYPE_OPENCL_LLVM_BINARY;
        sectionNode.Flags = 0;
        sectionNode.pData = llvmBinary;
        sectionNode.DataSize = static_cast<unsigned int>(llvmBinarySize);
        pElfWriter->addSection(&sectionNode);

        // First call queries the required size, second call fills the buffer.
        pElfWriter->resolveBinary(nullptr, dataSize);
        pData = new char[dataSize];
        pElfWriter->resolveBinary(pData, dataSize);

        CompilerInterface *pCompilerInterface = getCompilerInterface();
        if (nullptr == pCompilerInterface) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        TranslationArgs inputArgs = {};
        inputArgs.pInput = pData;
        inputArgs.InputSize = static_cast<unsigned int>(dataSize);
        inputArgs.pOptions = options.c_str();
        inputArgs.OptionsSize = static_cast<unsigned int>(options.length());
        inputArgs.pInternalOptions = internalOptions.c_str();
        inputArgs.InternalOptionsSize = static_cast<unsigned int>(internalOptions.length());
        inputArgs.pTracingOptions = nullptr;
        inputArgs.TracingOptionsCount = 0;

        retVal = pCompilerInterface->link(*this, inputArgs);
        if (retVal != CL_SUCCESS) {
            break;
        }

        retVal = processGenBinary();
        if (retVal != CL_SUCCESS) {
            break;
        }

        programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
        isCreatedFromBinary = true;
        isProgramBinaryResolved = true;
    } while (false);

    // Common cleanup for every exit path; both may still be null here.
    CLElfLib::CElfWriter::destroy(pElfWriter);
    delete[] pData;

    return retVal;
}
|
||||
|
||||
// Copies the Version field of the given binary header into `binaryVersion`.
// `binaryVersion` is left untouched when `pSectionData` is null.
void Program::getProgramCompilerVersion(SProgramBinaryHeader *pSectionData, uint32_t &binaryVersion) const {
    if (pSectionData == nullptr) {
        return;
    }
    binaryVersion = pSectionData->Version;
}
|
||||
|
||||
// Returns true when the buffer contains the LLVM bitcode magic "BC\xc0\xde".
//
// The buffer must be non-null and larger than the magic plus one byte. As with
// the previous strstr-based check, the magic may appear anywhere before the
// first NUL byte. The search is now limited to `binarySize` bytes, so a buffer
// without a NUL terminator is never read past its end.
bool Program::isValidLlvmBinary(const void *pBinary, size_t binarySize) {
    static const char llvmMagic[] = "BC\xc0\xde";
    const size_t magicSize = sizeof(llvmMagic) - 1; // exclude the terminating NUL

    if (pBinary == nullptr || binarySize <= magicSize + 1) {
        return false;
    }

    const char *begin = static_cast<const char *>(pBinary);

    // Stop at the first NUL inside the buffer, or at the end of the buffer.
    const void *terminator = memchr(begin, '\0', binarySize);
    const size_t searchableSize =
        terminator ? static_cast<size_t>(static_cast<const char *>(terminator) - begin) : binarySize;

    for (size_t offset = 0; offset + magicSize <= searchableSize; ++offset) {
        if (memcmp(begin + offset, llvmMagic, magicSize) == 0) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
// Replaces the stored source code with a copy of the given null-terminated
// string. The pointer is not checked for null.
void Program::setSource(char *pSourceString) {
    sourceCode.assign(pSourceString);
}
|
||||
|
||||
// Exposes the stored source code.
// With non-empty source: `pBinary` points at the internal string buffer,
// `dataSize` is its length, and CL_SUCCESS is returned.
// With empty source: `pBinary` is null, `dataSize` is 0, and
// CL_INVALID_PROGRAM is returned.
cl_int Program::getSource(char *&pBinary, unsigned int &dataSize) const {
    if (sourceCode.empty()) {
        pBinary = nullptr;
        dataSize = 0;
        return CL_INVALID_PROGRAM;
    }

    pBinary = const_cast<char *>(sourceCode.c_str());
    dataSize = static_cast<unsigned int>(sourceCode.size());
    return CL_SUCCESS;
}
|
||||
|
||||
void Program::storeGenBinary(
|
||||
const void *pSrc,
|
||||
const size_t srcSize) {
|
||||
storeBinary(genBinary, genBinarySize, pSrc, srcSize);
|
||||
}
|
||||
|
||||
void Program::storeLlvmBinary(
|
||||
const void *pSrc,
|
||||
const size_t srcSize) {
|
||||
storeBinary(llvmBinary, llvmBinarySize, pSrc, srcSize);
|
||||
}
|
||||
|
||||
void Program::storeDebugData(
|
||||
const void *pSrc,
|
||||
const size_t srcSize) {
|
||||
storeBinary(debugData, debugDataSize, pSrc, srcSize);
|
||||
}
|
||||
|
||||
void Program::storeBinary(
|
||||
char *&pDst,
|
||||
size_t &dstSize,
|
||||
const void *pSrc,
|
||||
const size_t srcSize) {
|
||||
dstSize = 0;
|
||||
|
||||
DEBUG_BREAK_IF(!(pSrc && srcSize > 0));
|
||||
|
||||
delete[] pDst;
|
||||
pDst = new char[srcSize];
|
||||
|
||||
dstSize = (cl_uint)srcSize;
|
||||
memcpy_s(pDst, dstSize, pSrc, srcSize);
|
||||
}
|
||||
|
||||
// Adds a message to the build log kept for `pDevice`.
// Null, zero-length or empty messages are ignored. A trailing NUL counted in
// `errorStringSize` is dropped. The first message for a device starts its
// log; later messages are appended after a newline separator.
void Program::updateBuildLog(const Device *pDevice, const char *pErrorString,
                             size_t errorStringSize) {
    const bool nothingToLog = (pErrorString == nullptr) || (errorStringSize == 0) || (pErrorString[0] == '\0');
    if (nothingToLog) {
        return;
    }

    if (pErrorString[errorStringSize - 1] == '\0') {
        --errorStringSize;
    }

    const bool hasPreviousEntry = (buildLog.find(pDevice) != buildLog.end());
    std::string &deviceLog = buildLog[pDevice];

    if (hasPreviousEntry) {
        deviceLog.append("\n");
    }
    deviceLog.append(pErrorString, errorStringSize);
}
|
||||
|
||||
const char *Program::getBuildLog(const Device *pDevice) const {
|
||||
const char *entry = nullptr;
|
||||
|
||||
auto it = buildLog.find(pDevice);
|
||||
|
||||
if (it != buildLog.end()) {
|
||||
entry = it->second.c_str();
|
||||
}
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
// Returns the compiler interface obtained from CompilerInterface::getInstance().
CompilerInterface *Program::getCompilerInterface() const {
    CompilerInterface *instance = CompilerInterface::getInstance();
    return instance;
}
|
||||
|
||||
void Program::separateBlockKernels() {
|
||||
if ((0 == parentKernelInfoArray.size()) && (0 == subgroupKernelInfoArray.size())) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto allKernelInfos(kernelInfoArray);
|
||||
kernelInfoArray.clear();
|
||||
for (auto &i : allKernelInfos) {
|
||||
auto end = i->name.rfind("_dispatch_");
|
||||
if (end != std::string::npos) {
|
||||
bool baseKernelFound = false;
|
||||
std::string baseKernelName(i->name, 0, end);
|
||||
for (auto &j : parentKernelInfoArray) {
|
||||
if (j->name.compare(baseKernelName) == 0) {
|
||||
baseKernelFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!baseKernelFound) {
|
||||
for (auto &j : subgroupKernelInfoArray) {
|
||||
if (j->name.compare(baseKernelName) == 0) {
|
||||
baseKernelFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (baseKernelFound) {
|
||||
//Parent or subgroup kernel found -> child kernel
|
||||
blockKernelManager->addBlockKernelInfo(i);
|
||||
} else {
|
||||
kernelInfoArray.push_back(i);
|
||||
}
|
||||
} else {
|
||||
//Regular kernel found
|
||||
kernelInfoArray.push_back(i);
|
||||
}
|
||||
}
|
||||
allKernelInfos.clear();
|
||||
}
|
||||
|
||||
void Program::allocateBlockPrivateSurfaces() {
|
||||
size_t blockCount = blockKernelManager->getCount();
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *info = blockKernelManager->getBlockKernelInfo(i);
|
||||
|
||||
if (info->patchInfo.pAllocateStatelessPrivateSurface) {
|
||||
size_t privateSize = info->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize;
|
||||
|
||||
if (privateSize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) {
|
||||
privateSize *= getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * info->getMaxSimdSize();
|
||||
|
||||
auto *privateSurface = getDevice(0).getMemoryManager()->createGraphicsAllocationWithRequiredBitness(privateSize, nullptr);
|
||||
blockKernelManager->pushPrivateSurface(privateSurface, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Program::freeBlockPrivateSurfaces() {
|
||||
size_t blockCount = blockKernelManager->getCount();
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
|
||||
auto *privateSurface = blockKernelManager->getPrivateSurface(i);
|
||||
|
||||
if (privateSurface != nullptr) {
|
||||
blockKernelManager->pushPrivateSurface(nullptr, i);
|
||||
getDevice(0).getMemoryManager()->freeGraphicsMemory(privateSurface);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Program::updateNonUniformFlag() {
|
||||
//Look for -cl-std=CL substring and extract value behind which can be 1.2 2.0 2.1 and convert to value
|
||||
auto pos = options.find(clOptNameClVer);
|
||||
if (pos == std::string::npos) {
|
||||
programOptionVersion = 12u; //Default is 1.2
|
||||
} else {
|
||||
std::stringstream ss{options.c_str() + pos + clOptNameClVer.size()};
|
||||
uint32_t majorV, minorV;
|
||||
char dot;
|
||||
ss >> majorV;
|
||||
ss >> dot;
|
||||
ss >> minorV;
|
||||
programOptionVersion = majorV * 10u + minorV;
|
||||
}
|
||||
|
||||
if (programOptionVersion >= 20u && options.find(clOptNameUniformWgs) == std::string::npos) {
|
||||
allowNonUniform = true;
|
||||
}
|
||||
}
|
||||
|
||||
void Program::updateNonUniformFlag(const Program **inputPrograms, size_t numInputPrograms) {
|
||||
bool allowNonUniform = true;
|
||||
for (cl_uint i = 0; i < numInputPrograms; i++) {
|
||||
allowNonUniform = allowNonUniform && inputPrograms[i]->getAllowNonUniform();
|
||||
}
|
||||
this->allowNonUniform = allowNonUniform;
|
||||
}
|
||||
} // namespace OCLRT
|
330
runtime/program/program.h
Normal file
330
runtime/program/program.h
Normal file
@ -0,0 +1,330 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "block_kernel_manager.h"
|
||||
#include "elf/reader.h"
|
||||
#include "kernel_info.h"
|
||||
#include "runtime/api/cl_types.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/stdio.h"
|
||||
#include "runtime/helpers/string_helpers.h"
|
||||
#include "igfxfmid.h"
|
||||
#include "patch_list.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
// Rounds `a` up to the next multiple of `b`; `a` is returned unchanged when it
// is already a multiple. Both arguments are evaluated more than once, so they
// must not have side effects.
#define OCLRT_ALIGN(a, b) ((((a) % (b)) != 0) ? ((a) - ((a) % (b)) + (b)) : (a))
|
||||
|
||||
namespace OCLRT {
|
||||
class Context;
|
||||
class CompilerInterface;
|
||||
template <>
|
||||
struct OpenCLObjectMapper<_cl_program> {
|
||||
typedef class Program DerivedType;
|
||||
};
|
||||
|
||||
// Free-function declaration; the definition is not in this header.
// NOTE(review): presumably reports whether an unrecognized token may be ignored — confirm against the definition.
bool isSafeToSkipUnhandledToken(unsigned int token);
|
||||
|
||||
// Implementation object behind the cl_program API handle.
// Holds the program's source, its ELF / Gen / LLVM / debug binaries, the
// per-kernel information, program-scope surfaces, build options and build logs.
class Program : public BaseObject<_cl_program> {
  public:
    static const cl_ulong objectMagic = 0x5651C89100AAACFELL;

    // Create program from binary
    template <typename T = Program>
    static T *create(cl_context context,
                     cl_uint numDevices,
                     const cl_device_id *deviceList,
                     const size_t *lengths,
                     const unsigned char **binaries,
                     cl_int *binaryStatus,
                     cl_int &errcodeRet);

    // Create program from source
    template <typename T = Program>
    static T *create(cl_context context,
                     cl_uint count,
                     const char **strings,
                     const size_t *lengths,
                     cl_int &errcodeRet);

    // Create program from a single null-terminated string
    template <typename T = Program>
    static T *create(const char *nullTerminatedString,
                     Context *context,
                     Device &device,
                     bool isBuiltIn,
                     cl_int *errcodeRet);

    // Creates a program directly from a Gen binary. The binary is copied into
    // the new program, which is marked as a successfully built executable.
    // Returns nullptr and reports CL_INVALID_VALUE when `binary` is null or
    // `size` is zero. `errcodeRet` is optional.
    template <typename T = Program>
    static T *createFromGenBinary(Context *context,
                                  const void *binary,
                                  size_t size,
                                  bool isBuiltIn,
                                  cl_int *errcodeRet) {
        const bool inputIsValid = (binary != nullptr) && (size != 0);
        T *program = nullptr;

        if (inputIsValid) {
            program = new T(context, isBuiltIn);
            program->numDevices = 1;
            program->storeGenBinary(binary, size);
            program->isCreatedFromBinary = true;
            program->programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
            program->isProgramBinaryResolved = true;
            program->buildStatus = CL_BUILD_SUCCESS;
        }

        if (errcodeRet) {
            *errcodeRet = inputIsValid ? CL_SUCCESS : CL_INVALID_VALUE;
        }

        return program;
    }

    // Create program from an intermediate language buffer
    template <typename T = Program>
    static T *createFromIL(Context *context,
                           const void *il,
                           size_t length,
                           cl_int &errcodeRet);

    Program(Context *context, bool isBuiltIn = false);
    ~Program() override;

    // Programs are neither copyable nor assignable.
    Program(const Program &) = delete;
    Program &operator=(const Program &) = delete;

    cl_int build(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions,
                 void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
                 void *userData, bool enableCaching);

    cl_int build(const cl_device_id device, const char *buildOptions, bool enableCaching,
                 std::unordered_map<std::string, BuiltinDispatchInfoBuilder *> &builtinsMap);

    cl_int build(const char *pKernelData, size_t kernelDataSize);

    MOCKABLE_VIRTUAL cl_int processGenBinary();

    cl_int compile(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions,
                   cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames,
                   void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
                   void *userData);

    cl_int link(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions,
                cl_uint numInputPrograms, const cl_program *inputPrograms,
                void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
                void *userData);

    size_t getNumKernels() const;
    const KernelInfo *getKernelInfo(const char *kernelName) const;
    const KernelInfo *getKernelInfo(size_t ordinal) const;

    cl_int getInfo(cl_program_info paramName, size_t paramValueSize,
                   void *paramValue, size_t *paramValueSizeRet);

    cl_int getBuildInfo(cl_device_id device, cl_program_build_info paramName,
                        size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;

    // Dereferences the stored context pointer without a null check.
    Context &getContext() const { return *context; }

    Context *getContextPtr() const { return context; }

    // Always returns the single stored device; `deviceOrdinal` is not used.
    const Device &getDevice(cl_uint deviceOrdinal) const { return *pDevice; }

    // Always reports one device, independent of the numDevices member.
    cl_uint getNumDevices() const { return 1; }

    MOCKABLE_VIRTUAL cl_int processElfBinary(const void *pBinary, size_t binarySize, uint32_t &binaryVersion);
    cl_int processSpirBinary(const void *pBinary, size_t binarySize, bool isSpirV);

    void setSource(char *pSourceString);

    cl_int getSource(char *&pBinary, unsigned int &dataSize) const;

    void storeGenBinary(const void *pSrc, const size_t srcSize);

    // Returns the stored Gen binary and writes its size to `genBinarySize`.
    char *getGenBinary(size_t &genBinarySize) const {
        genBinarySize = this->genBinarySize;
        return this->genBinary;
    }

    void storeLlvmBinary(const void *pSrc, const size_t srcSize);

    void storeDebugData(const void *pSrc, const size_t srcSize);

    void updateBuildLog(const Device *pDevice, const char *pErrorString, const size_t errorStringSize);

    const char *getBuildLog(const Device *pDevice) const;

    cl_uint getProgramBinaryType() const { return programBinaryType; }

    bool getIsSpirV() const { return isSpirV; }

    size_t getProgramScopePatchListSize() const { return programScopePatchListSize; }

    GraphicsAllocation *getConstantSurface() const { return constantSurface; }

    GraphicsAllocation *getGlobalSurface() const { return globalSurface; }

    BlockKernelManager *getBlockKernelManager() const { return blockKernelManager; }

    void allocateBlockPrivateSurfaces();
    void freeBlockPrivateSurfaces();

    const std::string &getOptions() const { return options; }

    const std::string &getInternalOptions() const { return internalOptions; }

    bool getAllowNonUniform() const { return allowNonUniform; }

    bool getIsBuiltIn() const { return isBuiltIn; }

    uint32_t getProgramOptionVersion() const { return programOptionVersion; }

    static bool isValidLlvmBinary(const void *pBinary, size_t binarySize);
    static bool isValidSpirvBinary(const void *pBinary, size_t binarySize);

  protected:
    Program();

    MOCKABLE_VIRTUAL bool isSafeToSkipUnhandledToken(unsigned int token) const;

    MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize);

    bool optionsAreNew(const char *options) const;

    cl_int processElfHeader(const CLElfLib::SElf64Header *pElfHeader,
                            cl_program_binary_type &binaryType, uint32_t &numSections);

    void getProgramCompilerVersion(SProgramBinaryHeader *pSectionData, uint32_t &binaryVersion) const;

    cl_int resolveProgramBinary();

    cl_int parseProgramScopePatchList();

    MOCKABLE_VIRTUAL cl_int rebuildProgramFromLLVM();

    cl_int parsePatchList(KernelInfo &pKernelInfo);

    size_t processKernel(const void *pKernelBlob, cl_int &retVal);

    void storeBinary(char *&pDst, size_t &dstSize, const void *pSrc, const size_t srcSize);

    bool validateGenBinaryDevice(GFXCORE_FAMILY device) const;
    bool validateGenBinaryHeader(const iOpenCL::SProgramBinaryHeader *pGenBinaryHeader) const;

    std::string getKernelNamesString() const;

    MOCKABLE_VIRTUAL CompilerInterface *getCompilerInterface() const;

    void separateBlockKernels();

    void updateNonUniformFlag();
    void updateNonUniformFlag(const Program **inputProgram, size_t numInputPrograms);

    // Option-name substrings searched for in `options`.
    static const std::string clOptNameClVer;
    static const std::string clOptNameUniformWgs;

    // clang-format off
    cl_program_binary_type programBinaryType;
    bool isSpirV = false;

    // Binary buffers and their sizes in bytes.
    char *elfBinary;
    size_t elfBinarySize;

    char *genBinary;
    size_t genBinarySize;

    char *llvmBinary;
    size_t llvmBinarySize;

    char *debugData;
    size_t debugDataSize;

    // Kernel bookkeeping.
    std::vector<KernelInfo *> kernelInfoArray;
    std::vector<KernelInfo *> parentKernelInfoArray;
    std::vector<KernelInfo *> subgroupKernelInfoArray;
    BlockKernelManager *blockKernelManager;

    // Program-scope data.
    const void *programScopePatchList;
    size_t programScopePatchListSize;

    GraphicsAllocation *constantSurface;
    GraphicsAllocation *globalSurface;

    size_t globalVarTotalSize;

    // Build state.
    cl_build_status buildStatus;
    bool isCreatedFromBinary;
    bool isProgramBinaryResolved;

    std::string sourceCode;
    std::string options;
    std::string internalOptions;
    std::string hashFileName;
    std::string hashFilePath;

    uint32_t programOptionVersion;
    bool allowNonUniform;

    // Per-device build log text.
    std::map<const Device *, std::string> buildLog;

    Context *context;
    Device *pDevice;
    cl_uint numDevices;

    bool isBuiltIn;

    friend class OfflineCompiler;
    // clang-format on
};
|
||||
} // namespace OCLRT
|
Reference in New Issue
Block a user