Program refactor

* decouple program_info
* move global allocations relocation to linker
* remove obsolete tests
* initial cleanup to kernel_info kernelInfo
* unified patchtoken validation

Change-Id: I0567cd6d607b4f3cf44e6caf33681f6210760f76
This commit is contained in:
Jaroslaw Chodor
2020-01-11 18:25:26 +01:00
committed by sys_ocldev
parent 570b09850d
commit f057712fa7
68 changed files with 3442 additions and 1705 deletions

View File

@@ -35,6 +35,7 @@
#include "runtime/helpers/queue_helpers.h"
#include "runtime/helpers/validators.h"
#include "runtime/kernel/kernel.h"
#include "runtime/kernel/kernel_info_cl.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/mem_obj/image.h"
#include "runtime/mem_obj/mem_obj_helper.h"
@@ -1576,11 +1577,6 @@ cl_kernel CL_API_CALL clCreateKernel(cl_program clProgram,
break;
}
if (pKernelInfo->isValid == false) {
retVal = CL_INVALID_PROGRAM_EXECUTABLE;
break;
}
kernel = Kernel::create(
pProgram,
*pKernelInfo,
@@ -1592,9 +1588,7 @@ cl_kernel CL_API_CALL clCreateKernel(cl_program clProgram,
if (errcodeRet) {
*errcodeRet = retVal;
}
if (kernel != nullptr) {
gtpinNotifyKernelCreate(kernel);
}
gtpinNotifyKernelCreate(kernel);
TRACING_EXIT(clCreateKernel, &kernel);
return kernel;
}
@@ -1621,17 +1615,14 @@ cl_int CL_API_CALL clCreateKernelsInProgram(cl_program clProgram,
return retVal;
}
for (unsigned int ordinal = 0; ordinal < numKernelsInProgram; ++ordinal) {
const auto kernelInfo = program->getKernelInfo(ordinal);
for (unsigned int i = 0; i < numKernelsInProgram; ++i) {
const auto kernelInfo = program->getKernelInfo(i);
DEBUG_BREAK_IF(kernelInfo == nullptr);
DEBUG_BREAK_IF(!kernelInfo->isValid);
kernels[ordinal] = Kernel::create(
kernels[i] = Kernel::create(
program,
*kernelInfo,
nullptr);
if (kernels[ordinal] != nullptr) {
gtpinNotifyKernelCreate(kernels[ordinal]);
}
gtpinNotifyKernelCreate(kernels[i]);
}
}
@@ -4367,7 +4358,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
return retVal;
}
cl_int kernelArgAddressQualifier = pKernel->getKernelArgAddressQualifier(argIndex);
cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo().kernelArgInfo[argIndex].metadata.addressQualifier);
if ((kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_GLOBAL) &&
(kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_CONSTANT)) {
retVal = CL_INVALID_ARG_VALUE;
@@ -5140,7 +5131,7 @@ cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL(
Program *pProgram = (Program *)(program);
const auto &symbols = pProgram->getSymbols();
auto symbolIt = symbols.find(globalVariableName);
if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.type == NEO::SymbolInfo::Function)) {
if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment == NEO::SegmentType::Instructions)) {
retVal = CL_INVALID_ARG_VALUE;
} else {
if (globalVariableSizeRet != nullptr) {
@@ -5172,7 +5163,7 @@ cl_int CL_API_CALL clGetDeviceFunctionPointerINTEL(
Program *pProgram = (Program *)(program);
const auto &symbols = pProgram->getSymbols();
auto symbolIt = symbols.find(functionName);
if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.type != NEO::SymbolInfo::Function)) {
if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment != NEO::SegmentType::Instructions)) {
retVal = CL_INVALID_ARG_VALUE;
} else {
*functionPointerRet = static_cast<cl_ulong>(symbolIt->second.gpuAddress);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2019 Intel Corporation
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -84,9 +84,7 @@ class BuiltinDispatchInfoBuilder {
template <typename KernelNameT, typename... KernelsDescArgsT>
void grabKernels(KernelNameT &&kernelName, Kernel *&kernelDst, KernelsDescArgsT &&... kernelsDesc) {
const KernelInfo *kernelInfo = prog->getKernelInfo(kernelName);
if (!kernelInfo) {
return;
}
UNRECOVERABLE_IF(nullptr == kernelInfo);
cl_int err = 0;
kernelDst = Kernel::create(prog.get(), *kernelInfo, &err);
kernelDst->isBuiltIn = true;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019 Intel Corporation
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -16,6 +16,7 @@
#include "patch_shared.h"
#include "program_debug_data.h"
#include <cstdint>
#include <limits>
#include <memory>

View File

@@ -9,6 +9,7 @@
#include "core/helpers/hw_info.h"
#include "runtime/compiler_interface/patchtokens_decoder.h"
#include "runtime/program/kernel_arg_info.h"
#include "igfxfmid.h"
@@ -90,6 +91,11 @@ inline ValidatorError validate(const ProgramFromPatchtokens &decodedProgram,
return ValidatorError::InvalidBinary;
}
if ((decodedProgram.header->GPUPointerSizeInBytes != 4U) && (decodedProgram.header->GPUPointerSizeInBytes != 8U)) {
outErrReason = "Invalid pointer size";
return ValidatorError::InvalidBinary;
}
if (false == isDeviceSupported(static_cast<GFXCORE_FAMILY>(decodedProgram.header->Device))) {
outErrReason = "Unsupported device binary, device GFXCORE_FAMILY : " + std::to_string(decodedProgram.header->Device);
return ValidatorError::InvalidBinary;
@@ -107,6 +113,25 @@ inline ValidatorError validate(const ProgramFromPatchtokens &decodedProgram,
return ValidatorError::InvalidBinary;
}
if (nullptr == decodedKernel.tokens.executionEnvironment) {
outErrReason = "Missing execution environment";
return ValidatorError::InvalidBinary;
} else {
switch (decodedKernel.tokens.executionEnvironment->LargestCompiledSIMDSize) {
case 1:
break;
case 8:
break;
case 16:
break;
case 32:
break;
default:
outErrReason = "Invalid LargestCompiledSIMDSize";
return ValidatorError::InvalidBinary;
}
}
if (decodedKernel.tokens.allocateLocalSurface) {
if (sharedLocalMemorySize < decodedKernel.tokens.allocateLocalSurface->TotalInlineLocalMemorySize) {
outErrReason = "KernelFromPatchtokens requires too much SLM";
@@ -114,6 +139,24 @@ inline ValidatorError validate(const ProgramFromPatchtokens &decodedProgram,
}
}
for (auto &kernelArg : decodedKernel.tokens.kernelArgs) {
if (kernelArg.argInfo == nullptr) {
outErrReason = "Missing kernelArgInfo";
return ValidatorError::InvalidBinary;
}
auto argInfoInlineData = getInlineData(kernelArg.argInfo);
auto accessQualifier = KernelArgMetadata::parseAccessQualifier(parseLimitedString(argInfoInlineData.accessQualifier.begin(), argInfoInlineData.accessQualifier.size()));
if (KernelArgMetadata::AccessQualifier::Unknown == accessQualifier) {
outErrReason = "Unhandled access qualifier";
return ValidatorError::InvalidBinary;
}
auto addressQualifier = KernelArgMetadata::parseAddressSpace(parseLimitedString(argInfoInlineData.addressQualifier.begin(), argInfoInlineData.addressQualifier.size()));
if (KernelArgMetadata::AddressSpaceQualifier::Unknown == addressQualifier) {
outErrReason = "Unhandled address qualifier";
return ValidatorError::InvalidBinary;
}
}
for (const auto &unhandledToken : decodedKernel.unhandledTokens) {
if (false == tokenValidator.isSafeToSkipUnhandledToken(unhandledToken->Token)) {
outErrReason = "Unhandled required kernel-scope Patch Token : " + std::to_string(unhandledToken->Token);

View File

@@ -58,6 +58,9 @@ void gtpinNotifyContextDestroy(cl_context context) {
}
void gtpinNotifyKernelCreate(cl_kernel kernel) {
if (nullptr == kernel) {
return;
}
if (isGTPinInitialized) {
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates();

View File

@@ -14,6 +14,7 @@ set(RUNTIME_SRCS_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/kernel.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel.inl
${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_extra.cpp
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL})

View File

@@ -34,6 +34,7 @@
#include "runtime/helpers/surface_formats.h"
#include "runtime/kernel/image_transformer.h"
#include "runtime/kernel/kernel.inl"
#include "runtime/kernel/kernel_info_cl.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/mem_obj/image.h"
#include "runtime/mem_obj/pipe.h"
@@ -337,22 +338,20 @@ cl_int Kernel::initialize() {
// set the argument handler
auto &argInfo = kernelInfo.kernelArgInfo[i];
if (argInfo.addressQualifier == CL_KERNEL_ARG_ADDRESS_LOCAL) {
if (argInfo.metadata.addressQualifier == KernelArgMetadata::AddressSpaceQualifier::Local) {
kernelArgHandlers[i] = &Kernel::setArgLocal;
} else if (argInfo.isAccelerator) {
kernelArgHandlers[i] = &Kernel::setArgAccelerator;
} else if (argInfo.typeQualifierStr.find("pipe") != std::string::npos) {
} else if (argInfo.metadata.typeQualifiers.pipeQual) {
kernelArgHandlers[i] = &Kernel::setArgPipe;
kernelArguments[i].type = PIPE_OBJ;
} else if (argInfo.isImage) {
kernelArgHandlers[i] = &Kernel::setArgImage;
kernelArguments[i].type = IMAGE_OBJ;
usingImages = true;
DEBUG_BREAK_IF(argInfo.typeStr.find("image") == std::string::npos);
} else if (argInfo.isSampler) {
kernelArgHandlers[i] = &Kernel::setArgSampler;
kernelArguments[i].type = SAMPLER_OBJ;
DEBUG_BREAK_IF(!(*argInfo.typeStr.c_str() == '\0' || argInfo.typeStr.find("sampler") != std::string::npos));
} else if (argInfo.isBuffer) {
kernelArgHandlers[i] = &Kernel::setArgBuffer;
kernelArguments[i].type = BUFFER_OBJ;
@@ -506,37 +505,44 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t
const void *pSrc = nullptr;
size_t srcSize = 0;
auto numArgs = (cl_uint)kernelInfo.kernelArgInfo.size();
auto argInfoIdx = kernelInfo.kernelArgInfo[argIndx];
const auto &argInfo = kernelInfo.kernelArgInfo[argIndx];
if (argIndx >= numArgs) {
retVal = CL_INVALID_ARG_INDEX;
return retVal;
}
cl_kernel_arg_address_qualifier addressQualifier;
cl_kernel_arg_access_qualifier accessQualifier;
cl_kernel_arg_type_qualifier typeQualifier;
switch (paramName) {
case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
srcSize = sizeof(cl_uint);
pSrc = &argInfoIdx.addressQualifier;
addressQualifier = asClKernelArgAddressQualifier(argInfo.metadata.addressQualifier);
srcSize = sizeof(addressQualifier);
pSrc = &addressQualifier;
break;
case CL_KERNEL_ARG_ACCESS_QUALIFIER:
srcSize = sizeof(cl_uint);
pSrc = &argInfoIdx.accessQualifier;
break;
case CL_KERNEL_ARG_TYPE_NAME:
srcSize = argInfoIdx.typeStr.length() + 1;
pSrc = argInfoIdx.typeStr.c_str();
accessQualifier = asClKernelArgAccessQualifier(argInfo.metadata.accessQualifier);
srcSize = sizeof(accessQualifier);
pSrc = &accessQualifier;
break;
case CL_KERNEL_ARG_TYPE_QUALIFIER:
srcSize = sizeof(argInfoIdx.typeQualifier);
pSrc = &argInfoIdx.typeQualifier;
typeQualifier = asClKernelArgTypeQualifier(argInfo.metadata.typeQualifiers);
srcSize = sizeof(typeQualifier);
pSrc = &typeQualifier;
break;
case CL_KERNEL_ARG_TYPE_NAME:
srcSize = argInfo.metadataExtended->type.length() + 1;
pSrc = argInfo.metadataExtended->type.c_str();
break;
case CL_KERNEL_ARG_NAME:
srcSize = argInfoIdx.name.length() + 1;
pSrc = argInfoIdx.name.c_str();
srcSize = argInfo.metadataExtended->argName.length() + 1;
pSrc = argInfo.metadataExtended->argName.c_str();
break;
default:
@@ -2290,10 +2296,10 @@ cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex,
MemObj *pMemObj = nullptr;
WithCastToInternal(mem, &pMemObj);
if (pMemObj) {
cl_kernel_arg_access_qualifier accessQualifier = getKernelInfo().kernelArgInfo[argIndex].accessQualifier;
auto accessQualifier = getKernelInfo().kernelArgInfo[argIndex].metadata.accessQualifier;
cl_mem_flags flags = pMemObj->getMemoryPropertiesFlags();
if ((accessQualifier == CL_KERNEL_ARG_ACCESS_READ_ONLY && ((flags | CL_MEM_WRITE_ONLY) == flags)) ||
(accessQualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY && ((flags | CL_MEM_READ_ONLY) == flags))) {
if ((accessQualifier == KernelArgMetadata::AccessQualifier::ReadOnly && ((flags | CL_MEM_WRITE_ONLY) == flags)) ||
(accessQualifier == KernelArgMetadata::AccessQualifier::WriteOnly && ((flags | CL_MEM_READ_ONLY) == flags))) {
return CL_INVALID_ARG_VALUE;
}
} else {
@@ -2341,7 +2347,7 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
auto &context = this->program->getContext();
if (context.isProvidingPerformanceHints()) {
context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
kernelInfo.name.c_str(), i, kernelInfo.kernelArgInfo.at(i).name.c_str());
kernelInfo.name.c_str(), i, kernelInfo.kernelArgInfo.at(i).metadataExtended->argName.c_str());
}
}
}

View File

@@ -72,15 +72,12 @@ class Kernel : public BaseObject<_cl_kernel> {
const void *argVal);
template <typename kernel_t = Kernel, typename program_t = Program>
static kernel_t *create(Program *program, const KernelInfo &kernelInfo, cl_int *errcodeRet) {
static kernel_t *create(program_t *program, const KernelInfo &kernelInfo, cl_int *errcodeRet) {
cl_int retVal;
kernel_t *pKernel = nullptr;
do {
// copy the kernel data into our new allocation
pKernel = new kernel_t(program, kernelInfo, program->getDevice(0));
retVal = pKernel->initialize();
} while (false);
pKernel = new kernel_t(program, kernelInfo, program->getDevice(0));
retVal = pKernel->initialize();
if (retVal != CL_SUCCESS) {
delete pKernel;
@@ -179,10 +176,6 @@ class Kernel : public BaseObject<_cl_kernel> {
return kernelInfo.kernelArgInfo.size();
}
uint32_t getKernelArgAddressQualifier(uint32_t argIndex) const {
return kernelInfo.kernelArgInfo[argIndex].addressQualifier;
}
bool requiresSshForBuffers() const {
return kernelInfo.requiresSshForBuffers;
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "runtime/program/kernel_arg_info.h"
#include "CL/cl.h"
namespace NEO {
// Translates the runtime's internal access qualifier into the matching
// OpenCL cl_kernel_arg_access_qualifier constant.
// Any value without a dedicated mapping (e.g. AccessQualifier::Unknown) yields 0.
constexpr cl_kernel_arg_access_qualifier asClKernelArgAccessQualifier(KernelArgMetadata::AccessQualifier accessQualifier) {
    using Qualifier = KernelArgMetadata::AccessQualifier;
    switch (accessQualifier) {
    case Qualifier::ReadWrite:
        return CL_KERNEL_ARG_ACCESS_READ_WRITE;
    case Qualifier::WriteOnly:
        return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
    case Qualifier::ReadOnly:
        return CL_KERNEL_ARG_ACCESS_READ_ONLY;
    case Qualifier::None:
        return CL_KERNEL_ARG_ACCESS_NONE;
    default:
        return 0U;
    }
}
// Translates the runtime's internal address-space qualifier into the matching
// OpenCL cl_kernel_arg_address_qualifier constant.
// Any value without a dedicated mapping (e.g. AddressSpaceQualifier::Unknown) yields 0.
constexpr cl_kernel_arg_address_qualifier asClKernelArgAddressQualifier(KernelArgMetadata::AddressSpaceQualifier addressQualifier) {
    using Qualifier = KernelArgMetadata::AddressSpaceQualifier;
    if (Qualifier::Global == addressQualifier) {
        return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    }
    if (Qualifier::Local == addressQualifier) {
        return CL_KERNEL_ARG_ADDRESS_LOCAL;
    }
    if (Qualifier::Private == addressQualifier) {
        return CL_KERNEL_ARG_ADDRESS_PRIVATE;
    }
    if (Qualifier::Constant == addressQualifier) {
        return CL_KERNEL_ARG_ADDRESS_CONSTANT;
    }
    return 0U;
}
// Builds the OpenCL cl_kernel_arg_type_qualifier bitmask from the internal
// per-argument qualifier flags. Only const / volatile / restrict / pipe are
// translated; the unknownQual flag has no OpenCL counterpart and is ignored.
constexpr cl_kernel_arg_type_qualifier asClKernelArgTypeQualifier(KernelArgMetadata::TypeQualifiers typeQualifiers) {
    cl_kernel_arg_type_qualifier clQualifiers = 0U;
    if (typeQualifiers.constQual) {
        clQualifiers |= CL_KERNEL_ARG_TYPE_CONST;
    }
    if (typeQualifiers.volatileQual) {
        clQualifiers |= CL_KERNEL_ARG_TYPE_VOLATILE;
    }
    if (typeQualifiers.restrictQual) {
        clQualifiers |= CL_KERNEL_ARG_TYPE_RESTRICT;
    }
    if (typeQualifiers.pipeQual) {
        clQualifiers |= CL_KERNEL_ARG_TYPE_PIPE;
    }
    return clQualifiers;
}
} // namespace NEO

View File

@@ -11,6 +11,7 @@
#include "runtime/device/device.h"
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/validators.h"
#include "runtime/program/kernel_info.h"
#include "program.h"
@@ -56,7 +57,7 @@ cl_int Program::getInfo(cl_program_info paramName, size_t paramValueSize,
break;
case CL_PROGRAM_KERNEL_NAMES:
kernelNamesString = getKernelNamesString();
kernelNamesString = concatenateKernelNames(kernelInfoArray);
pSrc = kernelNamesString.c_str();
retSize = srcSize = kernelNamesString.length() + 1;

View File

@@ -1,16 +1,186 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "CL/cl.h"
#include "core/compiler_interface/compiler_options/compiler_options_base.h"
#include "core/utilities/const_stringref.h"
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
namespace NEO {
namespace KernelArgMetadata {
// Access qualifier of a kernel argument, stored in a single byte.
// Unknown is the first enumerator (value 0), so a value-initialized
// AccessQualifier ("= {}") is Unknown rather than None.
enum class AccessQualifier : uint8_t {
Unknown,
None,
ReadOnly,
WriteOnly,
ReadWrite,
};
// Textual spellings of access qualifiers. parseAccessQualifier compares
// against none / readOnly / writeOnly / readWrite; the underscore* constants
// spell the same qualifiers with a leading "__" prefix.
namespace AccessQualifierStrings {
constexpr ConstStringRef none = "NONE";
constexpr ConstStringRef readOnly = "read_only";
constexpr ConstStringRef writeOnly = "write_only";
constexpr ConstStringRef readWrite = "read_write";
constexpr ConstStringRef underscoreReadOnly = "__read_only";
constexpr ConstStringRef underscoreWriteOnly = "__write_only";
constexpr ConstStringRef underscoreReadWrite = "__read_write";
} // namespace AccessQualifierStrings
// Address space of a kernel argument, stored in a single byte.
// Unknown is the first enumerator (value 0), so a value-initialized
// AddressSpaceQualifier ("= {}") is Unknown.
enum class AddressSpaceQualifier : uint8_t {
Unknown,
Global,
Local,
Private,
Constant
};
// Textual spellings recognized by parseAddressSpace.
// Note that addrNotSpecified is mapped to AddressSpaceQualifier::Private there.
namespace AddressSpaceQualifierStrings {
constexpr ConstStringRef addrGlobal = "__global";
constexpr ConstStringRef addrLocal = "__local";
constexpr ConstStringRef addrPrivate = "__private";
constexpr ConstStringRef addrConstant = "__constant";
constexpr ConstStringRef addrNotSpecified = "not_specified";
} // namespace AddressSpaceQualifierStrings
// Parses the textual access qualifier of a kernel argument.
//
// Accepted spellings:
//   ""  / "NONE"                      -> AccessQualifier::None
//   "read_only"  / "__read_only"      -> AccessQualifier::ReadOnly
//   "write_only" / "__write_only"     -> AccessQualifier::WriteOnly
//   "read_write" / "__read_write"     -> AccessQualifier::ReadWrite
// Anything else yields AccessQualifier::Unknown.
constexpr AccessQualifier parseAccessQualifier(ConstStringRef str) {
    using namespace AccessQualifierStrings;
    if (str.empty() || (none == str)) {
        return AccessQualifier::None;
    }

    // Every remaining valid spelling is longer than two characters; this guard
    // also makes the str[1] access and the two-character strip below safe.
    if (str.length() < 3) {
        return AccessQualifier::Unknown;
    }

    // Strip the prefix only when it really is "__". Checking str[0] alone would
    // accept malformed input such as "_xread_only" as a valid qualifier.
    const bool hasDoubleUnderscore = ('_' == str[0]) && ('_' == str[1]);
    ConstStringRef strNoUnderscore = hasDoubleUnderscore ? ConstStringRef(str.data() + 2, str.length() - 2) : str;

    // The first character is enough to tell write_only apart from the two read_* spellings.
    static_assert(writeOnly[0] != readOnly[0], "");
    static_assert(writeOnly[0] != readWrite[0], "");
    if (strNoUnderscore[0] == writeOnly[0]) {
        return (writeOnly == strNoUnderscore) ? AccessQualifier::WriteOnly : AccessQualifier::Unknown;
    }

    if (readOnly == strNoUnderscore) {
        return AccessQualifier::ReadOnly;
    }

    return (readWrite == strNoUnderscore) ? AccessQualifier::ReadWrite : AccessQualifier::Unknown;
}
// Parses the textual address-space qualifier of a kernel argument.
//
// An empty string is treated as Global, "not_specified" as Private, and the
// "__global" / "__local" / "__private" / "__constant" spellings map to their
// respective enumerators. Any other input yields AddressSpaceQualifier::Unknown.
constexpr AddressSpaceQualifier parseAddressSpace(ConstStringRef str) {
    using namespace AddressSpaceQualifierStrings;
    if (str.empty()) {
        return AddressSpaceQualifier::Global;
    }

    // None of the recognized spellings is this short.
    if (str.length() < 3) {
        return AddressSpaceQualifier::Unknown;
    }

    if (str == addrNotSpecified) {
        return AddressSpaceQualifier::Private;
    }
    if (str == addrGlobal) {
        return AddressSpaceQualifier::Global;
    }
    if (str == addrLocal) {
        return AddressSpaceQualifier::Local;
    }
    if (str == addrPrivate) {
        return AddressSpaceQualifier::Private;
    }
    if (str == addrConstant) {
        return AddressSpaceQualifier::Constant;
    }
    return AddressSpaceQualifier::Unknown;
}
// Set of type qualifiers attached to a kernel argument.
// The union exposes the same byte either as a whole (packed, default 0) or as
// individual one-bit flags.
// NOTE(review): the unnamed struct inside a union is a compiler extension, and
// empty() presumably relies on the flags sharing storage with packed - confirm
// this holds on all supported toolchains.
union TypeQualifiers {
uint8_t packed = 0U;
struct {
bool constQual : 1;
bool volatileQual : 1;
bool restrictQual : 1;
bool pipeQual : 1;
// set by parseTypeQualifiers when a token is not one of the four known qualifiers
bool unknownQual : 1;
};
// Returns true when packed is 0.
bool empty() const {
return 0U == packed;
}
};
// Qualifier tokens recognized by parseTypeQualifiers.
// That function dispatches on the first character of each token, so these
// spellings must keep distinct first characters.
namespace TypeQualifierStrings {
constexpr ConstStringRef qualConst = "const";
constexpr ConstStringRef qualVolatile = "volatile";
constexpr ConstStringRef qualRestrict = "restrict";
constexpr ConstStringRef qualPipe = "pipe";
} // namespace TypeQualifierStrings
// Splits str into tokens (via CompilerOptions::tokenize) and raises the flag
// matching each token: const, volatile, restrict or pipe. A token that is none
// of these raises unknownQual instead. Flags are only ever set, never cleared,
// so repeated tokens are harmless.
inline TypeQualifiers parseTypeQualifiers(ConstStringRef str) {
    using namespace TypeQualifierStrings;
    TypeQualifiers parsed = {};
    auto qualifierTokens = CompilerOptions::tokenize(str);
    for (const auto &qualifier : qualifierTokens) {
        if (qualConst == qualifier) {
            parsed.constQual = true;
        } else if (qualVolatile == qualifier) {
            parsed.volatileQual = true;
        } else if (qualRestrict == qualifier) {
            parsed.restrictQual = true;
        } else if (qualPipe == qualifier) {
            parsed.pipeQual = true;
        } else {
            parsed.unknownQual = true;
        }
    }
    return parsed;
}
} // namespace KernelArgMetadata
// Copies a string out of a fixed-size character field that may or may not be
// NUL-terminated: the result ends at the first '\0' found within the first
// maxSize characters, or spans all maxSize characters when there is none.
inline std::string parseLimitedString(const char *str, size_t maxSize) {
    size_t length = 0U;
    while ((length < maxSize) && ('\0' != str[length])) {
        ++length;
    }
    return std::string(str, length);
}
// Compact, parsed description of a kernel argument's type.
// Both qualifier enums value-initialize to their Unknown enumerator and
// typeQualifiers starts with no flag set.
struct ArgTypeMetadata {
uint32_t argByValSize = 0U;
KernelArgMetadata::AccessQualifier accessQualifier = {};
KernelArgMetadata::AddressSpaceQualifier addressQualifier = {};
KernelArgMetadata::TypeQualifiers typeQualifiers = {};
};
// Guards against the struct growing beyond 8 bytes.
static_assert(sizeof(ArgTypeMetadata) <= 8, "");
// Textual (unparsed) description of a kernel argument.
// KernelArgInfo holds it through a std::unique_ptr, so it may be absent and
// users must check the pointer before dereferencing it.
struct ArgTypeMetadataExtended {
std::string argName;
std::string type;
std::string accessQualifier;
std::string addressQualifier;
std::string typeQualifiers;
};
struct KernelArgPatchInfo {
uint32_t crossthreadOffset = 0;
uint32_t size = 0;
@@ -18,15 +188,18 @@ struct KernelArgPatchInfo {
};
struct KernelArgInfo {
KernelArgInfo() = default;
~KernelArgInfo() = default;
KernelArgInfo(const KernelArgInfo &rhs) = delete;
KernelArgInfo &operator=(const KernelArgInfo &) = delete;
KernelArgInfo(KernelArgInfo &&) = default;
KernelArgInfo &operator=(KernelArgInfo &&) = default;
static constexpr uint32_t undefinedOffset = (uint32_t)-1;
std::string name;
std::string typeStr;
std::string accessQualifierStr;
std::string addressQualifierStr;
std::string typeQualifierStr;
uint32_t offsetHeap = 0;
std::vector<KernelArgPatchInfo> kernelArgPatchInfoVector;
ArgTypeMetadata metadata;
std::unique_ptr<ArgTypeMetadataExtended> metadataExtended;
uint32_t slmAlignment = 0;
bool isImage = false;
bool isMediaImage = false;
@@ -37,6 +210,11 @@ struct KernelArgInfo {
bool isBuffer = false;
bool pureStatefulBufferAccess = false;
bool isReadOnly = false;
bool needPatch = false;
bool isTransformable = false;
uint32_t offsetHeap = 0;
std::vector<KernelArgPatchInfo> kernelArgPatchInfoVector;
uint32_t samplerArgumentType = 0;
uint32_t offsetImgWidth = undefinedOffset;
uint32_t offsetImgHeight = undefinedOffset;
@@ -59,13 +237,6 @@ struct KernelArgInfo {
uint32_t offsetFlatWidth = undefinedOffset;
uint32_t offsetFlatHeight = undefinedOffset;
uint32_t offsetFlatPitch = undefinedOffset;
bool needPatch = false;
bool isTransformable = false;
cl_kernel_arg_access_qualifier accessQualifier = CL_KERNEL_ARG_ACCESS_NONE;
cl_kernel_arg_address_qualifier addressQualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
cl_kernel_arg_type_qualifier typeQualifier = CL_KERNEL_ARG_TYPE_NONE;
KernelArgInfo() = default;
};
} // namespace NEO

View File

@@ -26,41 +26,11 @@
namespace NEO {
const uint32_t WorkloadInfo::undefinedOffset = (uint32_t)-1;
const uint32_t WorkloadInfo::invalidParentEvent = (uint32_t)-1;
std::unordered_map<std::string, uint32_t> accessQualifierMap = {
{"", CL_KERNEL_ARG_ACCESS_NONE},
{"NONE", CL_KERNEL_ARG_ACCESS_NONE},
{"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
{"__read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
{"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
{"__write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
{"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE},
{"__read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE},
};
std::unordered_map<std::string, uint32_t> addressQualifierMap = {
{"", CL_KERNEL_ARG_ADDRESS_GLOBAL},
{"__global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
{"__local", CL_KERNEL_ARG_ADDRESS_LOCAL},
{"__private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
{"__constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
{"not_specified", CL_KERNEL_ARG_ADDRESS_PRIVATE},
};
struct KernelArgumentType {
const char *argTypeQualifier;
uint64_t argTypeQualifierValue;
};
constexpr KernelArgumentType typeQualifiers[] = {
{"const", CL_KERNEL_ARG_TYPE_CONST},
{"volatile", CL_KERNEL_ARG_TYPE_VOLATILE},
{"restrict", CL_KERNEL_ARG_TYPE_RESTRICT},
{"pipe", CL_KERNEL_ARG_TYPE_PIPE},
};
std::map<std::string, size_t> typeSizeMap = {
{"char", sizeof(cl_char)},
{"char2", sizeof(cl_char2)},
@@ -235,26 +205,12 @@ void KernelInfo::storePatchToken(const SPatchExecutionEnvironment *execEnv) {
}
}
void KernelInfo::storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo) {
if (pkernelArgInfo == nullptr) {
return;
}
uint32_t argNum = pkernelArgInfo->ArgumentNumber;
void KernelInfo::storeArgInfo(uint32_t argNum, ArgTypeMetadata metadata, std::unique_ptr<ArgTypeMetadataExtended> metadataExtended) {
resizeKernelArgInfoAndRegisterParameter(argNum);
auto inlineData = PatchTokenBinary::getInlineData(pkernelArgInfo);
kernelArgInfo[argNum].addressQualifierStr = std::string(inlineData.addressQualifier.begin(), inlineData.addressQualifier.end()).c_str();
kernelArgInfo[argNum].accessQualifierStr = std::string(inlineData.accessQualifier.begin(), inlineData.accessQualifier.end()).c_str();
kernelArgInfo[argNum].name = std::string(inlineData.argName.begin(), inlineData.argName.end()).c_str();
auto argTypeDelim = strchr(inlineData.typeName.begin(), ';');
DEBUG_BREAK_IF(argTypeDelim == nullptr);
kernelArgInfo[argNum].typeStr = std::string(inlineData.typeName.begin(), ptrDiff(argTypeDelim, inlineData.typeName.begin())).c_str();
kernelArgInfo[argNum].typeQualifierStr = std::string(inlineData.typeQualifiers.begin(), inlineData.typeQualifiers.end()).c_str();
patchInfo.kernelArgumentInfo.push_back(pkernelArgInfo);
auto &argInfo = kernelArgInfo[argNum];
argInfo.metadata = metadata;
argInfo.metadataExtended = std::move(metadataExtended);
argInfo.isReadOnly |= argInfo.metadata.typeQualifiers.constQual;
}
void KernelInfo::storeKernelArgument(
@@ -295,9 +251,11 @@ void KernelInfo::storeKernelArgument(
kernelArgInfo[argNum].isMediaBlockImage = true;
}
kernelArgInfo[argNum].accessQualifier = pImageMemObjKernelArg->Writeable
? CL_KERNEL_ARG_ACCESS_READ_WRITE
: CL_KERNEL_ARG_ACCESS_READ_ONLY;
kernelArgInfo[argNum].metadata.accessQualifier = pImageMemObjKernelArg->Writeable
? KernelArgMetadata::AccessQualifier::ReadWrite
: KernelArgMetadata::AccessQualifier::ReadOnly;
kernelArgInfo[argNum].metadata.argByValSize = sizeof(cl_mem);
kernelArgInfo[argNum].isTransformable = pImageMemObjKernelArg->Transformable != 0;
patchInfo.imageMemObjKernelArgs.push_back(pImageMemObjKernelArg);
@@ -311,8 +269,6 @@ void KernelInfo::storeKernelArgument(
usesSsh |= true;
storeKernelArgPatchInfo(argNum, 0, 0, 0, offsetSurfaceState);
kernelArgInfo[argNum].isBuffer = true;
patchInfo.globalMemObjKernelArgs.push_back(pGlobalMemObjKernelArg);
}
void KernelInfo::storeKernelArgument(
@@ -417,43 +373,6 @@ void KernelInfo::storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBu
patchInfo.pAllocateSyncBuffer = pAllocateSyncBuffer;
}
cl_int KernelInfo::resolveKernelInfo() {
cl_int retVal = CL_SUCCESS;
std::unordered_map<std::string, uint32_t>::iterator iterUint;
std::unordered_map<std::string, size_t>::iterator iterSizeT;
for (auto &argInfo : kernelArgInfo) {
iterUint = accessQualifierMap.find(argInfo.accessQualifierStr);
if (iterUint != accessQualifierMap.end()) {
argInfo.accessQualifier = iterUint->second;
} else {
retVal = CL_INVALID_BINARY;
break;
}
iterUint = addressQualifierMap.find(argInfo.addressQualifierStr);
if (iterUint != addressQualifierMap.end()) {
argInfo.addressQualifier = iterUint->second;
} else {
retVal = CL_INVALID_BINARY;
break;
}
auto qualifierCount = sizeof(typeQualifiers) / sizeof(typeQualifiers[0]);
for (auto qualifierId = 0u; qualifierId < qualifierCount; qualifierId++) {
if (strstr(argInfo.typeQualifierStr.c_str(), typeQualifiers[qualifierId].argTypeQualifier) != nullptr) {
argInfo.typeQualifier |= typeQualifiers[qualifierId].argTypeQualifierValue;
if (argInfo.typeQualifier == CL_KERNEL_ARG_TYPE_CONST) {
argInfo.isReadOnly = true;
}
}
}
}
return retVal;
}
void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t dataOffset, uint32_t sourceOffset, uint32_t offsetSSH) {
resizeKernelArgInfoAndRegisterParameter(argNum);
@@ -505,4 +424,17 @@ bool KernelInfo::createKernelAllocation(uint32_t rootDeviceIndex, MemoryManager
return memoryManager->copyMemoryToAllocation(kernelAllocation, heapInfo.pKernelHeap, kernelIsaSize);
}
// Joins the names of the given kernels into one ';'-separated string.
// A separator is inserted only when the string accumulated so far is
// non-empty, so there is never a leading or trailing ';'.
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos) {
    std::string joinedNames;
    for (const KernelInfo *info : kernelInfos) {
        if (false == joinedNames.empty()) {
            joinedNames.push_back(';');
        }
        joinedNames.append(info->name);
    }
    return joinedNames;
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,11 +7,11 @@
#pragma once
#include "core/helpers/hw_info.h"
#include "core/utilities/arrayref.h"
#include "core/utilities/const_stringref.h"
#include "runtime/program/heap_info.h"
#include "runtime/program/kernel_arg_info.h"
#include "CL/cl.h"
#include "ocl_igc_shared/gtpin/gtpin_driver_common.h"
#include "patch_info.h"
#include <algorithm>
@@ -23,6 +23,10 @@
#include <unordered_map>
#include <vector>
namespace gtpin {
typedef struct igc_info_s igc_info_t;
}
namespace NEO {
class BuiltinDispatchInfoBuilder;
class Device;
@@ -33,13 +37,11 @@ struct KernelArgumentType;
class GraphicsAllocation;
class MemoryManager;
extern std::unordered_map<std::string, uint32_t> accessQualifierMap;
extern std::unordered_map<std::string, uint32_t> addressQualifierMap;
extern std::map<std::string, size_t> typeSizeMap;
struct WorkloadInfo {
static const uint32_t undefinedOffset;
static const uint32_t invalidParentEvent;
enum : uint32_t { undefinedOffset = std::numeric_limits<uint32_t>::max() };
enum : uint32_t { invalidParentEvent = std::numeric_limits<uint32_t>::max() };
uint32_t globalWorkOffsetOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
uint32_t globalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
@@ -90,6 +92,13 @@ struct DebugData {
const char *genIsa = nullptr;
};
// Plain aggregate of device-derived values; every member defaults to null / zero.
// NOTE(review): judging by the name, these are values patched into kernel
// payloads - confirm against the code that consumes this struct.
struct DeviceInfoKernelPayloadConstants {
void *slmWindow = nullptr;
uint32_t slmWindowSize = 0U;
uint32_t computeUnitsUsedForScratch = 0U;
uint32_t maxWorkGroupSize = 0U;
};
struct KernelInfo {
public:
KernelInfo() = default;
@@ -97,7 +106,7 @@ struct KernelInfo {
KernelInfo &operator=(const KernelInfo &) = delete;
~KernelInfo();
void storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo);
void storeArgInfo(uint32_t argNum, ArgTypeMetadata metadata, std::unique_ptr<ArgTypeMetadataExtended> metadataExtended);
void storeKernelArgument(const SPatchDataParameterBuffer *pDataParameterKernelArg);
void storeKernelArgument(const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg);
void storeKernelArgument(const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg);
@@ -117,7 +126,6 @@ struct KernelInfo {
void storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface);
void storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer);
GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; }
cl_int resolveKernelInfo();
void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) {
if (kernelArgInfo.size() <= argCount) {
kernelArgInfo.resize(argCount + 1);
@@ -171,7 +179,7 @@ struct KernelInfo {
int32_t getArgNumByName(const char *name) const {
int32_t argNum = 0;
for (auto &arg : kernelArgInfo) {
if (arg.name == name) {
if (arg.metadataExtended && (arg.metadataExtended->argName == name)) {
return argNum;
}
++argNum;
@@ -191,7 +199,6 @@ struct KernelInfo {
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
bool usesSsh = false;
bool requiresSshForBuffers = false;
bool isValid = false;
bool isVmeWorkload = false;
char *crossThreadData = nullptr;
size_t reqdWorkGroupSize[3] = {WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset};
@@ -210,4 +217,7 @@ struct KernelInfo {
bool computeMode = false;
const gtpin::igc_info_t *igcInfoForGtpin = nullptr;
};
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos);
} // namespace NEO

View File

@@ -10,6 +10,8 @@
#include "runtime/compiler_interface/patchtokens_decoder.h"
#include "runtime/program/kernel_info.h"
#include <cstring>
namespace NEO {
using namespace iOpenCL;
@@ -29,14 +31,42 @@ inline uint32_t getOffset(T *token) {
return WorkloadInfo::undefinedOffset;
}
// Translates the textual argument description carried by a kernel-argument-info
// patch token into an ArgTypeMetadata / ArgTypeMetadataExtended pair and stores
// both in dstKernelInfoArg under the token's argument number.
// A null token is ignored.
void populateKernelInfoArgMetadata(KernelInfo &dstKernelInfoArg, const SPatchKernelArgumentInfo *src) {
    if (src == nullptr) {
        return;
    }

    // Every inline field is converted through parseLimitedString(begin, size).
    const auto readField = [](auto &field) { return parseLimitedString(field.begin(), field.size()); };

    uint32_t argIndex = src->ArgumentNumber;
    auto inlineData = PatchTokenBinary::getInlineData(src);

    auto extended = std::make_unique<ArgTypeMetadataExtended>();
    extended->addressQualifier = readField(inlineData.addressQualifier);
    extended->accessQualifier = readField(inlineData.accessQualifier);
    extended->argName = readField(inlineData.argName);

    // Only the part of the type name that precedes the first ';' is kept.
    auto fullTypeName = readField(inlineData.typeName);
    const char *typeBegin = fullTypeName.data();
    const char *typeEnd = strchr(typeBegin, ';');
    if (typeEnd == nullptr) {
        typeEnd = typeBegin + fullTypeName.size();
    }
    extended->type = std::string(typeBegin, typeEnd).c_str();

    extended->typeQualifiers = readField(inlineData.typeQualifiers);

    // The compact metadata is derived from the strings parsed above.
    ArgTypeMetadata compact = {};
    compact.accessQualifier = KernelArgMetadata::parseAccessQualifier(extended->accessQualifier);
    compact.addressQualifier = KernelArgMetadata::parseAddressSpace(extended->addressQualifier);
    compact.typeQualifiers = KernelArgMetadata::parseTypeQualifiers(extended->typeQualifiers);

    dstKernelInfoArg.storeArgInfo(argIndex, compact, std::move(extended));
}
void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelInfoArg, const PatchTokenBinary::KernelArgFromPatchtokens &src) {
dstKernelInfoArg.needPatch = true;
dstKernelInfo.storeArgInfo(src.argInfo);
populateKernelInfoArgMetadata(dstKernelInfo, src.argInfo);
if (src.objectArg != nullptr) {
switch (src.objectArg->Token) {
default:
UNRECOVERABLE_IF(true);
case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT:
UNRECOVERABLE_IF(PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT != src.objectArg->Token);
dstKernelInfo.storeKernelArgument(reinterpret_cast<const SPatchImageMemoryObjectKernelArgument *>(src.objectArg));
break;
case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT:
@@ -111,7 +141,9 @@ void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelIn
dstKernelInfoArg.offsetObjectId = getOffset(src.objectId);
}
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src) {
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes,
const DeviceInfoKernelPayloadConstants &constants) {
UNRECOVERABLE_IF(nullptr == src.header);
dst.heapInfo.pKernelHeader = src.header;
dst.name = std::string(src.name.begin(), src.name.end()).c_str();
dst.heapInfo.pKernelHeap = src.isa.begin();
@@ -132,9 +164,7 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
dst.patchInfo.threadPayload = src.tokens.threadPayload;
dst.patchInfo.dataParameterStream = src.tokens.dataParameterStream;
dst.patchInfo.kernelArgumentInfo.reserve(src.tokens.kernelArgs.size());
dst.kernelArgInfo.resize(src.tokens.kernelArgs.size());
dst.argumentsToPatchNum = static_cast<uint32_t>(src.tokens.kernelArgs.size());
for (size_t i = 0U; i < src.tokens.kernelArgs.size(); ++i) {
auto &decodedKernelArg = src.tokens.kernelArgs[i];
@@ -184,7 +214,38 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
dst.igcInfoForGtpin = reinterpret_cast<const gtpin::igc_info_t *>(src.tokens.gtpinInfo + 1);
}
dst.isValid = (false == NEO::PatchTokenBinary::hasInvalidChecksum(src));
dst.gpuPointerSize = gpuPointerSizeInBytes;
if (dst.patchInfo.dataParameterStream && dst.patchInfo.dataParameterStream->DataParameterStreamSize) {
uint32_t crossThreadDataSize = dst.patchInfo.dataParameterStream->DataParameterStreamSize;
dst.crossThreadData = new char[crossThreadDataSize];
memset(dst.crossThreadData, 0x00, crossThreadDataSize);
uint32_t privateMemoryStatelessSizeOffset = dst.workloadInfo.privateMemoryStatelessSizeOffset;
uint32_t localMemoryStatelessWindowSizeOffset = dst.workloadInfo.localMemoryStatelessWindowSizeOffset;
uint32_t localMemoryStatelessWindowStartAddressOffset = dst.workloadInfo.localMemoryStatelessWindowStartAddressOffset;
if (localMemoryStatelessWindowStartAddressOffset != WorkloadInfo::undefinedOffset) {
*(uintptr_t *)&(dst.crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast<uintptr_t>(constants.slmWindow);
}
if (localMemoryStatelessWindowSizeOffset != WorkloadInfo::undefinedOffset) {
*(uint32_t *)&(dst.crossThreadData[localMemoryStatelessWindowSizeOffset]) = constants.slmWindowSize;
}
uint32_t privateMemorySize = 0U;
if (dst.patchInfo.pAllocateStatelessPrivateSurface) {
privateMemorySize = dst.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * constants.computeUnitsUsedForScratch * dst.getMaxSimdSize();
}
if (privateMemoryStatelessSizeOffset != WorkloadInfo::undefinedOffset) {
*(uint32_t *)&(dst.crossThreadData[privateMemoryStatelessSizeOffset]) = privateMemorySize;
}
if (dst.workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) {
*(uint32_t *)&(dst.crossThreadData[dst.workloadInfo.maxWorkGroupSizeOffset]) = constants.maxWorkGroupSize;
}
}
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019 Intel Corporation
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,14 +7,18 @@
#pragma once
#include <cstdint>
namespace NEO {
// Forward declarations of the types referenced by the signatures below.
struct DeviceInfoKernelPayloadConstants;
struct KernelInfo;
namespace PatchTokenBinary {
struct KernelFromPatchtokens;
}
// Fills `dst` from a single decoded patch-token kernel `src`.
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src);
// Variant that additionally receives the GPU pointer size (in bytes) and the
// device-derived constants written into the kernel's cross-thread data.
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes,
const DeviceInfoKernelPayloadConstants &constant);
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -60,8 +60,6 @@ struct PatchInfo {
statelessGlobalMemObjKernelArgs;
::std::vector<const SPatchImageMemoryObjectKernelArgument *>
imageMemObjKernelArgs;
::std::vector<const SPatchGlobalMemoryObjectKernelArgument *>
globalMemObjKernelArgs;
const SPatchDataParameterStream *dataParameterStream = nullptr;
const SPatchThreadPayload *threadPayload = nullptr;
const SPatchExecutionEnvironment *executionEnvironment = nullptr;
@@ -75,7 +73,6 @@ struct PatchInfo {
const SPatchAllocateStatelessDefaultDeviceQueueSurface *pAllocateStatelessDefaultDeviceQueueSurface = nullptr;
const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr;
::std::unordered_map<uint32_t, std::string> stringDataMap;
::std::vector<const SPatchKernelArgumentInfo *> kernelArgumentInfo;
};
} // namespace NEO

View File

@@ -10,6 +10,9 @@
#include "core/helpers/ptr_math.h"
#include "core/helpers/string.h"
#include "core/memory_manager/unified_memory_manager.h"
#include "core/program/program_info.h"
#include "core/program/program_info_from_patchtokens.h"
#include "core/program/program_initialization.h"
#include "runtime/compiler_interface/patchtokens_decoder.h"
#include "runtime/compiler_interface/patchtokens_dumper.h"
#include "runtime/compiler_interface/patchtokens_validator.inl"
@@ -53,124 +56,6 @@ const KernelInfo *Program::getKernelInfo(size_t ordinal) const {
return kernelInfoArray[ordinal];
}
std::string Program::getKernelNamesString() const {
std::string semiColonDelimitedKernelNameStr;
for (auto kernelInfo : kernelInfoArray) {
if (!semiColonDelimitedKernelNameStr.empty()) {
semiColonDelimitedKernelNameStr += ';';
}
semiColonDelimitedKernelNameStr += kernelInfo->name;
}
return semiColonDelimitedKernelNameStr;
}
// Builds the KernelInfo for kernel number `kernelNum` of `decodedProgram`:
// decodes it, registers its symbol / relocation tables with the linker input,
// seeds its cross-thread data with device constants, creates its kernel
// allocation and finally appends it to kernelInfoArray.
// `retVal` receives the status; on any failure the function returns early and
// the partially built KernelInfo is not added to kernelInfoArray.
void Program::populateKernelInfo(
const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram,
uint32_t kernelNum,
cl_int &retVal) {
auto kernelInfo = std::make_unique<KernelInfo>();
const PatchTokenBinary::KernelFromPatchtokens &decodedKernel = decodedProgram.kernels[kernelNum];
NEO::populateKernelInfo(*kernelInfo, decodedKernel);
retVal = kernelInfo->resolveKernelInfo();
if (retVal != CL_SUCCESS) {
return;
}
kernelInfo->gpuPointerSize = decodedProgram.header->GPUPointerSizeInBytes;
// Table entries are passed as the memory immediately following the token
// header (`token + 1`), together with the token's NumEntries.
if (decodedKernel.tokens.programSymbolTable) {
prepareLinkerInputStorage();
linkerInput->decodeExportedFunctionsSymbolTable(decodedKernel.tokens.programSymbolTable + 1, decodedKernel.tokens.programSymbolTable->NumEntries, kernelNum);
}
if (decodedKernel.tokens.programRelocationTable) {
prepareLinkerInputStorage();
linkerInput->decodeRelocationTable(decodedKernel.tokens.programRelocationTable + 1, decodedKernel.tokens.programRelocationTable->NumEntries, kernelNum);
}
// Cross-thread data is only created when the data parameter stream has a
// non-zero size; it is zero-filled and then patched at the offsets below.
if (kernelInfo->patchInfo.dataParameterStream && kernelInfo->patchInfo.dataParameterStream->DataParameterStreamSize) {
uint32_t crossThreadDataSize = kernelInfo->patchInfo.dataParameterStream->DataParameterStreamSize;
// Raw array allocation; its release is not visible in this function.
kernelInfo->crossThreadData = new char[crossThreadDataSize];
memset(kernelInfo->crossThreadData, 0x00, crossThreadDataSize);
uint32_t privateMemoryStatelessSizeOffset = kernelInfo->workloadInfo.privateMemoryStatelessSizeOffset;
uint32_t localMemoryStatelessWindowSizeOffset = kernelInfo->workloadInfo.localMemoryStatelessWindowSizeOffset;
uint32_t localMemoryStatelessWindowStartAddressOffset = kernelInfo->workloadInfo.localMemoryStatelessWindowStartAddressOffset;
// NOTE(review): 0xFFffFFff presumably equals WorkloadInfo::undefinedOffset,
// which is the sentinel used for maxWorkGroupSizeOffset below - confirm.
if (localMemoryStatelessWindowStartAddressOffset != 0xFFffFFff) {
*(uintptr_t *)&(kernelInfo->crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast<uintptr_t>(this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment));
}
// NOTE(review): pDevice is dereferenced here without a null check, although
// it is null-checked before createKernelAllocation further down - confirm.
if (localMemoryStatelessWindowSizeOffset != 0xFFffFFff) {
*(uint32_t *)&(kernelInfo->crossThreadData[localMemoryStatelessWindowSizeOffset]) = (uint32_t)this->pDevice->getDeviceInfo().localMemSize;
}
// Private memory size = per-thread size * compute units used for scratch * max SIMD size.
if (kernelInfo->patchInfo.pAllocateStatelessPrivateSurface && (privateMemoryStatelessSizeOffset != 0xFFffFFff)) {
*(uint32_t *)&(kernelInfo->crossThreadData[privateMemoryStatelessSizeOffset]) = kernelInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * this->getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * kernelInfo->getMaxSimdSize();
}
if (kernelInfo->workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) {
*(uint32_t *)&(kernelInfo->crossThreadData[kernelInfo->workloadInfo.maxWorkGroupSizeOffset]) = (uint32_t)this->getDevice(0).getDeviceInfo().maxWorkGroupSize;
}
}
// A kernel allocation is created only when the kernel heap is non-empty and a device is present.
if (kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) {
retVal = kernelInfo->createKernelAllocation(this->pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager()) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;
}
// A non-empty kernel heap without a device only triggers a debug break.
DEBUG_BREAK_IF(kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice);
if (retVal != CL_SUCCESS) {
return;
}
// The auxiliary arrays store the same raw pointer that is released into kernelInfoArray below.
if (kernelInfo->hasDeviceEnqueue()) {
parentKernelInfoArray.push_back(kernelInfo.get());
}
if (kernelInfo->requiresSubgroupIndependentForwardProgress()) {
subgroupKernelInfoArray.push_back(kernelInfo.get());
}
kernelInfoArray.push_back(kernelInfo.release());
}
// Reads a 64-bit value stored in little-endian byte order at `address`, which
// is not required to be aligned for uint64_t (or for any wider-than-byte type).
// The value is assembled byte by byte: accessing the storage through uint8_t
// is always permitted, whereas reading it through a uint32_t pointer (as was
// done before) breaks the aliasing rules and still needs 4-byte alignment.
inline uint64_t readMisalignedUint64(const uint64_t *address) {
    const uint8_t *bytes = reinterpret_cast<const uint8_t *>(address);
    uint64_t value = 0U;
    for (unsigned int i = 0U; i < sizeof(uint64_t); ++i) {
        value |= static_cast<uint64_t>(bytes[i]) << (8U * i);
    }
    return value;
}
// Allocates the surface backing program-scope constants (`constant == true`)
// or program-scope variables and fills it with `size` bytes from `initData`.
//
// When the globals are exported and the context provides an SVM allocations
// manager, the surface is created as an SVM allocation (read-only for
// constants); otherwise a CONSTANT_SURFACE / GLOBAL_SURFACE graphics allocation
// is requested from the device's memory manager.
//
// `device` must not be null. Returns the GPU allocation, or nullptr when the
// allocation could not be created.
GraphicsAllocation *allocateGlobalsSurface(NEO::Context *ctx, NEO::ClDevice *device, size_t size, bool constant, bool globalsAreExported, const void *initData) {
    UNRECOVERABLE_IF(device == nullptr);

    // The SVM manager is queried only when the SVM path can actually be taken.
    auto svmAllocsManager = (globalsAreExported && (ctx != nullptr)) ? ctx->getSVMAllocsManager() : nullptr;
    if (svmAllocsManager != nullptr) {
        NEO::SVMAllocsManager::SvmAllocationProperties svmProps = {};
        svmProps.coherent = false;
        svmProps.readOnly = constant;
        svmProps.hostPtrReadOnly = constant;

        auto ptr = svmAllocsManager->createSVMAlloc(device->getRootDeviceIndex(), size, svmProps);
        DEBUG_BREAK_IF(ptr == nullptr);
        if (ptr == nullptr) {
            return nullptr;
        }

        auto svmAlloc = svmAllocsManager->getSVMAlloc(ptr);
        UNRECOVERABLE_IF(svmAlloc == nullptr);
        auto gpuAlloc = svmAlloc->gpuAllocation;
        UNRECOVERABLE_IF(gpuAlloc == nullptr);
        device->getMemoryManager()->copyMemoryToAllocation(gpuAlloc, initData, static_cast<uint32_t>(size));
        // Return the allocation validated above rather than looking it up a
        // second time and dereferencing the result unchecked.
        return gpuAlloc;
    }

    auto allocationType = constant ? GraphicsAllocation::AllocationType::CONSTANT_SURFACE : GraphicsAllocation::AllocationType::GLOBAL_SURFACE;
    auto gpuAlloc = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), size, allocationType});
    DEBUG_BREAK_IF(gpuAlloc == nullptr);
    if (gpuAlloc == nullptr) {
        return nullptr;
    }
    memcpy_s(gpuAlloc->getUnderlyingBuffer(), gpuAlloc->getUnderlyingBufferSize(), initData, size);
    return gpuAlloc;
}
cl_int Program::isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) const {
std::string validatorErrMessage;
std::string validatorWarnings;
@@ -191,76 +76,27 @@ cl_int Program::isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decode
return CL_SUCCESS;
}
// Consumes the program-scope patch tokens of `decodedProgram`:
//  - forwards the global-variables symbol table (if any) to the linker input,
//  - (re)allocates the constant and global surfaces from their inline data,
//  - patches every pointer slot listed by the constantPointer / globalPointer
//    tokens with the GPU address of the surface the slot refers to.
void Program::processProgramScopeMetadata(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) {
    if (decodedProgram.programScopeTokens.symbolTable != nullptr) {
        const auto patch = decodedProgram.programScopeTokens.symbolTable;
        this->prepareLinkerInputStorage();
        // Symbol entries are passed as the memory right after the token header.
        this->linkerInput->decodeGlobalVariablesSymbolTable(patch + 1, patch->NumEntries);
    }

    if (decodedProgram.programScopeTokens.allocateConstantMemorySurface.size() != 0) {
        // Release the previous surface before it is replaced.
        pDevice->getMemoryManager()->freeGraphicsMemory(this->constantSurface);

        auto exportsGlobals = (linkerInput && linkerInput->getTraits().exportsGlobalConstants);
        size_t globalConstantsSurfaceSize = decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]->InlineDataSize;
        const void *globalConstantsInitData = NEO::PatchTokenBinary::getInlineData(decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]);
        this->constantSurface = allocateGlobalsSurface(context, pDevice, globalConstantsSurfaceSize, true, exportsGlobals, globalConstantsInitData);
    }

    if (decodedProgram.programScopeTokens.allocateGlobalMemorySurface.size() != 0) {
        // Release the previous surface before it is replaced.
        pDevice->getMemoryManager()->freeGraphicsMemory(this->globalSurface);

        auto exportsGlobals = (linkerInput && linkerInput->getTraits().exportsGlobalVariables);
        size_t globalVariablesSurfaceSize = decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]->InlineDataSize;
        const void *globalVariablesInitData = NEO::PatchTokenBinary::getInlineData(decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]);
        this->globalVarTotalSize = globalVariablesSurfaceSize;
        this->globalSurface = allocateGlobalsSurface(context, pDevice, globalVariablesSurfaceSize, false, exportsGlobals, globalVariablesInitData);
    }

    // Pointer slots located inside the constant surface.
    for (const auto &globalConstantPointerToken : decodedProgram.programScopeTokens.constantPointer) {
        NEO::GraphicsAllocation *srcSurface = this->constantSurface;
        if (globalConstantPointerToken->BufferType != PROGRAM_SCOPE_CONSTANT_BUFFER) {
            UNRECOVERABLE_IF(globalConstantPointerToken->BufferType != PROGRAM_SCOPE_GLOBAL_BUFFER);
            srcSurface = this->globalSurface;
        }
        UNRECOVERABLE_IF(srcSurface == nullptr);
        UNRECOVERABLE_IF(this->constantSurface == nullptr);
        auto offset = readMisalignedUint64(&globalConstantPointerToken->ConstantPointerOffset);
        const size_t pointerSize = constantSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t);
        // The whole patched slot [offset, offset + pointerSize) must fit in the
        // surface. The ternary is evaluated first and then added to the offset.
        UNRECOVERABLE_IF(this->constantSurface->getUnderlyingBufferSize() < (offset + pointerSize));
        void *patchOffset = ptrOffset(this->constantSurface->getUnderlyingBuffer(), static_cast<size_t>(offset));
        patchIncrement(patchOffset, pointerSize, srcSurface->getGpuAddressToPatch());
    }

    // Pointer slots located inside the global surface.
    for (const auto &globalVariablePointerToken : decodedProgram.programScopeTokens.globalPointer) {
        NEO::GraphicsAllocation *srcSurface = this->globalSurface;
        if (globalVariablePointerToken->BufferType != PROGRAM_SCOPE_GLOBAL_BUFFER) {
            UNRECOVERABLE_IF(globalVariablePointerToken->BufferType != PROGRAM_SCOPE_CONSTANT_BUFFER);
            srcSurface = this->constantSurface;
        }
        UNRECOVERABLE_IF(srcSurface == nullptr);
        UNRECOVERABLE_IF(this->globalSurface == nullptr);
        auto offset = readMisalignedUint64(&globalVariablePointerToken->GlobalPointerOffset);
        const size_t pointerSize = globalSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t);
        // Same bounds rule as for the constant surface above.
        UNRECOVERABLE_IF(this->globalSurface->getUnderlyingBufferSize() < (offset + pointerSize));
        void *patchOffset = ptrOffset(this->globalSurface->getUnderlyingBuffer(), static_cast<size_t>(offset));
        patchIncrement(patchOffset, pointerSize, srcSurface->getGpuAddressToPatch());
    }
}
cl_int Program::linkBinary() {
if (linkerInput == nullptr) {
return CL_SUCCESS;
}
Linker linker(*linkerInput);
Linker::Segment globals;
Linker::Segment constants;
Linker::Segment exportedFunctions;
Linker::SegmentInfo globals;
Linker::SegmentInfo constants;
Linker::SegmentInfo exportedFunctions;
Linker::PatchableSegment globalsForPatching;
Linker::PatchableSegment constantsForPatching;
if (this->globalSurface != nullptr) {
globals.gpuAddress = static_cast<uintptr_t>(this->globalSurface->getGpuAddress());
globals.segmentSize = this->globalSurface->getUnderlyingBufferSize();
globalsForPatching.hostPointer = this->globalSurface->getUnderlyingBuffer();
globalsForPatching.segmentSize = this->globalSurface->getUnderlyingBufferSize();
}
if (this->constantSurface != nullptr) {
constants.gpuAddress = static_cast<uintptr_t>(this->constantSurface->getGpuAddress());
constants.segmentSize = this->constantSurface->getUnderlyingBufferSize();
constantsForPatching.hostPointer = this->constantSurface->getUnderlyingBuffer();
constantsForPatching.segmentSize = this->constantSurface->getUnderlyingBufferSize();
}
if (this->linkerInput->getExportedFunctionsSegmentId() >= 0) {
// Exported functions reside in instruction heap of one of kernels
@@ -283,6 +119,7 @@ cl_int Program::linkBinary() {
Linker::UnresolvedExternals unresolvedExternalsInfo;
bool linkSuccess = linker.link(globals, constants, exportedFunctions,
globalsForPatching, constantsForPatching,
isaSegmentsForPatching, unresolvedExternalsInfo);
this->symbols = linker.extractRelocatedSymbols();
if (false == linkSuccess) {
@@ -295,6 +132,9 @@ cl_int Program::linkBinary() {
return CL_INVALID_BINARY;
} else if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
for (const auto &kernelInfo : this->kernelInfoArray) {
if (nullptr == kernelInfo->getGraphicsAllocation()) {
continue;
}
auto &kernHeapInfo = kernelInfo->heapInfo;
auto segmentId = &kernelInfo - &this->kernelInfoArray[0];
this->pDevice->getMemoryManager()->copyMemoryToAllocation(kernelInfo->getGraphicsAllocation(),
@@ -307,30 +147,83 @@ cl_int Program::linkBinary() {
// Rebuilds the program state from the stored gen binary: discards the current
// kernel infos and program-scope surfaces, decodes the binary into a
// ProgramInfo and then applies that ProgramInfo to this program.
// Returns the first non-success status, or the result of processProgramInfo.
cl_int Program::processGenBinary() {
    cleanCurrentKernelInfo();

    const bool hasProgramScopeSurface = (nullptr != this->constantSurface) || (nullptr != this->globalSurface);
    if (hasProgramScopeSurface) {
        pDevice->getMemoryManager()->freeGraphicsMemory(this->constantSurface);
        pDevice->getMemoryManager()->freeGraphicsMemory(this->globalSurface);
        this->constantSurface = nullptr;
        this->globalSurface = nullptr;
    }

    // View of the raw binary as bytes; no copy is made here.
    const auto *binaryBegin = reinterpret_cast<const uint8_t *>(genBinary.get());
    ArrayRef<const uint8_t> binaryBlob(binaryBegin, genBinarySize);

    ProgramInfo decodedInfo;
    const auto decodeStatus = this->processPatchTokensBinary(binaryBlob, decodedInfo);
    if (decodeStatus != CL_SUCCESS) {
        return decodeStatus;
    }
    return this->processProgramInfo(decodedInfo);
}
// Decodes `src` as a patch-token program binary, validates it with isHandled,
// gathers the device constants needed for kernel payloads and fills `dst`
// through NEO::populateProgramInfo.
// Returns the status reported by isHandled when validation fails, otherwise CL_SUCCESS.
cl_int Program::processPatchTokensBinary(ArrayRef<const uint8_t> src, ProgramInfo &dst) {
NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram = {};
// NOTE(review): `blob` is not declared in this function; this call looks like
// the earlier form of the `src`-based call on the next line - confirm.
NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(blob, decodedProgram);
NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(src, decodedProgram);
DBG_LOG(LogPatchTokens, NEO::PatchTokenBinary::asString(decodedProgram).c_str());
cl_int retVal = this->isHandled(decodedProgram);
if (CL_SUCCESS != retVal) {
return retVal;
}
// NOTE(review): the per-kernel loop below and the later `retVal` check appear
// to belong to the previous per-kernel implementation - confirm.
auto numKernels = decodedProgram.header->NumberOfKernels;
for (uint32_t i = 0; i < numKernels && retVal == CL_SUCCESS; i++) {
populateKernelInfo(decodedProgram, i, retVal);
// Device-dependent constants are only filled in when a device is attached.
NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants;
if (this->pDevice) {
deviceInfoConstants.maxWorkGroupSize = (uint32_t)this->pDevice->getDeviceInfo().maxWorkGroupSize;
deviceInfoConstants.computeUnitsUsedForScratch = this->pDevice->getDeviceInfo().computeUnitsUsedForScratch;
deviceInfoConstants.slmWindowSize = (uint32_t)this->pDevice->getDeviceInfo().localMemSize;
// The SLM window is reserved only if the decoded program reports that it needs one.
if (requiresLocalMemoryWindowVA(decodedProgram)) {
deviceInfoConstants.slmWindow = this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment);
}
}
if (retVal != CL_SUCCESS) {
return retVal;
NEO::populateProgramInfo(dst, decodedProgram, deviceInfoConstants);
return CL_SUCCESS;
}
// Applies a decoded ProgramInfo to this program: takes over its linker input
// and kernel infos, allocates the constant / global surfaces, creates a kernel
// allocation for every kernel with a non-empty heap, records parent and
// subgroup kernels and finally links the binary.
// Returns CL_OUT_OF_HOST_MEMORY when a kernel allocation fails, otherwise the
// result of linkBinary().
cl_int Program::processProgramInfo(ProgramInfo &src) {
// `src` is moved from; its linkerInput and kernelInfos must not be used afterwards.
this->linkerInput = std::move(src.linkerInput);
this->kernelInfoArray = std::move(src.kernelInfos);
auto svmAllocsManager = context ? context->getSVMAllocsManager() : nullptr;
// A device is mandatory as soon as any program-scope surface has to be allocated.
if (src.globalConstants.size != 0) {
UNRECOVERABLE_IF(nullptr == pDevice);
this->constantSurface = allocateGlobalsSurface(svmAllocsManager, pDevice->getDevice(), src.globalConstants.size, true, linkerInput.get(), src.globalConstants.initData);
}
// NOTE(review): `decodedProgram` is not declared in this function; this call
// looks like a leftover of the previous implementation - confirm.
processProgramScopeMetadata(decodedProgram);
if (src.globalVariables.size != 0) {
UNRECOVERABLE_IF(nullptr == pDevice);
this->globalSurface = allocateGlobalsSurface(svmAllocsManager, pDevice->getDevice(), src.globalVariables.size, false, linkerInput.get(), src.globalVariables.initData);
}
// NOTE(review): `retVal` is used on the next line before any declaration in
// this scope, and the `return retVal;` two lines further down would make the
// kernel loop unreachable - both look like leftover lines, confirm.
retVal = linkBinary();
this->globalVarTotalSize = src.globalVariables.size;
return retVal;
for (auto &kernelInfo : this->kernelInfoArray) {
cl_int retVal = CL_SUCCESS;
// Kernel allocations are created only for non-empty heaps and only when a device exists.
if (kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) {
retVal = kernelInfo->createKernelAllocation(this->pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager()) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;
}
// A non-empty kernel heap without a device only triggers a debug break.
DEBUG_BREAK_IF(kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice);
if (retVal != CL_SUCCESS) {
return retVal;
}
if (kernelInfo->hasDeviceEnqueue()) {
parentKernelInfoArray.push_back(kernelInfo);
}
if (kernelInfo->requiresSubgroupIndependentForwardProgress()) {
subgroupKernelInfoArray.push_back(kernelInfo);
}
}
return linkBinary();
}
bool Program::validateGenBinaryDevice(GFXCORE_FAMILY device) const {

View File

@@ -456,9 +456,4 @@ void Program::updateNonUniformFlag(const Program **inputPrograms, size_t numInpu
this->allowNonUniform = allowNonUniform;
}
// Lazily creates the LinkerInput owned by this program; an already existing
// instance is left untouched.
void Program::prepareLinkerInputStorage() {
    if (nullptr != this->linkerInput) {
        return;
    }
    this->linkerInput = std::make_unique<LinkerInput>();
}
} // namespace NEO

View File

@@ -9,6 +9,7 @@
#include "core/compiler_interface/compiler_interface.h"
#include "core/compiler_interface/linker.h"
#include "core/elf/writer.h"
#include "core/program/program_info.h"
#include "core/utilities/const_stringref.h"
#include "runtime/api/cl_types.h"
#include "runtime/helpers/base_object.h"
@@ -131,6 +132,8 @@ class Program : public BaseObject<_cl_program> {
std::unordered_map<std::string, BuiltinDispatchInfoBuilder *> &builtinsMap);
MOCKABLE_VIRTUAL cl_int processGenBinary();
MOCKABLE_VIRTUAL cl_int processPatchTokensBinary(ArrayRef<const uint8_t> src, ProgramInfo &dst);
MOCKABLE_VIRTUAL cl_int processProgramInfo(ProgramInfo &dst);
cl_int compile(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions,
cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames,
@@ -269,16 +272,12 @@ class Program : public BaseObject<_cl_program> {
MOCKABLE_VIRTUAL cl_int linkBinary();
MOCKABLE_VIRTUAL cl_int isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) const;
void processProgramScopeMetadata(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram);
void populateKernelInfo(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram, uint32_t kernelNum, cl_int &retVal);
MOCKABLE_VIRTUAL cl_int rebuildProgramFromIr();
bool validateGenBinaryDevice(GFXCORE_FAMILY device) const;
bool validateGenBinaryHeader(const iOpenCL::SProgramBinaryHeader *pGenBinaryHeader) const;
std::string getKernelNamesString() const;
void separateBlockKernels();
void updateNonUniformFlag();
@@ -292,8 +291,6 @@ class Program : public BaseObject<_cl_program> {
MOCKABLE_VIRTUAL bool appendKernelDebugOptions();
void notifyDebuggerWithSourceCode(std::string &filename);
void prepareLinkerInputStorage();
static const std::string clOptNameClVer;
cl_program_binary_type programBinaryType;
@@ -354,6 +351,4 @@ class Program : public BaseObject<_cl_program> {
bool kernelDebugEnabled = false;
};
GraphicsAllocation *allocateGlobalsSurface(NEO::Context *ctx, NEO::ClDevice *device, size_t size, bool constant, bool globalsAreExported, const void *initData);
} // namespace NEO

View File

@@ -178,9 +178,9 @@ void FileLogger<DebugLevel>::dumpKernelArgs(const Kernel *kernel) {
auto &argInfo = kernel->getKernelInfo().kernelArgInfo[i];
if (argInfo.addressQualifier == CL_KERNEL_ARG_ADDRESS_LOCAL) {
if (argInfo.metadata.addressQualifier == KernelArgMetadata::AddressSpaceQualifier::Local) {
type = "local";
} else if (argInfo.typeStr.find("image") != std::string::npos) {
} else if (argInfo.isImage) {
type = "image";
auto clMem = (const cl_mem)kernel->getKernelArg(i);
auto memObj = castToObject<MemObj>(clMem);
@@ -189,9 +189,9 @@ void FileLogger<DebugLevel>::dumpKernelArgs(const Kernel *kernel) {
size = memObj->getSize();
flags = memObj->getMemoryPropertiesFlags();
}
} else if (argInfo.typeStr.find("sampler") != std::string::npos) {
} else if (argInfo.isSampler) {
type = "sampler";
} else if (argInfo.typeStr.find("*") != std::string::npos) {
} else if (argInfo.isBuffer) {
type = "buffer";
auto clMem = (const cl_mem)kernel->getKernelArg(i);
auto memObj = castToObject<MemObj>(clMem);