mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Program refactor
* decouple program_info
* move global allocations relocation to linker
* remove obsolete tests
* initial cleanup to kernel_info kernelInfo
* unified patchtoken validation

Change-Id: I0567cd6d607b4f3cf44e6caf33681f6210760f76
This commit is contained in:
committed by
sys_ocldev
parent
570b09850d
commit
f057712fa7
@@ -35,6 +35,7 @@
|
||||
#include "runtime/helpers/queue_helpers.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/kernel/kernel_info_cl.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/mem_obj/mem_obj_helper.h"
|
||||
@@ -1576,11 +1577,6 @@ cl_kernel CL_API_CALL clCreateKernel(cl_program clProgram,
|
||||
break;
|
||||
}
|
||||
|
||||
if (pKernelInfo->isValid == false) {
|
||||
retVal = CL_INVALID_PROGRAM_EXECUTABLE;
|
||||
break;
|
||||
}
|
||||
|
||||
kernel = Kernel::create(
|
||||
pProgram,
|
||||
*pKernelInfo,
|
||||
@@ -1592,9 +1588,7 @@ cl_kernel CL_API_CALL clCreateKernel(cl_program clProgram,
|
||||
if (errcodeRet) {
|
||||
*errcodeRet = retVal;
|
||||
}
|
||||
if (kernel != nullptr) {
|
||||
gtpinNotifyKernelCreate(kernel);
|
||||
}
|
||||
gtpinNotifyKernelCreate(kernel);
|
||||
TRACING_EXIT(clCreateKernel, &kernel);
|
||||
return kernel;
|
||||
}
|
||||
@@ -1621,17 +1615,14 @@ cl_int CL_API_CALL clCreateKernelsInProgram(cl_program clProgram,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
for (unsigned int ordinal = 0; ordinal < numKernelsInProgram; ++ordinal) {
|
||||
const auto kernelInfo = program->getKernelInfo(ordinal);
|
||||
for (unsigned int i = 0; i < numKernelsInProgram; ++i) {
|
||||
const auto kernelInfo = program->getKernelInfo(i);
|
||||
DEBUG_BREAK_IF(kernelInfo == nullptr);
|
||||
DEBUG_BREAK_IF(!kernelInfo->isValid);
|
||||
kernels[ordinal] = Kernel::create(
|
||||
kernels[i] = Kernel::create(
|
||||
program,
|
||||
*kernelInfo,
|
||||
nullptr);
|
||||
if (kernels[ordinal] != nullptr) {
|
||||
gtpinNotifyKernelCreate(kernels[ordinal]);
|
||||
}
|
||||
gtpinNotifyKernelCreate(kernels[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4367,7 +4358,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
cl_int kernelArgAddressQualifier = pKernel->getKernelArgAddressQualifier(argIndex);
|
||||
cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo().kernelArgInfo[argIndex].metadata.addressQualifier);
|
||||
if ((kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_GLOBAL) &&
|
||||
(kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_CONSTANT)) {
|
||||
retVal = CL_INVALID_ARG_VALUE;
|
||||
@@ -5140,7 +5131,7 @@ cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL(
|
||||
Program *pProgram = (Program *)(program);
|
||||
const auto &symbols = pProgram->getSymbols();
|
||||
auto symbolIt = symbols.find(globalVariableName);
|
||||
if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.type == NEO::SymbolInfo::Function)) {
|
||||
if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment == NEO::SegmentType::Instructions)) {
|
||||
retVal = CL_INVALID_ARG_VALUE;
|
||||
} else {
|
||||
if (globalVariableSizeRet != nullptr) {
|
||||
@@ -5172,7 +5163,7 @@ cl_int CL_API_CALL clGetDeviceFunctionPointerINTEL(
|
||||
Program *pProgram = (Program *)(program);
|
||||
const auto &symbols = pProgram->getSymbols();
|
||||
auto symbolIt = symbols.find(functionName);
|
||||
if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.type != NEO::SymbolInfo::Function)) {
|
||||
if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment != NEO::SegmentType::Instructions)) {
|
||||
retVal = CL_INVALID_ARG_VALUE;
|
||||
} else {
|
||||
*functionPointerRet = static_cast<cl_ulong>(symbolIt->second.gpuAddress);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2019 Intel Corporation
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -84,9 +84,7 @@ class BuiltinDispatchInfoBuilder {
|
||||
template <typename KernelNameT, typename... KernelsDescArgsT>
|
||||
void grabKernels(KernelNameT &&kernelName, Kernel *&kernelDst, KernelsDescArgsT &&... kernelsDesc) {
|
||||
const KernelInfo *kernelInfo = prog->getKernelInfo(kernelName);
|
||||
if (!kernelInfo) {
|
||||
return;
|
||||
}
|
||||
UNRECOVERABLE_IF(nullptr == kernelInfo);
|
||||
cl_int err = 0;
|
||||
kernelDst = Kernel::create(prog.get(), *kernelInfo, &err);
|
||||
kernelDst->isBuiltIn = true;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "patch_shared.h"
|
||||
#include "program_debug_data.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "core/helpers/hw_info.h"
|
||||
#include "runtime/compiler_interface/patchtokens_decoder.h"
|
||||
#include "runtime/program/kernel_arg_info.h"
|
||||
|
||||
#include "igfxfmid.h"
|
||||
|
||||
@@ -90,6 +91,11 @@ inline ValidatorError validate(const ProgramFromPatchtokens &decodedProgram,
|
||||
return ValidatorError::InvalidBinary;
|
||||
}
|
||||
|
||||
if ((decodedProgram.header->GPUPointerSizeInBytes != 4U) && (decodedProgram.header->GPUPointerSizeInBytes != 8U)) {
|
||||
outErrReason = "Invalid pointer size";
|
||||
return ValidatorError::InvalidBinary;
|
||||
}
|
||||
|
||||
if (false == isDeviceSupported(static_cast<GFXCORE_FAMILY>(decodedProgram.header->Device))) {
|
||||
outErrReason = "Unsupported device binary, device GFXCORE_FAMILY : " + std::to_string(decodedProgram.header->Device);
|
||||
return ValidatorError::InvalidBinary;
|
||||
@@ -107,6 +113,25 @@ inline ValidatorError validate(const ProgramFromPatchtokens &decodedProgram,
|
||||
return ValidatorError::InvalidBinary;
|
||||
}
|
||||
|
||||
if (nullptr == decodedKernel.tokens.executionEnvironment) {
|
||||
outErrReason = "Missing execution environment";
|
||||
return ValidatorError::InvalidBinary;
|
||||
} else {
|
||||
switch (decodedKernel.tokens.executionEnvironment->LargestCompiledSIMDSize) {
|
||||
case 1:
|
||||
break;
|
||||
case 8:
|
||||
break;
|
||||
case 16:
|
||||
break;
|
||||
case 32:
|
||||
break;
|
||||
default:
|
||||
outErrReason = "Invalid LargestCompiledSIMDSize";
|
||||
return ValidatorError::InvalidBinary;
|
||||
}
|
||||
}
|
||||
|
||||
if (decodedKernel.tokens.allocateLocalSurface) {
|
||||
if (sharedLocalMemorySize < decodedKernel.tokens.allocateLocalSurface->TotalInlineLocalMemorySize) {
|
||||
outErrReason = "KernelFromPatchtokens requires too much SLM";
|
||||
@@ -114,6 +139,24 @@ inline ValidatorError validate(const ProgramFromPatchtokens &decodedProgram,
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &kernelArg : decodedKernel.tokens.kernelArgs) {
|
||||
if (kernelArg.argInfo == nullptr) {
|
||||
outErrReason = "Missing kernelArgInfo";
|
||||
return ValidatorError::InvalidBinary;
|
||||
}
|
||||
auto argInfoInlineData = getInlineData(kernelArg.argInfo);
|
||||
auto accessQualifier = KernelArgMetadata::parseAccessQualifier(parseLimitedString(argInfoInlineData.accessQualifier.begin(), argInfoInlineData.accessQualifier.size()));
|
||||
if (KernelArgMetadata::AccessQualifier::Unknown == accessQualifier) {
|
||||
outErrReason = "Unhandled access qualifier";
|
||||
return ValidatorError::InvalidBinary;
|
||||
}
|
||||
auto addressQualifier = KernelArgMetadata::parseAddressSpace(parseLimitedString(argInfoInlineData.addressQualifier.begin(), argInfoInlineData.addressQualifier.size()));
|
||||
if (KernelArgMetadata::AddressSpaceQualifier::Unknown == addressQualifier) {
|
||||
outErrReason = "Unhandled address qualifier";
|
||||
return ValidatorError::InvalidBinary;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &unhandledToken : decodedKernel.unhandledTokens) {
|
||||
if (false == tokenValidator.isSafeToSkipUnhandledToken(unhandledToken->Token)) {
|
||||
outErrReason = "Unhandled required kernel-scope Patch Token : " + std::to_string(unhandledToken->Token);
|
||||
|
||||
@@ -58,6 +58,9 @@ void gtpinNotifyContextDestroy(cl_context context) {
|
||||
}
|
||||
|
||||
void gtpinNotifyKernelCreate(cl_kernel kernel) {
|
||||
if (nullptr == kernel) {
|
||||
return;
|
||||
}
|
||||
if (isGTPinInitialized) {
|
||||
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
|
||||
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates();
|
||||
|
||||
@@ -14,6 +14,7 @@ set(RUNTIME_SRCS_KERNEL
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_extra.cpp
|
||||
)
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL})
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "runtime/helpers/surface_formats.h"
|
||||
#include "runtime/kernel/image_transformer.h"
|
||||
#include "runtime/kernel/kernel.inl"
|
||||
#include "runtime/kernel/kernel_info_cl.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/mem_obj/pipe.h"
|
||||
@@ -337,22 +338,20 @@ cl_int Kernel::initialize() {
|
||||
|
||||
// set the argument handler
|
||||
auto &argInfo = kernelInfo.kernelArgInfo[i];
|
||||
if (argInfo.addressQualifier == CL_KERNEL_ARG_ADDRESS_LOCAL) {
|
||||
if (argInfo.metadata.addressQualifier == KernelArgMetadata::AddressSpaceQualifier::Local) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgLocal;
|
||||
} else if (argInfo.isAccelerator) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgAccelerator;
|
||||
} else if (argInfo.typeQualifierStr.find("pipe") != std::string::npos) {
|
||||
} else if (argInfo.metadata.typeQualifiers.pipeQual) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgPipe;
|
||||
kernelArguments[i].type = PIPE_OBJ;
|
||||
} else if (argInfo.isImage) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgImage;
|
||||
kernelArguments[i].type = IMAGE_OBJ;
|
||||
usingImages = true;
|
||||
DEBUG_BREAK_IF(argInfo.typeStr.find("image") == std::string::npos);
|
||||
} else if (argInfo.isSampler) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgSampler;
|
||||
kernelArguments[i].type = SAMPLER_OBJ;
|
||||
DEBUG_BREAK_IF(!(*argInfo.typeStr.c_str() == '\0' || argInfo.typeStr.find("sampler") != std::string::npos));
|
||||
} else if (argInfo.isBuffer) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgBuffer;
|
||||
kernelArguments[i].type = BUFFER_OBJ;
|
||||
@@ -506,37 +505,44 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t
|
||||
const void *pSrc = nullptr;
|
||||
size_t srcSize = 0;
|
||||
auto numArgs = (cl_uint)kernelInfo.kernelArgInfo.size();
|
||||
auto argInfoIdx = kernelInfo.kernelArgInfo[argIndx];
|
||||
const auto &argInfo = kernelInfo.kernelArgInfo[argIndx];
|
||||
|
||||
if (argIndx >= numArgs) {
|
||||
retVal = CL_INVALID_ARG_INDEX;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
cl_kernel_arg_address_qualifier addressQualifier;
|
||||
cl_kernel_arg_access_qualifier accessQualifier;
|
||||
cl_kernel_arg_type_qualifier typeQualifier;
|
||||
|
||||
switch (paramName) {
|
||||
case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
|
||||
srcSize = sizeof(cl_uint);
|
||||
pSrc = &argInfoIdx.addressQualifier;
|
||||
addressQualifier = asClKernelArgAddressQualifier(argInfo.metadata.addressQualifier);
|
||||
srcSize = sizeof(addressQualifier);
|
||||
pSrc = &addressQualifier;
|
||||
break;
|
||||
|
||||
case CL_KERNEL_ARG_ACCESS_QUALIFIER:
|
||||
srcSize = sizeof(cl_uint);
|
||||
pSrc = &argInfoIdx.accessQualifier;
|
||||
break;
|
||||
|
||||
case CL_KERNEL_ARG_TYPE_NAME:
|
||||
srcSize = argInfoIdx.typeStr.length() + 1;
|
||||
pSrc = argInfoIdx.typeStr.c_str();
|
||||
accessQualifier = asClKernelArgAccessQualifier(argInfo.metadata.accessQualifier);
|
||||
srcSize = sizeof(accessQualifier);
|
||||
pSrc = &accessQualifier;
|
||||
break;
|
||||
|
||||
case CL_KERNEL_ARG_TYPE_QUALIFIER:
|
||||
srcSize = sizeof(argInfoIdx.typeQualifier);
|
||||
pSrc = &argInfoIdx.typeQualifier;
|
||||
typeQualifier = asClKernelArgTypeQualifier(argInfo.metadata.typeQualifiers);
|
||||
srcSize = sizeof(typeQualifier);
|
||||
pSrc = &typeQualifier;
|
||||
break;
|
||||
|
||||
case CL_KERNEL_ARG_TYPE_NAME:
|
||||
srcSize = argInfo.metadataExtended->type.length() + 1;
|
||||
pSrc = argInfo.metadataExtended->type.c_str();
|
||||
break;
|
||||
|
||||
case CL_KERNEL_ARG_NAME:
|
||||
srcSize = argInfoIdx.name.length() + 1;
|
||||
pSrc = argInfoIdx.name.c_str();
|
||||
srcSize = argInfo.metadataExtended->argName.length() + 1;
|
||||
pSrc = argInfo.metadataExtended->argName.c_str();
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -2290,10 +2296,10 @@ cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex,
|
||||
MemObj *pMemObj = nullptr;
|
||||
WithCastToInternal(mem, &pMemObj);
|
||||
if (pMemObj) {
|
||||
cl_kernel_arg_access_qualifier accessQualifier = getKernelInfo().kernelArgInfo[argIndex].accessQualifier;
|
||||
auto accessQualifier = getKernelInfo().kernelArgInfo[argIndex].metadata.accessQualifier;
|
||||
cl_mem_flags flags = pMemObj->getMemoryPropertiesFlags();
|
||||
if ((accessQualifier == CL_KERNEL_ARG_ACCESS_READ_ONLY && ((flags | CL_MEM_WRITE_ONLY) == flags)) ||
|
||||
(accessQualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY && ((flags | CL_MEM_READ_ONLY) == flags))) {
|
||||
if ((accessQualifier == KernelArgMetadata::AccessQualifier::ReadOnly && ((flags | CL_MEM_WRITE_ONLY) == flags)) ||
|
||||
(accessQualifier == KernelArgMetadata::AccessQualifier::WriteOnly && ((flags | CL_MEM_READ_ONLY) == flags))) {
|
||||
return CL_INVALID_ARG_VALUE;
|
||||
}
|
||||
} else {
|
||||
@@ -2341,7 +2347,7 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
|
||||
auto &context = this->program->getContext();
|
||||
if (context.isProvidingPerformanceHints()) {
|
||||
context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
|
||||
kernelInfo.name.c_str(), i, kernelInfo.kernelArgInfo.at(i).name.c_str());
|
||||
kernelInfo.name.c_str(), i, kernelInfo.kernelArgInfo.at(i).metadataExtended->argName.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,15 +72,12 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
const void *argVal);
|
||||
|
||||
template <typename kernel_t = Kernel, typename program_t = Program>
|
||||
static kernel_t *create(Program *program, const KernelInfo &kernelInfo, cl_int *errcodeRet) {
|
||||
static kernel_t *create(program_t *program, const KernelInfo &kernelInfo, cl_int *errcodeRet) {
|
||||
cl_int retVal;
|
||||
kernel_t *pKernel = nullptr;
|
||||
|
||||
do {
|
||||
// copy the kernel data into our new allocation
|
||||
pKernel = new kernel_t(program, kernelInfo, program->getDevice(0));
|
||||
retVal = pKernel->initialize();
|
||||
} while (false);
|
||||
pKernel = new kernel_t(program, kernelInfo, program->getDevice(0));
|
||||
retVal = pKernel->initialize();
|
||||
|
||||
if (retVal != CL_SUCCESS) {
|
||||
delete pKernel;
|
||||
@@ -179,10 +176,6 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
return kernelInfo.kernelArgInfo.size();
|
||||
}
|
||||
|
||||
uint32_t getKernelArgAddressQualifier(uint32_t argIndex) const {
|
||||
return kernelInfo.kernelArgInfo[argIndex].addressQualifier;
|
||||
}
|
||||
|
||||
bool requiresSshForBuffers() const {
|
||||
return kernelInfo.requiresSshForBuffers;
|
||||
}
|
||||
|
||||
57
runtime/kernel/kernel_info_cl.h
Normal file
57
runtime/kernel/kernel_info_cl.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/program/kernel_arg_info.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Converts the runtime's internal access-qualifier enumerator into the
// corresponding OpenCL cl_kernel_arg_access_qualifier constant.
// Enumerators without an explicit mapping below (e.g. Unknown) yield 0.
constexpr cl_kernel_arg_access_qualifier asClKernelArgAccessQualifier(KernelArgMetadata::AccessQualifier accessQualifier) {
    switch (accessQualifier) {
    case KernelArgMetadata::AccessQualifier::ReadWrite:
        return CL_KERNEL_ARG_ACCESS_READ_WRITE;
    case KernelArgMetadata::AccessQualifier::WriteOnly:
        return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
    case KernelArgMetadata::AccessQualifier::ReadOnly:
        return CL_KERNEL_ARG_ACCESS_READ_ONLY;
    case KernelArgMetadata::AccessQualifier::None:
        return CL_KERNEL_ARG_ACCESS_NONE;
    default:
        // No OpenCL counterpart for this enumerator.
        return 0U;
    }
}
|
||||
|
||||
// Converts the runtime's internal address-space enumerator into the
// corresponding OpenCL cl_kernel_arg_address_qualifier constant.
// Enumerators without an explicit mapping below (e.g. Unknown) yield 0.
constexpr cl_kernel_arg_address_qualifier asClKernelArgAddressQualifier(KernelArgMetadata::AddressSpaceQualifier addressQualifier) {
    switch (addressQualifier) {
    case KernelArgMetadata::AddressSpaceQualifier::Constant:
        return CL_KERNEL_ARG_ADDRESS_CONSTANT;
    case KernelArgMetadata::AddressSpaceQualifier::Private:
        return CL_KERNEL_ARG_ADDRESS_PRIVATE;
    case KernelArgMetadata::AddressSpaceQualifier::Local:
        return CL_KERNEL_ARG_ADDRESS_LOCAL;
    case KernelArgMetadata::AddressSpaceQualifier::Global:
        return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    default:
        // No OpenCL counterpart for this enumerator.
        return 0U;
    }
}
|
||||
|
||||
// Builds the OpenCL cl_kernel_arg_type_qualifier bitmask from the runtime's
// packed type-qualifier flags. Only the const, volatile, restrict and pipe
// flags are translated; unknownQual has no bit in the result.
constexpr cl_kernel_arg_type_qualifier asClKernelArgTypeQualifier(KernelArgMetadata::TypeQualifiers typeQualifiers) {
    cl_kernel_arg_type_qualifier clQualifiers = 0U;
    if (typeQualifiers.constQual) {
        clQualifiers |= CL_KERNEL_ARG_TYPE_CONST;
    }
    if (typeQualifiers.volatileQual) {
        clQualifiers |= CL_KERNEL_ARG_TYPE_VOLATILE;
    }
    if (typeQualifiers.restrictQual) {
        clQualifiers |= CL_KERNEL_ARG_TYPE_RESTRICT;
    }
    if (typeQualifiers.pipeQual) {
        clQualifiers |= CL_KERNEL_ARG_TYPE_PIPE;
    }
    return clQualifiers;
}
|
||||
} // namespace NEO
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/validators.h"
|
||||
#include "runtime/program/kernel_info.h"
|
||||
|
||||
#include "program.h"
|
||||
|
||||
@@ -56,7 +57,7 @@ cl_int Program::getInfo(cl_program_info paramName, size_t paramValueSize,
|
||||
break;
|
||||
|
||||
case CL_PROGRAM_KERNEL_NAMES:
|
||||
kernelNamesString = getKernelNamesString();
|
||||
kernelNamesString = concatenateKernelNames(kernelInfoArray);
|
||||
pSrc = kernelNamesString.c_str();
|
||||
retSize = srcSize = kernelNamesString.length() + 1;
|
||||
|
||||
|
||||
@@ -1,16 +1,186 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "CL/cl.h"
|
||||
|
||||
#include "core/compiler_interface/compiler_options/compiler_options_base.h"
|
||||
#include "core/utilities/const_stringref.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
namespace KernelArgMetadata {
|
||||
|
||||
enum class AccessQualifier : uint8_t {
|
||||
Unknown,
|
||||
None,
|
||||
ReadOnly,
|
||||
WriteOnly,
|
||||
ReadWrite,
|
||||
};
|
||||
|
||||
namespace AccessQualifierStrings {
|
||||
constexpr ConstStringRef none = "NONE";
|
||||
constexpr ConstStringRef readOnly = "read_only";
|
||||
constexpr ConstStringRef writeOnly = "write_only";
|
||||
constexpr ConstStringRef readWrite = "read_write";
|
||||
constexpr ConstStringRef underscoreReadOnly = "__read_only";
|
||||
constexpr ConstStringRef underscoreWriteOnly = "__write_only";
|
||||
constexpr ConstStringRef underscoreReadWrite = "__read_write";
|
||||
} // namespace AccessQualifierStrings
|
||||
|
||||
enum class AddressSpaceQualifier : uint8_t {
|
||||
Unknown,
|
||||
Global,
|
||||
Local,
|
||||
Private,
|
||||
Constant
|
||||
};
|
||||
|
||||
namespace AddressSpaceQualifierStrings {
|
||||
constexpr ConstStringRef addrGlobal = "__global";
|
||||
constexpr ConstStringRef addrLocal = "__local";
|
||||
constexpr ConstStringRef addrPrivate = "__private";
|
||||
constexpr ConstStringRef addrConstant = "__constant";
|
||||
constexpr ConstStringRef addrNotSpecified = "not_specified";
|
||||
} // namespace AddressSpaceQualifierStrings
|
||||
|
||||
// Parses the textual access qualifier of a kernel argument.
//
// Accepted spellings:
//   ""  / "NONE"                    -> AccessQualifier::None
//   "read_only"  / "__read_only"    -> AccessQualifier::ReadOnly
//   "write_only" / "__write_only"   -> AccessQualifier::WriteOnly
//   "read_write" / "__read_write"   -> AccessQualifier::ReadWrite
// Anything else yields AccessQualifier::Unknown.
constexpr AccessQualifier parseAccessQualifier(ConstStringRef str) {
    using namespace AccessQualifierStrings;
    if (str.empty() || (none == str)) {
        return AccessQualifier::None;
    }

    // Every remaining valid spelling is longer than two characters; this guard
    // also keeps the str[1] read and the two-character strip below in bounds.
    if (str.length() < 3) {
        return AccessQualifier::Unknown;
    }

    // Strip the prefix only when it really is "__". Checking just str[0] would
    // drop an arbitrary second character and accept malformed spellings such
    // as "_Xread_only".
    const bool hasDoubleUnderscore = ('_' == str[0]) && ('_' == str[1]);
    ConstStringRef strNoUnderscore = hasDoubleUnderscore ? ConstStringRef(str.data() + 2, str.length() - 2) : str;

    // The first character is enough to tell write_only apart from the two
    // read_* spellings; the asserts keep that shortcut honest.
    static_assert(writeOnly[0] != readOnly[0], "");
    static_assert(writeOnly[0] != readWrite[0], "");
    if (strNoUnderscore[0] == writeOnly[0]) {
        return (writeOnly == strNoUnderscore) ? AccessQualifier::WriteOnly : AccessQualifier::Unknown;
    }

    if (readOnly == strNoUnderscore) {
        return AccessQualifier::ReadOnly;
    }

    return (readWrite == strNoUnderscore) ? AccessQualifier::ReadWrite : AccessQualifier::Unknown;
}
|
||||
|
||||
// Parses the textual address-space qualifier of a kernel argument.
//
//   ""                              -> Global
//   "__global"                      -> Global
//   "__local"                       -> Local
//   "__private" / "not_specified"   -> Private
//   "__constant"                    -> Constant
// Anything else yields AddressSpaceQualifier::Unknown.
constexpr AddressSpaceQualifier parseAddressSpace(ConstStringRef str) {
    using namespace AddressSpaceQualifierStrings;
    if (str.empty()) {
        return AddressSpaceQualifier::Global;
    }

    // None of the recognised spellings is this short.
    if (str.length() < 3) {
        return AddressSpaceQualifier::Unknown;
    }

    if (str == addrGlobal) {
        return AddressSpaceQualifier::Global;
    }
    if (str == addrLocal) {
        return AddressSpaceQualifier::Local;
    }
    if ((str == addrPrivate) || (str == addrNotSpecified)) {
        return AddressSpaceQualifier::Private;
    }
    if (str == addrConstant) {
        return AddressSpaceQualifier::Constant;
    }
    return AddressSpaceQualifier::Unknown;
}
|
||||
|
||||
// Set of type-qualifier flags for a kernel argument, stored in a single byte.
// The one-bit flags share storage with `packed`, which starts out as zero.
union TypeQualifiers {
    // Whole-byte view of the flags; zero-initialised by default.
    uint8_t packed = 0U;
    // Individual one-bit flags.
    struct {
        bool constQual : 1;
        bool volatileQual : 1;
        bool restrictQual : 1;
        bool pipeQual : 1;
        bool unknownQual : 1;
    };
    // Returns true when the whole-byte view is zero.
    bool empty() const {
        return packed == 0U;
    }
};
|
||||
|
||||
namespace TypeQualifierStrings {
|
||||
constexpr ConstStringRef qualConst = "const";
|
||||
constexpr ConstStringRef qualVolatile = "volatile";
|
||||
constexpr ConstStringRef qualRestrict = "restrict";
|
||||
constexpr ConstStringRef qualPipe = "pipe";
|
||||
} // namespace TypeQualifierStrings
|
||||
|
||||
// Splits `str` with CompilerOptions::tokenize and sets the matching flag for
// every token that is exactly "const", "volatile", "restrict" or "pipe".
// Any other token sets unknownQual. Flags accumulate across tokens.
inline TypeQualifiers parseTypeQualifiers(ConstStringRef str) {
    using namespace TypeQualifierStrings;
    TypeQualifiers parsed = {};
    auto tokens = CompilerOptions::tokenize(str);
    for (const auto &token : tokens) {
        if (qualConst == token) {
            parsed.constQual = true;
        } else if (qualVolatile == token) {
            parsed.volatileQual = true;
        } else if (qualRestrict == token) {
            parsed.restrictQual = true;
        } else if (qualPipe == token) {
            parsed.pipeQual = true;
        } else {
            // Unrecognised token: record it without touching the known flags.
            parsed.unknownQual = true;
        }
    }
    return parsed;
}
|
||||
|
||||
} // namespace KernelArgMetadata
|
||||
|
||||
// Copies a possibly non-NUL-terminated character field into a std::string.
//
// At most `maxSize` bytes of `str` are inspected; the result stops at the
// first NUL byte inside that window, or holds all `maxSize` bytes when no NUL
// is present. A null `str` or a zero `maxSize` yields an empty string.
inline std::string parseLimitedString(const char *str, size_t maxSize) {
    if ((nullptr == str) || (0U == maxSize)) {
        return {};
    }

    // One bounded scan for the terminator avoids building a temporary copy of
    // the whole field just to measure it.
    const void *terminator = std::memchr(str, '\0', maxSize);
    const size_t length = (nullptr != terminator)
                              ? static_cast<size_t>(static_cast<const char *>(terminator) - str)
                              : maxSize;
    return std::string(str, length);
}
|
||||
|
||||
struct ArgTypeMetadata {
|
||||
uint32_t argByValSize = 0U;
|
||||
KernelArgMetadata::AccessQualifier accessQualifier = {};
|
||||
KernelArgMetadata::AddressSpaceQualifier addressQualifier = {};
|
||||
KernelArgMetadata::TypeQualifiers typeQualifiers = {};
|
||||
};
|
||||
static_assert(sizeof(ArgTypeMetadata) <= 8, "");
|
||||
|
||||
struct ArgTypeMetadataExtended {
|
||||
std::string argName;
|
||||
std::string type;
|
||||
std::string accessQualifier;
|
||||
std::string addressQualifier;
|
||||
std::string typeQualifiers;
|
||||
};
|
||||
|
||||
struct KernelArgPatchInfo {
|
||||
uint32_t crossthreadOffset = 0;
|
||||
uint32_t size = 0;
|
||||
@@ -18,15 +188,18 @@ struct KernelArgPatchInfo {
|
||||
};
|
||||
|
||||
struct KernelArgInfo {
|
||||
KernelArgInfo() = default;
|
||||
~KernelArgInfo() = default;
|
||||
KernelArgInfo(const KernelArgInfo &rhs) = delete;
|
||||
KernelArgInfo &operator=(const KernelArgInfo &) = delete;
|
||||
KernelArgInfo(KernelArgInfo &&) = default;
|
||||
KernelArgInfo &operator=(KernelArgInfo &&) = default;
|
||||
|
||||
static constexpr uint32_t undefinedOffset = (uint32_t)-1;
|
||||
|
||||
std::string name;
|
||||
std::string typeStr;
|
||||
std::string accessQualifierStr;
|
||||
std::string addressQualifierStr;
|
||||
std::string typeQualifierStr;
|
||||
uint32_t offsetHeap = 0;
|
||||
std::vector<KernelArgPatchInfo> kernelArgPatchInfoVector;
|
||||
ArgTypeMetadata metadata;
|
||||
std::unique_ptr<ArgTypeMetadataExtended> metadataExtended;
|
||||
|
||||
uint32_t slmAlignment = 0;
|
||||
bool isImage = false;
|
||||
bool isMediaImage = false;
|
||||
@@ -37,6 +210,11 @@ struct KernelArgInfo {
|
||||
bool isBuffer = false;
|
||||
bool pureStatefulBufferAccess = false;
|
||||
bool isReadOnly = false;
|
||||
bool needPatch = false;
|
||||
bool isTransformable = false;
|
||||
|
||||
uint32_t offsetHeap = 0;
|
||||
std::vector<KernelArgPatchInfo> kernelArgPatchInfoVector;
|
||||
uint32_t samplerArgumentType = 0;
|
||||
uint32_t offsetImgWidth = undefinedOffset;
|
||||
uint32_t offsetImgHeight = undefinedOffset;
|
||||
@@ -59,13 +237,6 @@ struct KernelArgInfo {
|
||||
uint32_t offsetFlatWidth = undefinedOffset;
|
||||
uint32_t offsetFlatHeight = undefinedOffset;
|
||||
uint32_t offsetFlatPitch = undefinedOffset;
|
||||
|
||||
bool needPatch = false;
|
||||
bool isTransformable = false;
|
||||
|
||||
cl_kernel_arg_access_qualifier accessQualifier = CL_KERNEL_ARG_ACCESS_NONE;
|
||||
cl_kernel_arg_address_qualifier addressQualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
|
||||
cl_kernel_arg_type_qualifier typeQualifier = CL_KERNEL_ARG_TYPE_NONE;
|
||||
|
||||
KernelArgInfo() = default;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -26,41 +26,11 @@
|
||||
|
||||
namespace NEO {
|
||||
|
||||
const uint32_t WorkloadInfo::undefinedOffset = (uint32_t)-1;
|
||||
const uint32_t WorkloadInfo::invalidParentEvent = (uint32_t)-1;
|
||||
|
||||
std::unordered_map<std::string, uint32_t> accessQualifierMap = {
|
||||
{"", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"NONE", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"__read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"__write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE},
|
||||
{"__read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, uint32_t> addressQualifierMap = {
|
||||
{"", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"__global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"__local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
||||
{"__private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
{"__constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
||||
{"not_specified", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
};
|
||||
|
||||
struct KernelArgumentType {
|
||||
const char *argTypeQualifier;
|
||||
uint64_t argTypeQualifierValue;
|
||||
};
|
||||
|
||||
constexpr KernelArgumentType typeQualifiers[] = {
|
||||
{"const", CL_KERNEL_ARG_TYPE_CONST},
|
||||
{"volatile", CL_KERNEL_ARG_TYPE_VOLATILE},
|
||||
{"restrict", CL_KERNEL_ARG_TYPE_RESTRICT},
|
||||
{"pipe", CL_KERNEL_ARG_TYPE_PIPE},
|
||||
};
|
||||
|
||||
std::map<std::string, size_t> typeSizeMap = {
|
||||
{"char", sizeof(cl_char)},
|
||||
{"char2", sizeof(cl_char2)},
|
||||
@@ -235,26 +205,12 @@ void KernelInfo::storePatchToken(const SPatchExecutionEnvironment *execEnv) {
|
||||
}
|
||||
}
|
||||
|
||||
void KernelInfo::storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo) {
|
||||
if (pkernelArgInfo == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t argNum = pkernelArgInfo->ArgumentNumber;
|
||||
void KernelInfo::storeArgInfo(uint32_t argNum, ArgTypeMetadata metadata, std::unique_ptr<ArgTypeMetadataExtended> metadataExtended) {
|
||||
resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
|
||||
auto inlineData = PatchTokenBinary::getInlineData(pkernelArgInfo);
|
||||
|
||||
kernelArgInfo[argNum].addressQualifierStr = std::string(inlineData.addressQualifier.begin(), inlineData.addressQualifier.end()).c_str();
|
||||
kernelArgInfo[argNum].accessQualifierStr = std::string(inlineData.accessQualifier.begin(), inlineData.accessQualifier.end()).c_str();
|
||||
kernelArgInfo[argNum].name = std::string(inlineData.argName.begin(), inlineData.argName.end()).c_str();
|
||||
|
||||
auto argTypeDelim = strchr(inlineData.typeName.begin(), ';');
|
||||
DEBUG_BREAK_IF(argTypeDelim == nullptr);
|
||||
kernelArgInfo[argNum].typeStr = std::string(inlineData.typeName.begin(), ptrDiff(argTypeDelim, inlineData.typeName.begin())).c_str();
|
||||
kernelArgInfo[argNum].typeQualifierStr = std::string(inlineData.typeQualifiers.begin(), inlineData.typeQualifiers.end()).c_str();
|
||||
|
||||
patchInfo.kernelArgumentInfo.push_back(pkernelArgInfo);
|
||||
auto &argInfo = kernelArgInfo[argNum];
|
||||
argInfo.metadata = metadata;
|
||||
argInfo.metadataExtended = std::move(metadataExtended);
|
||||
argInfo.isReadOnly |= argInfo.metadata.typeQualifiers.constQual;
|
||||
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(
|
||||
@@ -295,9 +251,11 @@ void KernelInfo::storeKernelArgument(
|
||||
kernelArgInfo[argNum].isMediaBlockImage = true;
|
||||
}
|
||||
|
||||
kernelArgInfo[argNum].accessQualifier = pImageMemObjKernelArg->Writeable
|
||||
? CL_KERNEL_ARG_ACCESS_READ_WRITE
|
||||
: CL_KERNEL_ARG_ACCESS_READ_ONLY;
|
||||
kernelArgInfo[argNum].metadata.accessQualifier = pImageMemObjKernelArg->Writeable
|
||||
? KernelArgMetadata::AccessQualifier::ReadWrite
|
||||
: KernelArgMetadata::AccessQualifier::ReadOnly;
|
||||
|
||||
kernelArgInfo[argNum].metadata.argByValSize = sizeof(cl_mem);
|
||||
|
||||
kernelArgInfo[argNum].isTransformable = pImageMemObjKernelArg->Transformable != 0;
|
||||
patchInfo.imageMemObjKernelArgs.push_back(pImageMemObjKernelArg);
|
||||
@@ -311,8 +269,6 @@ void KernelInfo::storeKernelArgument(
|
||||
usesSsh |= true;
|
||||
storeKernelArgPatchInfo(argNum, 0, 0, 0, offsetSurfaceState);
|
||||
kernelArgInfo[argNum].isBuffer = true;
|
||||
|
||||
patchInfo.globalMemObjKernelArgs.push_back(pGlobalMemObjKernelArg);
|
||||
}
|
||||
|
||||
void KernelInfo::storeKernelArgument(
|
||||
@@ -417,43 +373,6 @@ void KernelInfo::storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBu
|
||||
patchInfo.pAllocateSyncBuffer = pAllocateSyncBuffer;
|
||||
}
|
||||
|
||||
// Translates the textual access/address/type qualifiers stored for every
// kernel argument into their numeric counterparts.
//
// Returns CL_SUCCESS when all arguments were resolved. Returns
// CL_INVALID_BINARY as soon as an access or address qualifier string is not
// present in its lookup map; arguments after the failing one are not touched.
cl_int KernelInfo::resolveKernelInfo() {
    for (auto &argInfo : kernelArgInfo) {
        const auto accessIt = accessQualifierMap.find(argInfo.accessQualifierStr);
        if (accessIt == accessQualifierMap.end()) {
            return CL_INVALID_BINARY;
        }
        argInfo.accessQualifier = accessIt->second;

        const auto addressIt = addressQualifierMap.find(argInfo.addressQualifierStr);
        if (addressIt == addressQualifierMap.end()) {
            return CL_INVALID_BINARY;
        }
        argInfo.addressQualifier = addressIt->second;

        // The type qualifier string may name several qualifiers at once, so
        // every table entry is searched for and the matches are OR-ed together.
        for (const auto &qualifier : typeQualifiers) {
            if (strstr(argInfo.typeQualifierStr.c_str(), qualifier.argTypeQualifier) == nullptr) {
                continue;
            }
            argInfo.typeQualifier |= qualifier.argTypeQualifierValue;

            // Test the matched qualifier itself rather than comparing the
            // accumulated bitfield for equality, so that the result does not
            // depend on the order of entries in the qualifier table.
            if ((qualifier.argTypeQualifierValue & CL_KERNEL_ARG_TYPE_CONST) != 0) {
                argInfo.isReadOnly = true;
            }
        }
    }

    return CL_SUCCESS;
}
|
||||
|
||||
void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t dataOffset, uint32_t sourceOffset, uint32_t offsetSSH) {
|
||||
resizeKernelArgInfoAndRegisterParameter(argNum);
|
||||
|
||||
@@ -505,4 +424,17 @@ bool KernelInfo::createKernelAllocation(uint32_t rootDeviceIndex, MemoryManager
|
||||
return memoryManager->copyMemoryToAllocation(kernelAllocation, heapInfo.pKernelHeap, kernelIsaSize);
|
||||
}
|
||||
|
||||
// Joins the names of the given kernels into a single ';'-delimited string,
// in the order they appear in kernelInfos.
//
// A separator is emitted only when the accumulated string is already
// non-empty, so no leading ';' is produced.
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos) {
    std::string semiColonDelimitedKernelNameStr;

    for (const auto &kernelInfo : kernelInfos) {
        if (!semiColonDelimitedKernelNameStr.empty()) {
            semiColonDelimitedKernelNameStr += ';';
        }
        semiColonDelimitedKernelNameStr += kernelInfo->name;
    }

    return semiColonDelimitedKernelNameStr;
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,11 +7,11 @@
|
||||
|
||||
#pragma once
|
||||
#include "core/helpers/hw_info.h"
|
||||
#include "core/utilities/arrayref.h"
|
||||
#include "core/utilities/const_stringref.h"
|
||||
#include "runtime/program/heap_info.h"
|
||||
#include "runtime/program/kernel_arg_info.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "ocl_igc_shared/gtpin/gtpin_driver_common.h"
|
||||
#include "patch_info.h"
|
||||
|
||||
#include <algorithm>
|
||||
@@ -23,6 +23,10 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace gtpin {
// Forward declaration only: lets this header hold pointers to igc_info_t
// without needing the full definition of the type.
typedef struct igc_info_s igc_info_t;
} // namespace gtpin
|
||||
|
||||
namespace NEO {
|
||||
class BuiltinDispatchInfoBuilder;
|
||||
class Device;
|
||||
@@ -33,13 +37,11 @@ struct KernelArgumentType;
|
||||
class GraphicsAllocation;
|
||||
class MemoryManager;
|
||||
|
||||
extern std::unordered_map<std::string, uint32_t> accessQualifierMap;
|
||||
extern std::unordered_map<std::string, uint32_t> addressQualifierMap;
|
||||
extern std::map<std::string, size_t> typeSizeMap;
|
||||
|
||||
struct WorkloadInfo {
|
||||
static const uint32_t undefinedOffset;
|
||||
static const uint32_t invalidParentEvent;
|
||||
enum : uint32_t { undefinedOffset = std::numeric_limits<uint32_t>::max() };
|
||||
enum : uint32_t { invalidParentEvent = std::numeric_limits<uint32_t>::max() };
|
||||
|
||||
uint32_t globalWorkOffsetOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
|
||||
uint32_t globalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
|
||||
@@ -90,6 +92,13 @@ struct DebugData {
|
||||
const char *genIsa = nullptr;
|
||||
};
|
||||
|
||||
// Device-dependent constants that populateKernelInfo() writes into a kernel's
// cross-thread data. All fields default to zero / nullptr.
struct DeviceInfoKernelPayloadConstants {
    // Written at workloadInfo.localMemoryStatelessWindowStartAddressOffset.
    void *slmWindow = nullptr;
    // Written at workloadInfo.localMemoryStatelessWindowSizeOffset.
    uint32_t slmWindowSize = 0U;
    // Multiplied by the per-thread private memory size and the kernel's max
    // SIMD size to obtain the stateless private memory size.
    uint32_t computeUnitsUsedForScratch = 0U;
    // Written at workloadInfo.maxWorkGroupSizeOffset.
    uint32_t maxWorkGroupSize = 0U;
};
|
||||
|
||||
struct KernelInfo {
|
||||
public:
|
||||
KernelInfo() = default;
|
||||
@@ -97,7 +106,7 @@ struct KernelInfo {
|
||||
KernelInfo &operator=(const KernelInfo &) = delete;
|
||||
~KernelInfo();
|
||||
|
||||
void storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo);
|
||||
void storeArgInfo(uint32_t argNum, ArgTypeMetadata metadata, std::unique_ptr<ArgTypeMetadataExtended> metadataExtended);
|
||||
void storeKernelArgument(const SPatchDataParameterBuffer *pDataParameterKernelArg);
|
||||
void storeKernelArgument(const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg);
|
||||
void storeKernelArgument(const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg);
|
||||
@@ -117,7 +126,6 @@ struct KernelInfo {
|
||||
void storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface);
|
||||
void storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer);
|
||||
GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; }
|
||||
cl_int resolveKernelInfo();
|
||||
void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) {
|
||||
if (kernelArgInfo.size() <= argCount) {
|
||||
kernelArgInfo.resize(argCount + 1);
|
||||
@@ -171,7 +179,7 @@ struct KernelInfo {
|
||||
int32_t getArgNumByName(const char *name) const {
|
||||
int32_t argNum = 0;
|
||||
for (auto &arg : kernelArgInfo) {
|
||||
if (arg.name == name) {
|
||||
if (arg.metadataExtended && (arg.metadataExtended->argName == name)) {
|
||||
return argNum;
|
||||
}
|
||||
++argNum;
|
||||
@@ -191,7 +199,6 @@ struct KernelInfo {
|
||||
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
|
||||
bool usesSsh = false;
|
||||
bool requiresSshForBuffers = false;
|
||||
bool isValid = false;
|
||||
bool isVmeWorkload = false;
|
||||
char *crossThreadData = nullptr;
|
||||
size_t reqdWorkGroupSize[3] = {WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset};
|
||||
@@ -210,4 +217,7 @@ struct KernelInfo {
|
||||
bool computeMode = false;
|
||||
const gtpin::igc_info_t *igcInfoForGtpin = nullptr;
|
||||
};
|
||||
|
||||
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos);
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#include "runtime/compiler_interface/patchtokens_decoder.h"
|
||||
#include "runtime/program/kernel_info.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using namespace iOpenCL;
|
||||
@@ -29,14 +31,42 @@ inline uint32_t getOffset(T *token) {
|
||||
return WorkloadInfo::undefinedOffset;
|
||||
}
|
||||
|
||||
// Decodes the inline string data of a kernel-argument-info patch token into
// argument metadata and stores it in dstKernelInfoArg for the argument
// number named by the token.
//
// src may be nullptr, in which case nothing is stored.
void populateKernelInfoArgMetadata(KernelInfo &dstKernelInfoArg, const SPatchKernelArgumentInfo *src) {
    if (nullptr == src) {
        return;
    }

    const uint32_t argNum = src->ArgumentNumber;
    auto inlineData = PatchTokenBinary::getInlineData(src);

    auto metadataExtended = std::make_unique<ArgTypeMetadataExtended>();
    metadataExtended->addressQualifier = parseLimitedString(inlineData.addressQualifier.begin(), inlineData.addressQualifier.size());
    metadataExtended->accessQualifier = parseLimitedString(inlineData.accessQualifier.begin(), inlineData.accessQualifier.size());
    metadataExtended->argName = parseLimitedString(inlineData.argName.begin(), inlineData.argName.size());

    // Only the part of the type name that precedes the first ';' is kept;
    // when there is no ';' the whole string is used.
    auto argTypeFull = parseLimitedString(inlineData.typeName.begin(), inlineData.typeName.size());
    // Both ends of the range are held as const char * so that the iterator
    // pair constructor below always sees two arguments of the same type.
    const char *argTypeBegin = argTypeFull.data();
    const char *argTypeDelim = strchr(argTypeBegin, ';');
    if (nullptr == argTypeDelim) {
        argTypeDelim = argTypeBegin + argTypeFull.size();
    }
    metadataExtended->type = std::string(argTypeBegin, argTypeDelim).c_str();
    metadataExtended->typeQualifiers = parseLimitedString(inlineData.typeQualifiers.begin(), inlineData.typeQualifiers.size());

    // The compact metadata is parsed from the extended strings decoded above.
    ArgTypeMetadata metadata = {};
    metadata.accessQualifier = KernelArgMetadata::parseAccessQualifier(metadataExtended->accessQualifier);
    metadata.addressQualifier = KernelArgMetadata::parseAddressSpace(metadataExtended->addressQualifier);
    metadata.typeQualifiers = KernelArgMetadata::parseTypeQualifiers(metadataExtended->typeQualifiers);

    dstKernelInfoArg.storeArgInfo(argNum, metadata, std::move(metadataExtended));
}
|
||||
|
||||
void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelInfoArg, const PatchTokenBinary::KernelArgFromPatchtokens &src) {
|
||||
dstKernelInfoArg.needPatch = true;
|
||||
dstKernelInfo.storeArgInfo(src.argInfo);
|
||||
populateKernelInfoArgMetadata(dstKernelInfo, src.argInfo);
|
||||
if (src.objectArg != nullptr) {
|
||||
switch (src.objectArg->Token) {
|
||||
default:
|
||||
UNRECOVERABLE_IF(true);
|
||||
case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT:
|
||||
UNRECOVERABLE_IF(PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT != src.objectArg->Token);
|
||||
dstKernelInfo.storeKernelArgument(reinterpret_cast<const SPatchImageMemoryObjectKernelArgument *>(src.objectArg));
|
||||
break;
|
||||
case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT:
|
||||
@@ -111,7 +141,9 @@ void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelIn
|
||||
dstKernelInfoArg.offsetObjectId = getOffset(src.objectId);
|
||||
}
|
||||
|
||||
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src) {
|
||||
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes,
|
||||
const DeviceInfoKernelPayloadConstants &constants) {
|
||||
UNRECOVERABLE_IF(nullptr == src.header);
|
||||
dst.heapInfo.pKernelHeader = src.header;
|
||||
dst.name = std::string(src.name.begin(), src.name.end()).c_str();
|
||||
dst.heapInfo.pKernelHeap = src.isa.begin();
|
||||
@@ -132,9 +164,7 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
|
||||
dst.patchInfo.threadPayload = src.tokens.threadPayload;
|
||||
dst.patchInfo.dataParameterStream = src.tokens.dataParameterStream;
|
||||
|
||||
dst.patchInfo.kernelArgumentInfo.reserve(src.tokens.kernelArgs.size());
|
||||
dst.kernelArgInfo.resize(src.tokens.kernelArgs.size());
|
||||
dst.argumentsToPatchNum = static_cast<uint32_t>(src.tokens.kernelArgs.size());
|
||||
|
||||
for (size_t i = 0U; i < src.tokens.kernelArgs.size(); ++i) {
|
||||
auto &decodedKernelArg = src.tokens.kernelArgs[i];
|
||||
@@ -184,7 +214,38 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
|
||||
dst.igcInfoForGtpin = reinterpret_cast<const gtpin::igc_info_t *>(src.tokens.gtpinInfo + 1);
|
||||
}
|
||||
|
||||
dst.isValid = (false == NEO::PatchTokenBinary::hasInvalidChecksum(src));
|
||||
dst.gpuPointerSize = gpuPointerSizeInBytes;
|
||||
|
||||
if (dst.patchInfo.dataParameterStream && dst.patchInfo.dataParameterStream->DataParameterStreamSize) {
|
||||
uint32_t crossThreadDataSize = dst.patchInfo.dataParameterStream->DataParameterStreamSize;
|
||||
dst.crossThreadData = new char[crossThreadDataSize];
|
||||
memset(dst.crossThreadData, 0x00, crossThreadDataSize);
|
||||
|
||||
uint32_t privateMemoryStatelessSizeOffset = dst.workloadInfo.privateMemoryStatelessSizeOffset;
|
||||
uint32_t localMemoryStatelessWindowSizeOffset = dst.workloadInfo.localMemoryStatelessWindowSizeOffset;
|
||||
uint32_t localMemoryStatelessWindowStartAddressOffset = dst.workloadInfo.localMemoryStatelessWindowStartAddressOffset;
|
||||
|
||||
if (localMemoryStatelessWindowStartAddressOffset != WorkloadInfo::undefinedOffset) {
|
||||
*(uintptr_t *)&(dst.crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast<uintptr_t>(constants.slmWindow);
|
||||
}
|
||||
|
||||
if (localMemoryStatelessWindowSizeOffset != WorkloadInfo::undefinedOffset) {
|
||||
*(uint32_t *)&(dst.crossThreadData[localMemoryStatelessWindowSizeOffset]) = constants.slmWindowSize;
|
||||
}
|
||||
|
||||
uint32_t privateMemorySize = 0U;
|
||||
if (dst.patchInfo.pAllocateStatelessPrivateSurface) {
|
||||
privateMemorySize = dst.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * constants.computeUnitsUsedForScratch * dst.getMaxSimdSize();
|
||||
}
|
||||
|
||||
if (privateMemoryStatelessSizeOffset != WorkloadInfo::undefinedOffset) {
|
||||
*(uint32_t *)&(dst.crossThreadData[privateMemoryStatelessSizeOffset]) = privateMemorySize;
|
||||
}
|
||||
|
||||
if (dst.workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) {
|
||||
*(uint32_t *)&(dst.crossThreadData[dst.workloadInfo.maxWorkGroupSizeOffset]) = constants.maxWorkGroupSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,14 +7,18 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct DeviceInfoKernelPayloadConstants;
|
||||
struct KernelInfo;
|
||||
|
||||
namespace PatchTokenBinary {
|
||||
struct KernelFromPatchtokens;
|
||||
}
|
||||
|
||||
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src);
|
||||
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes,
|
||||
const DeviceInfoKernelPayloadConstants &constant);
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -60,8 +60,6 @@ struct PatchInfo {
|
||||
statelessGlobalMemObjKernelArgs;
|
||||
::std::vector<const SPatchImageMemoryObjectKernelArgument *>
|
||||
imageMemObjKernelArgs;
|
||||
::std::vector<const SPatchGlobalMemoryObjectKernelArgument *>
|
||||
globalMemObjKernelArgs;
|
||||
const SPatchDataParameterStream *dataParameterStream = nullptr;
|
||||
const SPatchThreadPayload *threadPayload = nullptr;
|
||||
const SPatchExecutionEnvironment *executionEnvironment = nullptr;
|
||||
@@ -75,7 +73,6 @@ struct PatchInfo {
|
||||
const SPatchAllocateStatelessDefaultDeviceQueueSurface *pAllocateStatelessDefaultDeviceQueueSurface = nullptr;
|
||||
const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr;
|
||||
::std::unordered_map<uint32_t, std::string> stringDataMap;
|
||||
::std::vector<const SPatchKernelArgumentInfo *> kernelArgumentInfo;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -10,6 +10,9 @@
|
||||
#include "core/helpers/ptr_math.h"
|
||||
#include "core/helpers/string.h"
|
||||
#include "core/memory_manager/unified_memory_manager.h"
|
||||
#include "core/program/program_info.h"
|
||||
#include "core/program/program_info_from_patchtokens.h"
|
||||
#include "core/program/program_initialization.h"
|
||||
#include "runtime/compiler_interface/patchtokens_decoder.h"
|
||||
#include "runtime/compiler_interface/patchtokens_dumper.h"
|
||||
#include "runtime/compiler_interface/patchtokens_validator.inl"
|
||||
@@ -53,124 +56,6 @@ const KernelInfo *Program::getKernelInfo(size_t ordinal) const {
|
||||
return kernelInfoArray[ordinal];
|
||||
}
|
||||
|
||||
std::string Program::getKernelNamesString() const {
|
||||
std::string semiColonDelimitedKernelNameStr;
|
||||
|
||||
for (auto kernelInfo : kernelInfoArray) {
|
||||
if (!semiColonDelimitedKernelNameStr.empty()) {
|
||||
semiColonDelimitedKernelNameStr += ';';
|
||||
}
|
||||
semiColonDelimitedKernelNameStr += kernelInfo->name;
|
||||
}
|
||||
|
||||
return semiColonDelimitedKernelNameStr;
|
||||
}
|
||||
|
||||
void Program::populateKernelInfo(
|
||||
const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram,
|
||||
uint32_t kernelNum,
|
||||
cl_int &retVal) {
|
||||
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
const PatchTokenBinary::KernelFromPatchtokens &decodedKernel = decodedProgram.kernels[kernelNum];
|
||||
|
||||
NEO::populateKernelInfo(*kernelInfo, decodedKernel);
|
||||
retVal = kernelInfo->resolveKernelInfo();
|
||||
if (retVal != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
kernelInfo->gpuPointerSize = decodedProgram.header->GPUPointerSizeInBytes;
|
||||
|
||||
if (decodedKernel.tokens.programSymbolTable) {
|
||||
prepareLinkerInputStorage();
|
||||
linkerInput->decodeExportedFunctionsSymbolTable(decodedKernel.tokens.programSymbolTable + 1, decodedKernel.tokens.programSymbolTable->NumEntries, kernelNum);
|
||||
}
|
||||
|
||||
if (decodedKernel.tokens.programRelocationTable) {
|
||||
prepareLinkerInputStorage();
|
||||
linkerInput->decodeRelocationTable(decodedKernel.tokens.programRelocationTable + 1, decodedKernel.tokens.programRelocationTable->NumEntries, kernelNum);
|
||||
}
|
||||
|
||||
if (kernelInfo->patchInfo.dataParameterStream && kernelInfo->patchInfo.dataParameterStream->DataParameterStreamSize) {
|
||||
uint32_t crossThreadDataSize = kernelInfo->patchInfo.dataParameterStream->DataParameterStreamSize;
|
||||
kernelInfo->crossThreadData = new char[crossThreadDataSize];
|
||||
memset(kernelInfo->crossThreadData, 0x00, crossThreadDataSize);
|
||||
|
||||
uint32_t privateMemoryStatelessSizeOffset = kernelInfo->workloadInfo.privateMemoryStatelessSizeOffset;
|
||||
uint32_t localMemoryStatelessWindowSizeOffset = kernelInfo->workloadInfo.localMemoryStatelessWindowSizeOffset;
|
||||
uint32_t localMemoryStatelessWindowStartAddressOffset = kernelInfo->workloadInfo.localMemoryStatelessWindowStartAddressOffset;
|
||||
|
||||
if (localMemoryStatelessWindowStartAddressOffset != 0xFFffFFff) {
|
||||
*(uintptr_t *)&(kernelInfo->crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast<uintptr_t>(this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment));
|
||||
}
|
||||
|
||||
if (localMemoryStatelessWindowSizeOffset != 0xFFffFFff) {
|
||||
*(uint32_t *)&(kernelInfo->crossThreadData[localMemoryStatelessWindowSizeOffset]) = (uint32_t)this->pDevice->getDeviceInfo().localMemSize;
|
||||
}
|
||||
|
||||
if (kernelInfo->patchInfo.pAllocateStatelessPrivateSurface && (privateMemoryStatelessSizeOffset != 0xFFffFFff)) {
|
||||
*(uint32_t *)&(kernelInfo->crossThreadData[privateMemoryStatelessSizeOffset]) = kernelInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * this->getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * kernelInfo->getMaxSimdSize();
|
||||
}
|
||||
|
||||
if (kernelInfo->workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) {
|
||||
*(uint32_t *)&(kernelInfo->crossThreadData[kernelInfo->workloadInfo.maxWorkGroupSizeOffset]) = (uint32_t)this->getDevice(0).getDeviceInfo().maxWorkGroupSize;
|
||||
}
|
||||
}
|
||||
|
||||
if (kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) {
|
||||
retVal = kernelInfo->createKernelAllocation(this->pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager()) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice);
|
||||
if (retVal != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (kernelInfo->hasDeviceEnqueue()) {
|
||||
parentKernelInfoArray.push_back(kernelInfo.get());
|
||||
}
|
||||
if (kernelInfo->requiresSubgroupIndependentForwardProgress()) {
|
||||
subgroupKernelInfoArray.push_back(kernelInfo.get());
|
||||
}
|
||||
kernelInfoArray.push_back(kernelInfo.release());
|
||||
}
|
||||
|
||||
// Reads a 64-bit value from an address that may not be suitably aligned for
// uint64_t.
//
// The eight bytes are copied out as two 32-bit words with memcpy, so no
// misaligned or type-punned load is performed. The first word becomes the low
// half of the result and the second word the high half.
inline uint64_t readMisalignedUint64(const uint64_t *address) {
    uint32_t addressBits[2];
    memcpy(addressBits, address, sizeof(addressBits));
    return static_cast<uint64_t>(static_cast<uint64_t>(addressBits[1]) << 32) | addressBits[0];
}
|
||||
|
||||
// Allocates a surface of `size` bytes for program-scope constants or globals
// and fills it from initData.
//
// When the globals are exported and ctx provides an SVM allocations manager,
// the surface is created as an SVM allocation and its GPU allocation is
// returned. Otherwise a CONSTANT_SURFACE / GLOBAL_SURFACE graphics allocation
// (selected by `constant`) is created through the device's memory manager.
//
// device must not be nullptr. Returns nullptr when the allocation fails.
GraphicsAllocation *allocateGlobalsSurface(NEO::Context *ctx, NEO::ClDevice *device, size_t size, bool constant, bool globalsAreExported, const void *initData) {
    UNRECOVERABLE_IF(device == nullptr);
    if (globalsAreExported && (ctx != nullptr) && (ctx->getSVMAllocsManager() != nullptr)) {
        NEO::SVMAllocsManager::SvmAllocationProperties svmProps = {};
        svmProps.coherent = false;
        svmProps.readOnly = constant;
        svmProps.hostPtrReadOnly = constant;
        auto ptr = ctx->getSVMAllocsManager()->createSVMAlloc(device->getRootDeviceIndex(), size, svmProps);
        DEBUG_BREAK_IF(ptr == nullptr);
        if (ptr == nullptr) {
            return nullptr;
        }
        auto svmAlloc = ctx->getSVMAllocsManager()->getSVMAlloc(ptr);
        UNRECOVERABLE_IF(svmAlloc == nullptr);
        auto gpuAlloc = svmAlloc->gpuAllocation;
        UNRECOVERABLE_IF(gpuAlloc == nullptr);
        // NOTE(review): the result of copyMemoryToAllocation is not checked,
        // and size is narrowed to uint32_t for this call.
        device->getMemoryManager()->copyMemoryToAllocation(gpuAlloc, initData, static_cast<uint32_t>(size));
        // Return the allocation validated above instead of looking it up again.
        return gpuAlloc;
    } else {
        auto allocationType = constant ? GraphicsAllocation::AllocationType::CONSTANT_SURFACE : GraphicsAllocation::AllocationType::GLOBAL_SURFACE;
        auto gpuAlloc = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), size, allocationType});
        DEBUG_BREAK_IF(gpuAlloc == nullptr);
        if (gpuAlloc == nullptr) {
            return nullptr;
        }
        memcpy_s(gpuAlloc->getUnderlyingBuffer(), gpuAlloc->getUnderlyingBufferSize(), initData, size);
        return gpuAlloc;
    }
}
|
||||
|
||||
cl_int Program::isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) const {
|
||||
std::string validatorErrMessage;
|
||||
std::string validatorWarnings;
|
||||
@@ -191,76 +76,27 @@ cl_int Program::isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decode
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Processes the program-scope patch tokens of decodedProgram:
//  - decodes the global variables symbol table into linkerInput,
//  - (re)allocates the constant and global surfaces from their inline data,
//  - patches every program-scope pointer inside those surfaces with the GPU
//    address of the surface it refers to.
void Program::processProgramScopeMetadata(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) {
    if (decodedProgram.programScopeTokens.symbolTable != nullptr) {
        const auto patch = decodedProgram.programScopeTokens.symbolTable;
        this->prepareLinkerInputStorage();
        // Symbol entries follow the token header in memory, hence "patch + 1".
        this->linkerInput->decodeGlobalVariablesSymbolTable(patch + 1, patch->NumEntries);
    }

    if (decodedProgram.programScopeTokens.allocateConstantMemorySurface.size() != 0) {
        pDevice->getMemoryManager()->freeGraphicsMemory(this->constantSurface);

        auto exportsGlobals = (linkerInput && linkerInput->getTraits().exportsGlobalConstants);
        size_t globalConstantsSurfaceSize = decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]->InlineDataSize;
        const void *globalConstantsInitData = NEO::PatchTokenBinary::getInlineData(decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]);
        this->constantSurface = allocateGlobalsSurface(context, pDevice, globalConstantsSurfaceSize, true, exportsGlobals, globalConstantsInitData);
    }

    if (decodedProgram.programScopeTokens.allocateGlobalMemorySurface.size() != 0) {
        pDevice->getMemoryManager()->freeGraphicsMemory(this->globalSurface);

        auto exportsGlobals = (linkerInput && linkerInput->getTraits().exportsGlobalVariables);
        size_t globalVariablesSurfaceSize = decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]->InlineDataSize;
        const void *globalVariablesInitData = NEO::PatchTokenBinary::getInlineData(decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]);
        this->globalVarTotalSize = globalVariablesSurfaceSize;
        this->globalSurface = allocateGlobalsSurface(context, pDevice, globalVariablesSurfaceSize, false, exportsGlobals, globalVariablesInitData);
    }

    // Pointers stored inside the constant surface.
    for (const auto &globalConstantPointerToken : decodedProgram.programScopeTokens.constantPointer) {
        NEO::GraphicsAllocation *srcSurface = this->constantSurface;
        if (globalConstantPointerToken->BufferType != PROGRAM_SCOPE_CONSTANT_BUFFER) {
            UNRECOVERABLE_IF(globalConstantPointerToken->BufferType != PROGRAM_SCOPE_GLOBAL_BUFFER);
            srcSurface = this->globalSurface;
        }
        UNRECOVERABLE_IF(srcSurface == nullptr);
        UNRECOVERABLE_IF(this->constantSurface == nullptr);
        auto offset = readMisalignedUint64(&globalConstantPointerToken->ConstantPointerOffset);
        const auto pointerSize = constantSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t);
        // The patched pointer has to fit entirely inside the surface. The
        // conditional is evaluated first and then added to the offset.
        UNRECOVERABLE_IF(this->constantSurface->getUnderlyingBufferSize() < (offset + pointerSize));
        void *patchOffset = ptrOffset(this->constantSurface->getUnderlyingBuffer(), static_cast<size_t>(offset));
        patchIncrement(patchOffset, pointerSize, srcSurface->getGpuAddressToPatch());
    }

    // Pointers stored inside the global surface.
    for (const auto &globalVariablePointerToken : decodedProgram.programScopeTokens.globalPointer) {
        NEO::GraphicsAllocation *srcSurface = this->globalSurface;
        if (globalVariablePointerToken->BufferType != PROGRAM_SCOPE_GLOBAL_BUFFER) {
            UNRECOVERABLE_IF(globalVariablePointerToken->BufferType != PROGRAM_SCOPE_CONSTANT_BUFFER);
            srcSurface = this->constantSurface;
        }
        UNRECOVERABLE_IF(srcSurface == nullptr);
        UNRECOVERABLE_IF(this->globalSurface == nullptr);
        auto offset = readMisalignedUint64(&globalVariablePointerToken->GlobalPointerOffset);
        const auto pointerSize = globalSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t);
        // Same containment check as for the constant surface above.
        UNRECOVERABLE_IF(this->globalSurface->getUnderlyingBufferSize() < (offset + pointerSize));
        void *patchOffset = ptrOffset(this->globalSurface->getUnderlyingBuffer(), static_cast<size_t>(offset));
        patchIncrement(patchOffset, pointerSize, srcSurface->getGpuAddressToPatch());
    }
}
|
||||
|
||||
cl_int Program::linkBinary() {
|
||||
if (linkerInput == nullptr) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
Linker linker(*linkerInput);
|
||||
Linker::Segment globals;
|
||||
Linker::Segment constants;
|
||||
Linker::Segment exportedFunctions;
|
||||
Linker::SegmentInfo globals;
|
||||
Linker::SegmentInfo constants;
|
||||
Linker::SegmentInfo exportedFunctions;
|
||||
Linker::PatchableSegment globalsForPatching;
|
||||
Linker::PatchableSegment constantsForPatching;
|
||||
if (this->globalSurface != nullptr) {
|
||||
globals.gpuAddress = static_cast<uintptr_t>(this->globalSurface->getGpuAddress());
|
||||
globals.segmentSize = this->globalSurface->getUnderlyingBufferSize();
|
||||
globalsForPatching.hostPointer = this->globalSurface->getUnderlyingBuffer();
|
||||
globalsForPatching.segmentSize = this->globalSurface->getUnderlyingBufferSize();
|
||||
}
|
||||
if (this->constantSurface != nullptr) {
|
||||
constants.gpuAddress = static_cast<uintptr_t>(this->constantSurface->getGpuAddress());
|
||||
constants.segmentSize = this->constantSurface->getUnderlyingBufferSize();
|
||||
constantsForPatching.hostPointer = this->constantSurface->getUnderlyingBuffer();
|
||||
constantsForPatching.segmentSize = this->constantSurface->getUnderlyingBufferSize();
|
||||
}
|
||||
if (this->linkerInput->getExportedFunctionsSegmentId() >= 0) {
|
||||
// Exported functions reside in instruction heap of one of kernels
|
||||
@@ -283,6 +119,7 @@ cl_int Program::linkBinary() {
|
||||
|
||||
Linker::UnresolvedExternals unresolvedExternalsInfo;
|
||||
bool linkSuccess = linker.link(globals, constants, exportedFunctions,
|
||||
globalsForPatching, constantsForPatching,
|
||||
isaSegmentsForPatching, unresolvedExternalsInfo);
|
||||
this->symbols = linker.extractRelocatedSymbols();
|
||||
if (false == linkSuccess) {
|
||||
@@ -295,6 +132,9 @@ cl_int Program::linkBinary() {
|
||||
return CL_INVALID_BINARY;
|
||||
} else if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
|
||||
for (const auto &kernelInfo : this->kernelInfoArray) {
|
||||
if (nullptr == kernelInfo->getGraphicsAllocation()) {
|
||||
continue;
|
||||
}
|
||||
auto &kernHeapInfo = kernelInfo->heapInfo;
|
||||
auto segmentId = &kernelInfo - &this->kernelInfoArray[0];
|
||||
this->pDevice->getMemoryManager()->copyMemoryToAllocation(kernelInfo->getGraphicsAllocation(),
|
||||
@@ -307,30 +147,83 @@ cl_int Program::linkBinary() {
|
||||
|
||||
// Rebuilds the program state from the stored gen binary.
//
// Existing kernel infos and any previously allocated constant/global
// surfaces are released first. The binary is then decoded into a ProgramInfo
// and applied through processProgramInfo().
//
// Returns the error reported by processPatchTokensBinary() when decoding
// fails, otherwise the result of processProgramInfo().
cl_int Program::processGenBinary() {
    cleanCurrentKernelInfo();
    if (this->constantSurface || this->globalSurface) {
        // Both surfaces are passed to freeGraphicsMemory even when only one
        // of them is set.
        pDevice->getMemoryManager()->freeGraphicsMemory(this->constantSurface);
        pDevice->getMemoryManager()->freeGraphicsMemory(this->globalSurface);
        this->constantSurface = nullptr;
        this->globalSurface = nullptr;
    }

    auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(genBinary.get()), genBinarySize);
    ProgramInfo programInfo;
    auto ret = this->processPatchTokensBinary(blob, programInfo);
    if (CL_SUCCESS != ret) {
        return ret;
    }

    return this->processProgramInfo(programInfo);
}
|
||||
|
||||
cl_int Program::processPatchTokensBinary(ArrayRef<const uint8_t> src, ProgramInfo &dst) {
|
||||
NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram = {};
|
||||
NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(blob, decodedProgram);
|
||||
NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(src, decodedProgram);
|
||||
DBG_LOG(LogPatchTokens, NEO::PatchTokenBinary::asString(decodedProgram).c_str());
|
||||
cl_int retVal = this->isHandled(decodedProgram);
|
||||
if (CL_SUCCESS != retVal) {
|
||||
return retVal;
|
||||
}
|
||||
|
||||
auto numKernels = decodedProgram.header->NumberOfKernels;
|
||||
for (uint32_t i = 0; i < numKernels && retVal == CL_SUCCESS; i++) {
|
||||
populateKernelInfo(decodedProgram, i, retVal);
|
||||
NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants;
|
||||
if (this->pDevice) {
|
||||
deviceInfoConstants.maxWorkGroupSize = (uint32_t)this->pDevice->getDeviceInfo().maxWorkGroupSize;
|
||||
deviceInfoConstants.computeUnitsUsedForScratch = this->pDevice->getDeviceInfo().computeUnitsUsedForScratch;
|
||||
deviceInfoConstants.slmWindowSize = (uint32_t)this->pDevice->getDeviceInfo().localMemSize;
|
||||
if (requiresLocalMemoryWindowVA(decodedProgram)) {
|
||||
deviceInfoConstants.slmWindow = this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment);
|
||||
}
|
||||
}
|
||||
|
||||
if (retVal != CL_SUCCESS) {
|
||||
return retVal;
|
||||
NEO::populateProgramInfo(dst, decodedProgram, deviceInfoConstants);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Transfers the contents of `src` into this Program and prepares it for use.
//
// Steps, in order:
//   1. take ownership of the linker input and the kernel infos from `src`,
//   2. allocate the global constants / global variables surfaces when their size is non-zero,
//   3. create a kernel allocation for every kernel with a non-empty heap (needs a device),
//   4. record kernels that use device enqueue or require subgroup independent forward progress,
//   5. link the binary.
//
// @param src  program description; its linkerInput and kernelInfos are moved from
// @return     CL_OUT_OF_HOST_MEMORY when a kernel allocation cannot be created,
//             otherwise the result of linkBinary()
cl_int Program::processProgramInfo(ProgramInfo &src) {
    this->linkerInput = std::move(src.linkerInput);
    this->kernelInfoArray = std::move(src.kernelInfos);

    auto svmAllocsManager = context ? context->getSVMAllocsManager() : nullptr;

    if (src.globalConstants.size != 0) {
        UNRECOVERABLE_IF(nullptr == pDevice);
        this->constantSurface = allocateGlobalsSurface(svmAllocsManager, pDevice->getDevice(), src.globalConstants.size, true, linkerInput.get(), src.globalConstants.initData);
    }

    if (src.globalVariables.size != 0) {
        UNRECOVERABLE_IF(nullptr == pDevice);
        this->globalSurface = allocateGlobalsSurface(svmAllocsManager, pDevice->getDevice(), src.globalVariables.size, false, linkerInput.get(), src.globalVariables.initData);
    }

    this->globalVarTotalSize = src.globalVariables.size;

    for (auto &kernelInfo : this->kernelInfoArray) {
        cl_int retVal = CL_SUCCESS;
        if (kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) {
            retVal = kernelInfo->createKernelAllocation(this->pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager()) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;
        }

        // A kernel with a non-empty heap but no device to allocate it on is unexpected.
        DEBUG_BREAK_IF(kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice);
        if (retVal != CL_SUCCESS) {
            return retVal;
        }

        if (kernelInfo->hasDeviceEnqueue()) {
            parentKernelInfoArray.push_back(kernelInfo);
        }
        if (kernelInfo->requiresSubgroupIndependentForwardProgress()) {
            subgroupKernelInfoArray.push_back(kernelInfo);
        }
    }

    // Linking is the last step: it runs only after the surfaces and every
    // kernel allocation above have been set up.
    return linkBinary();
}
|
||||
|
||||
bool Program::validateGenBinaryDevice(GFXCORE_FAMILY device) const {
|
||||
|
||||
@@ -456,9 +456,4 @@ void Program::updateNonUniformFlag(const Program **inputPrograms, size_t numInpu
|
||||
this->allowNonUniform = allowNonUniform;
|
||||
}
|
||||
|
||||
void Program::prepareLinkerInputStorage() {
|
||||
if (this->linkerInput == nullptr) {
|
||||
this->linkerInput = std::make_unique<LinkerInput>();
|
||||
}
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "core/compiler_interface/compiler_interface.h"
|
||||
#include "core/compiler_interface/linker.h"
|
||||
#include "core/elf/writer.h"
|
||||
#include "core/program/program_info.h"
|
||||
#include "core/utilities/const_stringref.h"
|
||||
#include "runtime/api/cl_types.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
@@ -131,6 +132,8 @@ class Program : public BaseObject<_cl_program> {
|
||||
std::unordered_map<std::string, BuiltinDispatchInfoBuilder *> &builtinsMap);
|
||||
|
||||
MOCKABLE_VIRTUAL cl_int processGenBinary();
|
||||
MOCKABLE_VIRTUAL cl_int processPatchTokensBinary(ArrayRef<const uint8_t> src, ProgramInfo &dst);
|
||||
// Consumes `src`: its linker input and kernel infos are moved into this Program.
MOCKABLE_VIRTUAL cl_int processProgramInfo(ProgramInfo &src);
|
||||
|
||||
cl_int compile(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions,
|
||||
cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames,
|
||||
@@ -269,16 +272,12 @@ class Program : public BaseObject<_cl_program> {
|
||||
MOCKABLE_VIRTUAL cl_int linkBinary();
|
||||
|
||||
MOCKABLE_VIRTUAL cl_int isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) const;
|
||||
void processProgramScopeMetadata(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram);
|
||||
void populateKernelInfo(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram, uint32_t kernelNum, cl_int &retVal);
|
||||
|
||||
MOCKABLE_VIRTUAL cl_int rebuildProgramFromIr();
|
||||
|
||||
bool validateGenBinaryDevice(GFXCORE_FAMILY device) const;
|
||||
bool validateGenBinaryHeader(const iOpenCL::SProgramBinaryHeader *pGenBinaryHeader) const;
|
||||
|
||||
std::string getKernelNamesString() const;
|
||||
|
||||
void separateBlockKernels();
|
||||
|
||||
void updateNonUniformFlag();
|
||||
@@ -292,8 +291,6 @@ class Program : public BaseObject<_cl_program> {
|
||||
MOCKABLE_VIRTUAL bool appendKernelDebugOptions();
|
||||
void notifyDebuggerWithSourceCode(std::string &filename);
|
||||
|
||||
void prepareLinkerInputStorage();
|
||||
|
||||
static const std::string clOptNameClVer;
|
||||
|
||||
cl_program_binary_type programBinaryType;
|
||||
@@ -354,6 +351,4 @@ class Program : public BaseObject<_cl_program> {
|
||||
bool kernelDebugEnabled = false;
|
||||
};
|
||||
|
||||
GraphicsAllocation *allocateGlobalsSurface(NEO::Context *ctx, NEO::ClDevice *device, size_t size, bool constant, bool globalsAreExported, const void *initData);
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -178,9 +178,9 @@ void FileLogger<DebugLevel>::dumpKernelArgs(const Kernel *kernel) {
|
||||
|
||||
auto &argInfo = kernel->getKernelInfo().kernelArgInfo[i];
|
||||
|
||||
if (argInfo.addressQualifier == CL_KERNEL_ARG_ADDRESS_LOCAL) {
|
||||
if (argInfo.metadata.addressQualifier == KernelArgMetadata::AddressSpaceQualifier::Local) {
|
||||
type = "local";
|
||||
} else if (argInfo.typeStr.find("image") != std::string::npos) {
|
||||
} else if (argInfo.isImage) {
|
||||
type = "image";
|
||||
auto clMem = (const cl_mem)kernel->getKernelArg(i);
|
||||
auto memObj = castToObject<MemObj>(clMem);
|
||||
@@ -189,9 +189,9 @@ void FileLogger<DebugLevel>::dumpKernelArgs(const Kernel *kernel) {
|
||||
size = memObj->getSize();
|
||||
flags = memObj->getMemoryPropertiesFlags();
|
||||
}
|
||||
} else if (argInfo.typeStr.find("sampler") != std::string::npos) {
|
||||
} else if (argInfo.isSampler) {
|
||||
type = "sampler";
|
||||
} else if (argInfo.typeStr.find("*") != std::string::npos) {
|
||||
} else if (argInfo.isBuffer) {
|
||||
type = "buffer";
|
||||
auto clMem = (const cl_mem)kernel->getKernelArg(i);
|
||||
auto memObj = castToObject<MemObj>(clMem);
|
||||
|
||||
Reference in New Issue
Block a user