2017-12-21 07:45:38 +08:00
|
|
|
/*
|
2019-02-27 18:39:32 +08:00
|
|
|
* Copyright (C) 2017-2019 Intel Corporation
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
2018-09-17 20:03:37 +08:00
|
|
|
* SPDX-License-Identifier: MIT
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2019-08-03 04:25:45 +08:00
|
|
|
#include "core/helpers/aligned_memory.h"
|
2019-09-05 15:35:56 +08:00
|
|
|
#include "core/helpers/debug_helpers.h"
|
2019-05-29 10:09:40 +08:00
|
|
|
#include "core/helpers/ptr_math.h"
|
2019-06-19 15:21:29 +08:00
|
|
|
#include "core/helpers/string.h"
|
2019-09-26 18:10:38 +08:00
|
|
|
#include "core/memory_manager/unified_memory_manager.h"
|
2019-10-28 02:48:26 +08:00
|
|
|
#include "runtime/compiler_interface/patchtokens_decoder.h"
|
|
|
|
#include "runtime/compiler_interface/patchtokens_dumper.h"
|
|
|
|
#include "runtime/compiler_interface/patchtokens_validator.inl"
|
2019-07-04 23:14:51 +08:00
|
|
|
#include "runtime/context/context.h"
|
2019-09-03 20:20:32 +08:00
|
|
|
#include "runtime/device/device.h"
|
2019-02-27 18:39:32 +08:00
|
|
|
#include "runtime/gtpin/gtpin_notify.h"
|
2018-07-17 20:41:24 +08:00
|
|
|
#include "runtime/memory_manager/memory_manager.h"
|
2019-10-28 02:48:26 +08:00
|
|
|
#include "runtime/program/kernel_info.h"
|
|
|
|
#include "runtime/program/kernel_info_from_patchtokens.h"
|
2019-09-03 20:20:32 +08:00
|
|
|
#include "runtime/program/program.h"
|
2019-02-27 18:39:32 +08:00
|
|
|
|
|
|
|
#include "patch_list.h"
|
|
|
|
#include "patch_shared.h"
|
|
|
|
#include "program_debug_data.h"
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
#include <algorithm>
#include <cstring>
|
|
|
|
|
|
|
|
using namespace iOpenCL;
|
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
namespace NEO {
|
2017-12-21 07:45:38 +08:00
|
|
|
extern bool familyEnabled[];
|
|
|
|
|
|
|
|
// Looks up a kernel by name.
// Returns the first entry of kernelInfoArray whose name compares equal to
// kernelName (strcmp == 0), or nullptr when kernelName is null or nothing matches.
const KernelInfo *Program::getKernelInfo(
    const char *kernelName) const {
    if (nullptr == kernelName) {
        return nullptr;
    }

    for (const KernelInfo *candidate : kernelInfoArray) {
        if (strcmp(candidate->name.c_str(), kernelName) == 0) {
            return candidate;
        }
    }

    return nullptr;
}
|
|
|
|
|
|
|
|
size_t Program::getNumKernels() const {
|
|
|
|
return kernelInfoArray.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Looks up a kernel by its position in kernelInfoArray.
// Returns nullptr when ordinal is out of range (same "not found" result as the
// by-name overload). DEBUG_BREAK_IF still flags the misuse, but the vector is no
// longer indexed past its end.
const KernelInfo *Program::getKernelInfo(size_t ordinal) const {
    const bool outOfRange = ordinal >= kernelInfoArray.size();
    DEBUG_BREAK_IF(outOfRange);
    if (outOfRange) {
        return nullptr;
    }
    return kernelInfoArray[ordinal];
}
|
|
|
|
|
|
|
|
std::string Program::getKernelNamesString() const {
|
|
|
|
std::string semiColonDelimitedKernelNameStr;
|
|
|
|
|
2019-07-29 19:35:55 +08:00
|
|
|
for (auto kernelInfo : kernelInfoArray) {
|
|
|
|
if (!semiColonDelimitedKernelNameStr.empty()) {
|
|
|
|
semiColonDelimitedKernelNameStr += ';';
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
2019-07-29 19:35:55 +08:00
|
|
|
semiColonDelimitedKernelNameStr += kernelInfo->name;
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return semiColonDelimitedKernelNameStr;
|
|
|
|
}
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
void Program::populateKernelInfo(
|
|
|
|
const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram,
|
2019-07-04 23:14:51 +08:00
|
|
|
uint32_t kernelNum,
|
2017-12-21 07:45:38 +08:00
|
|
|
cl_int &retVal) {
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
auto kernelInfo = std::make_unique<KernelInfo>();
|
|
|
|
const PatchTokenBinary::KernelFromPatchtokens &decodedKernel = decodedProgram.kernels[kernelNum];
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
NEO::populateKernelInfo(*kernelInfo, decodedKernel);
|
|
|
|
retVal = kernelInfo->resolveKernelInfo();
|
|
|
|
if (retVal != CL_SUCCESS) {
|
|
|
|
return;
|
|
|
|
}
|
2019-11-19 23:54:47 +08:00
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
kernelInfo->gpuPointerSize = decodedProgram.header->GPUPointerSizeInBytes;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (decodedKernel.tokens.programSymbolTable) {
|
|
|
|
prepareLinkerInputStorage();
|
|
|
|
linkerInput->decodeExportedFunctionsSymbolTable(decodedKernel.tokens.programSymbolTable + 1, decodedKernel.tokens.programSymbolTable->NumEntries, kernelNum);
|
|
|
|
}
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (decodedKernel.tokens.programRelocationTable) {
|
|
|
|
prepareLinkerInputStorage();
|
|
|
|
linkerInput->decodeRelocationTable(decodedKernel.tokens.programRelocationTable + 1, decodedKernel.tokens.programRelocationTable->NumEntries, kernelNum);
|
|
|
|
}
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (kernelInfo->patchInfo.dataParameterStream && kernelInfo->patchInfo.dataParameterStream->DataParameterStreamSize) {
|
|
|
|
uint32_t crossThreadDataSize = kernelInfo->patchInfo.dataParameterStream->DataParameterStreamSize;
|
|
|
|
kernelInfo->crossThreadData = new char[crossThreadDataSize];
|
|
|
|
memset(kernelInfo->crossThreadData, 0x00, crossThreadDataSize);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
uint32_t privateMemoryStatelessSizeOffset = kernelInfo->workloadInfo.privateMemoryStatelessSizeOffset;
|
|
|
|
uint32_t localMemoryStatelessWindowSizeOffset = kernelInfo->workloadInfo.localMemoryStatelessWindowSizeOffset;
|
|
|
|
uint32_t localMemoryStatelessWindowStartAddressOffset = kernelInfo->workloadInfo.localMemoryStatelessWindowStartAddressOffset;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (localMemoryStatelessWindowStartAddressOffset != 0xFFffFFff) {
|
|
|
|
*(uintptr_t *)&(kernelInfo->crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast<uintptr_t>(this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment));
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (localMemoryStatelessWindowSizeOffset != 0xFFffFFff) {
|
|
|
|
*(uint32_t *)&(kernelInfo->crossThreadData[localMemoryStatelessWindowSizeOffset]) = (uint32_t)this->pDevice->getDeviceInfo().localMemSize;
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (kernelInfo->patchInfo.pAllocateStatelessPrivateSurface && (privateMemoryStatelessSizeOffset != 0xFFffFFff)) {
|
|
|
|
*(uint32_t *)&(kernelInfo->crossThreadData[privateMemoryStatelessSizeOffset]) = kernelInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * this->getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * kernelInfo->getMaxSimdSize();
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (kernelInfo->workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) {
|
|
|
|
*(uint32_t *)&(kernelInfo->crossThreadData[kernelInfo->workloadInfo.maxWorkGroupSizeOffset]) = (uint32_t)this->getDevice(0).getDeviceInfo().maxWorkGroupSize;
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) {
|
|
|
|
retVal = kernelInfo->createKernelAllocation(this->pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager()) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
DEBUG_BREAK_IF(kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice);
|
|
|
|
if (retVal != CL_SUCCESS) {
|
|
|
|
return;
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
if (kernelInfo->hasDeviceEnqueue()) {
|
|
|
|
parentKernelInfoArray.push_back(kernelInfo.get());
|
2018-03-08 18:56:44 +08:00
|
|
|
}
|
2019-10-28 02:48:26 +08:00
|
|
|
if (kernelInfo->requiresSubgroupIndependentForwardProgress()) {
|
|
|
|
subgroupKernelInfoArray.push_back(kernelInfo.get());
|
2019-07-22 21:52:30 +08:00
|
|
|
}
|
2019-10-28 02:48:26 +08:00
|
|
|
kernelInfoArray.push_back(kernelInfo.release());
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
2019-07-04 23:14:51 +08:00
|
|
|
// Reads a 64-bit value from an address that may not be suitably aligned.
// The two 32-bit halves are copied out with memcpy (no alignment or aliasing
// requirements on the source) and combined as (second half << 32) | first half,
// i.e. the same composition the previous pointer-cast implementation produced.
inline uint64_t readMisalignedUint64(const uint64_t *address) {
    uint32_t halves[2];
    std::memcpy(halves, address, sizeof(halves));
    return (static_cast<uint64_t>(halves[1]) << 32) | halves[0];
}
|
|
|
|
|
|
|
|
// Allocates a surface for program-scope globals/constants and fills it with
// size bytes taken from initData.
//
// - When globalsAreExported is set and ctx provides an SVM allocations manager,
//   the surface is created via createSVMAlloc() (read-only when constant is true)
//   and the data is copied with MemoryManager::copyMemoryToAllocation().
// - Otherwise it is allocated via allocateGraphicsMemoryWithProperties() with
//   allocation type CONSTANT_SURFACE or GLOBAL_SURFACE (selected by constant) and
//   the data is copied with memcpy_s into the underlying buffer.
//
// device must not be null (UNRECOVERABLE_IF). Returns nullptr when the
// allocation itself fails.
GraphicsAllocation *allocateGlobalsSurface(NEO::Context *ctx, NEO::Device *device, size_t size, bool constant, bool globalsAreExported, const void *initData) {
    UNRECOVERABLE_IF(device == nullptr);

    if (globalsAreExported && (ctx != nullptr) && (ctx->getSVMAllocsManager() != nullptr)) {
        NEO::SVMAllocsManager::SvmAllocationProperties svmProps = {};
        svmProps.coherent = false;
        svmProps.readOnly = constant;
        svmProps.hostPtrReadOnly = constant;

        auto ptr = ctx->getSVMAllocsManager()->createSVMAlloc(device->getRootDeviceIndex(), size, svmProps);
        DEBUG_BREAK_IF(ptr == nullptr);
        if (ptr == nullptr) {
            return nullptr;
        }

        auto svmAlloc = ctx->getSVMAllocsManager()->getSVMAlloc(ptr);
        UNRECOVERABLE_IF(svmAlloc == nullptr);
        auto gpuAlloc = svmAlloc->gpuAllocation;
        UNRECOVERABLE_IF(gpuAlloc == nullptr);

        // Note: size is narrowed to uint32_t for this call.
        device->getMemoryManager()->copyMemoryToAllocation(gpuAlloc, initData, static_cast<uint32_t>(size));

        // Return the allocation validated above instead of repeating the
        // getSVMAlloc() lookup and dereferencing its result unchecked.
        return gpuAlloc;
    }

    auto allocationType = constant ? GraphicsAllocation::AllocationType::CONSTANT_SURFACE : GraphicsAllocation::AllocationType::GLOBAL_SURFACE;
    auto gpuAlloc = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), size, allocationType});
    DEBUG_BREAK_IF(gpuAlloc == nullptr);
    if (gpuAlloc == nullptr) {
        return nullptr;
    }

    memcpy_s(gpuAlloc->getUnderlyingBuffer(), gpuAlloc->getUnderlyingBufferSize(), initData, size);
    return gpuAlloc;
}
|
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
// Runs the patch-token validator on decodedProgram and maps its verdict to an
// OpenCL status:
//   Success       -> CL_SUCCESS
//   NotEnoughSlm  -> CL_OUT_OF_RESOURCES
//   anything else -> CL_INVALID_BINARY
// Validator warnings and error messages are forwarded to printDebugString
// (stderr, controlled by the PrintDebugMessages flag).
cl_int Program::isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) const {
    // Without a device there is no SLM available to the validator.
    size_t availableSlm = 0U;
    if (this->pDevice) {
        availableSlm = static_cast<size_t>(this->pDevice->getDeviceInfo().localMemSize);
    }

    std::string validatorErrMessage;
    std::string validatorWarnings;
    const auto validatorErr = PatchTokenBinary::validate(decodedProgram, availableSlm, *this, validatorErrMessage, validatorWarnings);

    if (!validatorWarnings.empty()) {
        printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", validatorWarnings.c_str());
    }

    if (PatchTokenBinary::ValidatorError::Success == validatorErr) {
        return CL_SUCCESS;
    }

    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", validatorErrMessage.c_str());
    if (PatchTokenBinary::ValidatorError::NotEnoughSlm == validatorErr) {
        return CL_OUT_OF_RESOURCES;
    }
    return CL_INVALID_BINARY;
}
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-10-28 02:48:26 +08:00
|
|
|
// Processes the program-scope patch tokens of decodedProgram:
//  1. decodes the global-variables symbol table into the linker input,
//  2. (re)creates constantSurface / globalSurface from the inline data of the
//     first allocate-constant / allocate-global token (the previous surface is
//     freed first),
//  3. applies the constant/global pointer patches: at each token's offset the
//     surface's GPU address is added via patchIncrement, using a 4-byte pointer
//     for 32-bit allocations and sizeof(uintptr_t) otherwise.
//
// A pointer patch whose target does not lie completely inside the surface is
// treated as unrecoverable.
void Program::processProgramScopeMetadata(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) {
    const auto &programScopeTokens = decodedProgram.programScopeTokens;

    if (programScopeTokens.symbolTable != nullptr) {
        const auto patch = programScopeTokens.symbolTable;
        this->prepareLinkerInputStorage();
        // Table entries start right after the table header.
        this->linkerInput->decodeGlobalVariablesSymbolTable(patch + 1, patch->NumEntries);
    }

    if (programScopeTokens.allocateConstantMemorySurface.size() != 0) {
        pDevice->getMemoryManager()->freeGraphicsMemory(this->constantSurface);

        auto exportsGlobals = (linkerInput && linkerInput->getTraits().exportsGlobalConstants);
        size_t globalConstantsSurfaceSize = programScopeTokens.allocateConstantMemorySurface[0]->InlineDataSize;
        const void *globalConstantsInitData = NEO::PatchTokenBinary::getInlineData(programScopeTokens.allocateConstantMemorySurface[0]);
        this->constantSurface = allocateGlobalsSurface(context, pDevice, globalConstantsSurfaceSize, true, exportsGlobals, globalConstantsInitData);
    }

    if (programScopeTokens.allocateGlobalMemorySurface.size() != 0) {
        pDevice->getMemoryManager()->freeGraphicsMemory(this->globalSurface);

        auto exportsGlobals = (linkerInput && linkerInput->getTraits().exportsGlobalVariables);
        size_t globalVariablesSurfaceSize = programScopeTokens.allocateGlobalMemorySurface[0]->InlineDataSize;
        const void *globalVariablesInitData = NEO::PatchTokenBinary::getInlineData(programScopeTokens.allocateGlobalMemorySurface[0]);
        this->globalVarTotalSize = globalVariablesSurfaceSize;
        this->globalSurface = allocateGlobalsSurface(context, pDevice, globalVariablesSurfaceSize, false, exportsGlobals, globalVariablesInitData);
    }

    for (const auto &globalConstantPointerToken : programScopeTokens.constantPointer) {
        UNRECOVERABLE_IF(this->constantSurface == nullptr);
        const uint64_t offset = readMisalignedUint64(&globalConstantPointerToken->ConstantPointerOffset);
        const size_t pointerSize = constantSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t);
        const size_t surfaceSize = this->constantSurface->getUnderlyingBufferSize();
        // Bounds check: [offset, offset + pointerSize) must fit in the surface.
        // Written subtraction-style so a huge offset cannot overflow the sum.
        UNRECOVERABLE_IF((offset > surfaceSize) || ((surfaceSize - offset) < pointerSize));
        void *patchOffset = ptrOffset(this->constantSurface->getUnderlyingBuffer(), static_cast<size_t>(offset));
        patchIncrement(patchOffset, pointerSize, constantSurface->getGpuAddressToPatch());
    }

    for (const auto &globalVariablePointerToken : programScopeTokens.globalPointer) {
        UNRECOVERABLE_IF(this->globalSurface == nullptr);
        const uint64_t offset = readMisalignedUint64(&globalVariablePointerToken->GlobalPointerOffset);
        const size_t pointerSize = globalSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t);
        const size_t surfaceSize = this->globalSurface->getUnderlyingBufferSize();
        // Same overflow-safe bounds check as for the constant surface.
        UNRECOVERABLE_IF((offset > surfaceSize) || ((surfaceSize - offset) < pointerSize));
        void *patchOffset = ptrOffset(this->globalSurface->getUnderlyingBuffer(), static_cast<size_t>(offset));
        patchIncrement(patchOffset, pointerSize, globalSurface->getGpuAddressToPatch());
    }
}
|
|
|
|
|
2019-07-04 23:14:51 +08:00
|
|
|
// Links the program using the collected linkerInput.
//
// Returns CL_SUCCESS immediately when there is no linker input. Otherwise the
// globals, constants and exported-functions segments are described to the
// Linker, kernel ISA is patched in temporary host copies when the linker input
// requires it, and the relocated symbols are stored in this->symbols.
// On link failure an error message is written to the build log and
// CL_INVALID_BINARY is returned; on success the patched ISA copies are written
// back to the kernels' graphics allocations.
cl_int Program::linkBinary() {
    if (nullptr == linkerInput) {
        return CL_SUCCESS;
    }

    Linker linker(*linkerInput);
    Linker::Segment globals;
    Linker::Segment constants;
    Linker::Segment exportedFunctions;

    if (nullptr != this->globalSurface) {
        globals.gpuAddress = static_cast<uintptr_t>(this->globalSurface->getGpuAddress());
        globals.segmentSize = this->globalSurface->getUnderlyingBufferSize();
    }

    if (nullptr != this->constantSurface) {
        constants.gpuAddress = static_cast<uintptr_t>(this->constantSurface->getGpuAddress());
        constants.segmentSize = this->constantSurface->getUnderlyingBufferSize();
    }

    if (this->linkerInput->getExportedFunctionsSegmentId() >= 0) {
        // Exported functions reside in instruction heap of one of kernels
        auto exportedFunctionHeapId = this->linkerInput->getExportedFunctionsSegmentId();
        this->exportedFunctionsSurface = this->kernelInfoArray[exportedFunctionHeapId]->getGraphicsAllocation();
        exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch());
        exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize();
    }

    // Host-side copies of every kernel's ISA; the linker patches these copies.
    Linker::PatchableSegments isaSegmentsForPatching;
    std::vector<std::vector<char>> patchedIsaTempStorage;
    if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
        patchedIsaTempStorage.reserve(this->kernelInfoArray.size());
        for (const auto &kernelInfo : this->kernelInfoArray) {
            const auto &heapInfo = kernelInfo->heapInfo;
            const char *isaBegin = reinterpret_cast<const char *>(heapInfo.pKernelHeap);
            patchedIsaTempStorage.emplace_back(isaBegin, isaBegin + heapInfo.pKernelHeader->KernelHeapSize);
            isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.back().data(), heapInfo.pKernelHeader->KernelHeapSize});
        }
    }

    Linker::UnresolvedExternals unresolvedExternalsInfo;
    const bool linkSuccess = linker.link(globals, constants, exportedFunctions,
                                         isaSegmentsForPatching, unresolvedExternalsInfo);
    this->symbols = linker.extractRelocatedSymbols();

    if (!linkSuccess) {
        std::vector<std::string> kernelNames;
        for (const auto &kernelInfo : this->kernelInfoArray) {
            kernelNames.push_back("kernel : " + kernelInfo->name);
        }
        auto error = constructLinkerErrorMessage(unresolvedExternalsInfo, kernelNames);
        updateBuildLog(pDevice, error.c_str(), error.size());
        return CL_INVALID_BINARY;
    }

    if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
        // Segment i of isaSegmentsForPatching corresponds to kernelInfoArray[i].
        for (size_t segmentId = 0; segmentId < this->kernelInfoArray.size(); ++segmentId) {
            const auto &kernelInfo = this->kernelInfoArray[segmentId];
            this->pDevice->getMemoryManager()->copyMemoryToAllocation(kernelInfo->getGraphicsAllocation(),
                                                                      isaSegmentsForPatching[segmentId].hostPointer,
                                                                      kernelInfo->heapInfo.pKernelHeader->KernelHeapSize);
        }
    }

    return CL_SUCCESS;
}
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
// Decodes genBinary as a patch-token program and rebuilds the program state:
// existing kernel infos are cleaned, the decoded program is validated with
// isHandled(), a KernelInfo is populated for each kernel listed in the header,
// program-scope metadata is processed and finally the binary is linked.
// Returns the first non-CL_SUCCESS status encountered, otherwise the result of
// linkBinary().
cl_int Program::processGenBinary() {
    cleanCurrentKernelInfo();

    NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram = {};
    auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(genBinary.get()), genBinarySize);
    NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(blob, decodedProgram);
    DBG_LOG(LogPatchTokens, NEO::PatchTokenBinary::asString(decodedProgram).c_str());

    cl_int retVal = this->isHandled(decodedProgram);
    if (retVal != CL_SUCCESS) {
        return retVal;
    }

    const auto numKernels = decodedProgram.header->NumberOfKernels;
    for (uint32_t kernelNum = 0; kernelNum < numKernels; ++kernelNum) {
        populateKernelInfo(decodedProgram, kernelNum, retVal);
        if (retVal != CL_SUCCESS) {
            // Stop at the first kernel that fails to populate.
            return retVal;
        }
    }

    processProgramScopeMetadata(decodedProgram);

    return linkBinary();
}
|
|
|
|
|
|
|
|
// Reports whether the given core family is enabled, as recorded in the
// familyEnabled table (indexed by the GFXCORE_FAMILY value).
bool Program::validateGenBinaryDevice(GFXCORE_FAMILY device) const {
    return familyEnabled[device];
}
|
|
|
|
|
|
|
|
// Checks a gen binary header: the magic must be MAGIC_CL, the version must be
// CURRENT_ICBE_VERSION and the device family must pass validateGenBinaryDevice().
// The checks are evaluated in that order and stop at the first failure.
bool Program::validateGenBinaryHeader(const iOpenCL::SProgramBinaryHeader *pGenBinaryHeader) const {
    if (pGenBinaryHeader->Magic != MAGIC_CL) {
        return false;
    }
    if (pGenBinaryHeader->Version != CURRENT_ICBE_VERSION) {
        return false;
    }
    return validateGenBinaryDevice(static_cast<GFXCORE_FAMILY>(pGenBinaryHeader->Device));
}
|
2018-05-02 20:27:55 +08:00
|
|
|
|
|
|
|
void Program::processDebugData() {
|
|
|
|
if (debugData != nullptr) {
|
2019-08-29 21:10:51 +08:00
|
|
|
SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast<SProgramDebugDataHeaderIGC *>(debugData.get());
|
2018-05-02 20:27:55 +08:00
|
|
|
|
|
|
|
DEBUG_BREAK_IF(programDebugHeader->NumberOfKernels != kernelInfoArray.size());
|
|
|
|
|
|
|
|
const SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast<SKernelDebugDataHeaderIGC *>(ptrOffset(programDebugHeader, sizeof(SProgramDebugDataHeaderIGC)));
|
|
|
|
const char *kernelName = nullptr;
|
|
|
|
const char *kernelDebugData = nullptr;
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < programDebugHeader->NumberOfKernels; i++) {
|
|
|
|
kernelName = reinterpret_cast<const char *>(ptrOffset(kernelDebugHeader, sizeof(SKernelDebugDataHeaderIGC)));
|
|
|
|
|
|
|
|
auto kernelInfo = kernelInfoArray[i];
|
|
|
|
UNRECOVERABLE_IF(kernelInfo->name.compare(0, kernelInfo->name.size(), kernelName) != 0);
|
|
|
|
|
|
|
|
kernelDebugData = ptrOffset(kernelName, kernelDebugHeader->KernelNameSize);
|
|
|
|
|
|
|
|
kernelInfo->debugData.vIsa = kernelDebugData;
|
|
|
|
kernelInfo->debugData.genIsa = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes);
|
|
|
|
kernelInfo->debugData.vIsaSize = kernelDebugHeader->SizeVisaDbgInBytes;
|
|
|
|
kernelInfo->debugData.genIsaSize = kernelDebugHeader->SizeGenIsaDbgInBytes;
|
|
|
|
|
|
|
|
kernelDebugData = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes + kernelDebugHeader->SizeGenIsaDbgInBytes);
|
|
|
|
kernelDebugHeader = reinterpret_cast<const SKernelDebugDataHeaderIGC *>(kernelDebugData);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
} // namespace NEO
|