compute-runtime/shared/source/device_binary_format/patchtokens_decoder.cpp

637 lines
26 KiB
C++

/*
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "patchtokens_decoder.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/hash.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/utilities/logger.h"
#include <algorithm>
namespace NEO {
namespace PatchTokenBinary {
// Bounds-checking helper around the byte range that is currently being decoded.
// All queries take the current decode position (a pointer into `data`) and answer
// how much of the range is still available from that position onwards.
struct PatchTokensStreamReader {
    const ArrayRef<const uint8_t> data;

    // Intentionally not explicit: callers in this file pass an ArrayRef where a reader is expected.
    PatchTokensStreamReader(ArrayRef<const uint8_t> data) : data(data) {}

    // Number of bytes between decodePos and the end of `data`.
    // Aborts (UNRECOVERABLE_IF) when decodePos lies past the end of `data`.
    template <typename DecodePosT>
    size_t getDataSizeLeft(DecodePosT *decodePos) {
        auto consumedBytes = ptrDiff(decodePos, data.begin());
        UNRECOVERABLE_IF(consumedBytes > data.size());
        return data.size() - consumedBytes;
    }

    // True when fewer than requestedSize bytes remain starting at decodePos.
    template <typename DecodePosT>
    bool notEnoughDataLeft(DecodePosT *decodePos, size_t requestedSize) {
        const size_t availableBytes = getDataSizeLeft(decodePos);
        return requestedSize > availableBytes;
    }

    // True when fewer than sizeof(T) bytes remain starting at decodePos.
    template <typename T, typename DecodePosT>
    constexpr bool notEnoughDataLeft(DecodePosT *decodePos) {
        return notEnoughDataLeft(decodePos, sizeof(T));
    }

    // Negation of notEnoughDataLeft(decodePos, requestedSize).
    template <typename... ArgsT>
    bool enoughDataLeft(ArgsT &&...args) {
        return !notEnoughDataLeft(std::forward<ArgsT>(args)...);
    }

    // Negation of notEnoughDataLeft<T>(decodePos).
    template <typename T, typename... ArgsT>
    bool enoughDataLeft(ArgsT &&...args) {
        return !notEnoughDataLeft<T>(std::forward<ArgsT>(args)...);
    }
};
// Stores the generic token header pointer in `dst`, reinterpreted as the concrete token type T.
template <typename T>
inline void assignToken(const T *&dst, const SPatchItemHeader *src) {
    const T *typedToken = reinterpret_cast<const T *>(src);
    dst = typedToken;
}
// Returns the decoded-argument slot for argNum, growing kernel.tokens.kernelArgs when needed.
//
// `type` / `typeSpecialized` describe what the calling token implies about the argument:
//  - a slot that has no type yet adopts the given one,
//  - a conflicting non-`none` objectType marks the kernel as DecodeError::invalidBinary,
//  - a conflicting non-`none` specialized type is treated as unrecoverable.
inline KernelArgFromPatchtokens &getKernelArg(KernelFromPatchtokens &kernel, size_t argNum, ArgObjectType type = ArgObjectType::none, ArgObjectTypeSpecialized typeSpecialized = ArgObjectTypeSpecialized::none) {
    auto &kernelArgs = kernel.tokens.kernelArgs;
    const size_t requiredArgCount = argNum + 1;
    if (kernelArgs.size() < requiredArgCount) {
        kernelArgs.resize(requiredArgCount);
    }
    auto &arg = kernelArgs[argNum];

    if (ArgObjectType::none == arg.objectType) {
        arg.objectType = type;
    } else if ((type != ArgObjectType::none) && (arg.objectType != type)) {
        kernel.decodeStatus = DecodeError::invalidBinary;
        DBG_LOG(LogPatchTokens, "\n Mismatched metadata for kernel arg :", argNum);
        DEBUG_BREAK_IF(true);
    }

    if (ArgObjectTypeSpecialized::none == arg.objectTypeSpecialized) {
        arg.objectTypeSpecialized = typeSpecialized;
    } else {
        UNRECOVERABLE_IF((typeSpecialized != ArgObjectTypeSpecialized::none) && (arg.objectTypeSpecialized != typeSpecialized));
    }
    return arg;
}
// Attaches a SPatchKernelArgumentInfo token to the argument it names (by ArgumentNumber).
// The argument's object type is left untouched (ArgObjectType::none is passed).
inline void assignArgInfo(KernelFromPatchtokens &kernel, const SPatchItemHeader *src) {
    const auto *argInfo = reinterpret_cast<const SPatchKernelArgumentInfo *>(src);
    auto &kernelArg = getKernelArg(kernel, argInfo->ArgumentNumber, ArgObjectType::none);
    kernelArg.argInfo = argInfo;
}
// Reads the ArgumentNumber field of a token after reinterpreting it as the concrete token type T.
template <typename T>
inline uint32_t getArgNum(const SPatchItemHeader *argToken) {
    const T *typedToken = reinterpret_cast<const T *>(argToken);
    return typedToken->ArgumentNumber;
}
// Records an object-argument token (buffer / image / sampler) as the objectArg of the
// kernel argument it refers to. Memory-object and device-queue tokens are classified as
// ArgObjectType::buffer; any token not listed explicitly must be a sampler argument.
inline void assignArg(KernelFromPatchtokens &kernel, const SPatchItemHeader *src) {
    ArgObjectType type = ArgObjectType::buffer;
    uint32_t argNum = 0;
    switch (src->Token) {
    case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT:
        type = ArgObjectType::image;
        argNum = getArgNum<SPatchImageMemoryObjectKernelArgument>(src);
        break;
    case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT:
        argNum = getArgNum<SPatchGlobalMemoryObjectKernelArgument>(src);
        break;
    case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT:
        argNum = getArgNum<SPatchStatelessGlobalMemoryObjectKernelArgument>(src);
        break;
    case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT:
        argNum = getArgNum<SPatchStatelessConstantMemoryObjectKernelArgument>(src);
        break;
    case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT:
        argNum = getArgNum<SPatchStatelessDeviceQueueKernelArgument>(src);
        break;
    default:
        // Only the sampler argument token may reach this branch.
        UNRECOVERABLE_IF(src->Token != PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT);
        type = ArgObjectType::sampler;
        argNum = getArgNum<SPatchSamplerKernelArgument>(src);
        break;
    }
    auto &kernelArg = getKernelArg(kernel, argNum, type);
    kernelArg.objectArg = src;
}
// Stores a SPatchString token in stringVec at the position given by the token's Index field,
// growing the vector when the slot does not exist yet.
//
// Index comes straight from the binary. It is widened to size_t before any arithmetic so that
// `Index + 1` is not evaluated in the (narrower) type of the field: with a 32-bit unsigned Index
// of 0xFFFFFFFF the original expression wrapped to 0, skipped the resize and then indexed the
// vector out of bounds.
// NOTE(review): on targets where size_t is no wider than the Index field the increment can
// still wrap - confirm whether such builds need an explicit upper limit.
inline void assignToken(StackVecStrings &stringVec, const SPatchItemHeader *src) {
    auto stringToken = reinterpret_cast<const SPatchString *>(src);
    const size_t index = stringToken->Index;
    if (stringVec.size() <= index) {
        stringVec.resize(index + 1);
    }
    stringVec[index] = stringToken;
}
// Places a data-parameter token into a fixed-size array of token pointers.
// The slot is SourceOffset / 4; tokens whose slot does not fit in the array are logged
// and appended to unhandledTokens instead.
template <size_t s>
inline void assignTokenInArray(const SPatchDataParameterBuffer *(&tokensArray)[s], const SPatchDataParameterBuffer *src, StackVecUnhandledTokens &unhandledTokens) {
    auto sourceIndex = src->SourceOffset >> 2;
    if (sourceIndex < s) {
        assignToken(tokensArray[sourceIndex], src);
        return;
    }
    DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled sourceIndex ", sourceIndex);
    DEBUG_BREAK_IF(true);
    unhandledTokens.push_back(src);
}
// Appends a token, reinterpreted as the vector's concrete token type, to tokensVec.
template <typename PatchT, size_t numInlineEl>
inline void addTok(StackVec<const PatchT *, numInlineEl> &tokensVec, const SPatchItemHeader *src) {
    const PatchT *typedToken = reinterpret_cast<const PatchT *>(src);
    tokensVec.push_back(typedToken);
}
// Routes a single SPatchDataParameterBuffer token to its place in `out`, based on token->Type.
//
// - Cross-thread payload parameters (work sizes, offsets, SIMD size, ...) are stored in
//   out.tokens.crossThreadPayloadArgs; vector-like parameters use SourceOffset / 4 as the slot.
// - Per-argument parameters are stored in the kernel argument selected by token->ArgumentNumber.
//   They go through getKernelArg(), which grows the argument list on demand and flags the kernel
//   as invalid when the implied argument type conflicts with a previously recorded one.
// - Types that are not listed, or whose slot is out of range, are appended to out.unhandledTokens.
// - A fixed set of types at the end of the switch is accepted and deliberately dropped.
inline void decodeKernelDataParameterToken(const SPatchDataParameterBuffer *token, KernelFromPatchtokens &out) {
auto &crossthread = out.tokens.crossThreadPayloadArgs;
// Slot inside 3-element (x/y/z-like) arrays: byte offset divided by 4.
auto sourceIndex = token->SourceOffset >> 2;
auto argNum = token->ArgumentNumber;
switch (token->Type) {
default:
DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled SPatchDataParameterBuffer ", token->Type);
DEBUG_BREAK_IF(true);
out.unhandledTokens.push_back(token);
break;
case DATA_PARAMETER_KERNEL_ARGUMENT:
// By-value argument piece; the argument's object type is not changed by this token.
getKernelArg(out, argNum, ArgObjectType::none).byValMap.push_back(token);
break;
case DATA_PARAMETER_LOCAL_WORK_SIZE: {
if (sourceIndex >= 3) {
DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled sourceIndex ", sourceIndex);
DEBUG_BREAK_IF(true);
out.unhandledTokens.push_back(token);
return;
}
// The first token seen for a given slot goes to localWorkSize; once that slot is occupied,
// further tokens for the same slot are written to localWorkSize2.
auto localWorkSizeArray = (crossthread.localWorkSize[sourceIndex] == nullptr)
? crossthread.localWorkSize
: crossthread.localWorkSize2;
localWorkSizeArray[sourceIndex] = token;
break;
}
case DATA_PARAMETER_GLOBAL_WORK_OFFSET:
assignTokenInArray(crossthread.globalWorkOffset, token, out.unhandledTokens);
break;
case DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE:
assignTokenInArray(crossthread.enqueuedLocalWorkSize, token, out.unhandledTokens);
break;
case DATA_PARAMETER_GLOBAL_WORK_SIZE:
assignTokenInArray(crossthread.globalWorkSize, token, out.unhandledTokens);
break;
case DATA_PARAMETER_NUM_WORK_GROUPS:
assignTokenInArray(crossthread.numWorkGroups, token, out.unhandledTokens);
break;
case DATA_PARAMETER_MAX_WORKGROUP_SIZE:
crossthread.maxWorkGroupSize = token;
break;
case DATA_PARAMETER_WORK_DIMENSIONS:
crossthread.workDimensions = token;
break;
case DATA_PARAMETER_SIMD_SIZE:
crossthread.simdSize = token;
break;
case DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE:
crossthread.privateMemoryStatelessSize = token;
break;
case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE:
crossthread.localMemoryStatelessWindowSize = token;
break;
case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS:
crossthread.localMemoryStatelessWindowStartAddress = token;
break;
case DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES: {
// SLM argument: the token is recorded both as a by-value piece and as the SLM metadata.
auto &kernelArg = getKernelArg(out, argNum, ArgObjectType::slm);
kernelArg.byValMap.push_back(token);
kernelArg.metadata.slm.token = token;
} break;
case DATA_PARAMETER_BUFFER_OFFSET:
getKernelArg(out, argNum, ArgObjectType::buffer).metadata.buffer.bufferOffset = token;
break;
case DATA_PARAMETER_BUFFER_STATEFUL:
getKernelArg(out, argNum, ArgObjectType::buffer).metadata.buffer.pureStateful = token;
break;
case DATA_PARAMETER_IMAGE_WIDTH:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.width = token;
break;
case DATA_PARAMETER_IMAGE_HEIGHT:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.height = token;
break;
case DATA_PARAMETER_IMAGE_DEPTH:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.depth = token;
break;
case DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.channelDataType = token;
break;
case DATA_PARAMETER_IMAGE_CHANNEL_ORDER:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.channelOrder = token;
break;
case DATA_PARAMETER_IMAGE_ARRAY_SIZE:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.arraySize = token;
break;
case DATA_PARAMETER_IMAGE_NUM_SAMPLES:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.numSamples = token;
break;
case DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.numMipLevels = token;
break;
case DATA_PARAMETER_FLAT_IMAGE_BASEOFFSET:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.flatBaseOffset = token;
break;
case DATA_PARAMETER_FLAT_IMAGE_WIDTH:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.flatWidth = token;
break;
case DATA_PARAMETER_FLAT_IMAGE_HEIGHT:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.flatHeight = token;
break;
case DATA_PARAMETER_FLAT_IMAGE_PITCH:
getKernelArg(out, argNum, ArgObjectType::image).metadata.image.flatPitch = token;
break;
case DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED:
getKernelArg(out, argNum, ArgObjectType::sampler).metadata.sampler.coordinateSnapWaRequired = token;
break;
case DATA_PARAMETER_SAMPLER_ADDRESS_MODE:
getKernelArg(out, argNum, ArgObjectType::sampler).metadata.sampler.addressMode = token;
break;
case DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS:
getKernelArg(out, argNum, ArgObjectType::sampler).metadata.sampler.normalizedCoords = token;
break;
// VME parameters only set the specialized type; the base object type is left as is.
case DATA_PARAMETER_VME_MB_BLOCK_TYPE:
getKernelArg(out, argNum, ArgObjectType::none, ArgObjectTypeSpecialized::vme).metadataSpecialized.vme.mbBlockType = token;
break;
case DATA_PARAMETER_VME_SUBPIXEL_MODE:
getKernelArg(out, argNum, ArgObjectType::none, ArgObjectTypeSpecialized::vme).metadataSpecialized.vme.subpixelMode = token;
break;
case DATA_PARAMETER_VME_SAD_ADJUST_MODE:
getKernelArg(out, argNum, ArgObjectType::none, ArgObjectTypeSpecialized::vme).metadataSpecialized.vme.sadAdjustMode = token;
break;
case DATA_PARAMETER_VME_SEARCH_PATH_TYPE:
getKernelArg(out, argNum, ArgObjectType::none, ArgObjectTypeSpecialized::vme).metadataSpecialized.vme.searchPathType = token;
break;
case DATA_PARAMETER_PARENT_EVENT:
crossthread.parentEvent = token;
break;
case DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE:
crossthread.preferredWorkgroupMultiple = token;
break;
case DATA_PARAMETER_IMPL_ARG_BUFFER:
// Same object as `crossthread` above, spelled out in full.
out.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset = token;
break;
case DATA_PARAMETER_NUM_HARDWARE_THREADS:
case DATA_PARAMETER_PRINTF_SURFACE_SIZE:
case DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER:
case DATA_PARAMETER_STAGE_IN_GRID_ORIGIN:
case DATA_PARAMETER_STAGE_IN_GRID_SIZE:
case DATA_PARAMETER_LOCAL_ID:
case DATA_PARAMETER_EXECUTION_MASK:
case DATA_PARAMETER_VME_IMAGE_TYPE:
case DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE:
case DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE:
case DATA_PARAMETER_OBJECT_ID:
// ignored intentionally
break;
}
}
// Decodes one kernel-scope patch token into `out`, dispatching on token->Token.
//
// Most tokens are simply remembered as a typed pointer in out.tokens; argument-related tokens
// are forwarded to assignArgInfo() / assignArg(), and data-parameter tokens to
// decodeKernelDataParameterToken(). Unknown tokens are appended to out.unhandledTokens.
//
// Returns false when out.decodeStatus is DecodeError::invalidBinary after the token was handled
// (the status is set by getKernelArg() on conflicting argument metadata), true otherwise.
inline bool decodeToken(const SPatchItemHeader *token, KernelFromPatchtokens &out) {
switch (token->Token) {
default: {
PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "Unknown kernel-scope Patch Token: %d\n", token->Token);
DEBUG_BREAK_IF(true);
out.unhandledTokens.push_back(token);
break;
}
case PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA:
// Recognized but not stored; only reported in debug output.
PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "Ignored kernel-scope Patch Token: %d\n", token->Token);
break;
case PATCH_TOKEN_SAMPLER_STATE_ARRAY:
assignToken(out.tokens.samplerStateArray, token);
break;
case PATCH_TOKEN_BINDING_TABLE_STATE:
assignToken(out.tokens.bindingTableState, token);
break;
case PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE:
assignToken(out.tokens.allocateLocalSurface, token);
break;
case PATCH_TOKEN_MEDIA_VFE_STATE:
assignToken(out.tokens.mediaVfeState[0], token);
break;
case PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1:
assignToken(out.tokens.mediaVfeState[1], token);
break;
case PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
assignToken(out.tokens.mediaInterfaceDescriptorLoad, token);
break;
case PATCH_TOKEN_THREAD_PAYLOAD:
assignToken(out.tokens.threadPayload, token);
break;
case PATCH_TOKEN_EXECUTION_ENVIRONMENT:
assignToken(out.tokens.executionEnvironment, token);
break;
case PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO:
assignToken(out.tokens.kernelAttributesInfo, token);
break;
case PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY:
assignToken(out.tokens.allocateStatelessPrivateSurface, token);
break;
case PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION:
assignToken(out.tokens.allocateStatelessConstantMemorySurfaceWithInitialization, token);
break;
case PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION:
assignToken(out.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization, token);
break;
case PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE:
assignToken(out.tokens.allocateStatelessPrintfSurface, token);
break;
case PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE:
// Recognized (so it is not reported as unknown) but nothing is stored for it.
break;
case PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE:
assignToken(out.tokens.allocateStatelessDefaultDeviceQueueSurface, token);
break;
case PATCH_TOKEN_STRING:
// Overload for StackVecStrings: stored at the position given by the token's Index.
assignToken(out.tokens.strings, token);
break;
case PATCH_TOKEN_INLINE_VME_SAMPLER_INFO:
assignToken(out.tokens.inlineVmeSamplerInfo, token);
break;
case PATCH_TOKEN_GTPIN_FREE_GRF_INFO:
assignToken(out.tokens.gtpinFreeGrfInfo, token);
break;
case PATCH_TOKEN_GTPIN_INFO:
assignToken(out.tokens.gtpinInfo, token);
break;
case PATCH_TOKEN_STATE_SIP:
assignToken(out.tokens.stateSip, token);
break;
case PATCH_TOKEN_ALLOCATE_SIP_SURFACE:
assignToken(out.tokens.allocateSystemThreadSurface, token);
break;
case PATCH_TOKEN_PROGRAM_SYMBOL_TABLE:
assignToken(out.tokens.programSymbolTable, token);
break;
case PATCH_TOKEN_PROGRAM_RELOCATION_TABLE:
assignToken(out.tokens.programRelocationTable, token);
break;
case PATCH_TOKEN_KERNEL_ARGUMENT_INFO:
assignArgInfo(out, token);
break;
case PATCH_TOKEN_GLOBAL_HOST_ACCESS_TABLE:
assignToken(out.tokens.hostAccessTable, token);
break;
// Object-argument tokens: assignArg() works out the argument number and object type.
case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT:
case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT:
case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT:
case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT:
case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT:
case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT:
assignArg(out, token);
break;
case PATCH_TOKEN_DATA_PARAMETER_STREAM:
assignToken(out.tokens.dataParameterStream, token);
break;
case PATCH_TOKEN_DATA_PARAMETER_BUFFER: {
auto tokDataP = reinterpret_cast<const SPatchDataParameterBuffer *>(token);
decodeKernelDataParameterToken(tokDataP, out);
} break;
case PATCH_TOKEN_ALLOCATE_SYNC_BUFFER: {
assignToken(out.tokens.allocateSyncBuffer, token);
} break;
case PATCH_TOKEN_ALLOCATE_RT_GLOBAL_BUFFER:
assignToken(out.tokens.allocateRTGlobalBuffer, token);
break;
}
// Helpers above may have flagged the kernel as invalid while recording the token.
return out.decodeStatus != DecodeError::invalidBinary;
}
// Decodes one program-scope patch token into `out`.
// Surface/pointer tokens are appended to their respective lists, the symbol table is stored as
// a single pointer, and anything else is reported and appended to out.unhandledTokens.
// Always returns true.
inline bool decodeToken(const SPatchItemHeader *token, ProgramFromPatchtokens &out) {
    auto &programTokens = out.programScopeTokens;
    switch (token->Token) {
    case PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:
        addTok(programTokens.allocateConstantMemorySurface, token);
        break;
    case PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO:
        addTok(programTokens.allocateGlobalMemorySurface, token);
        break;
    case PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO:
        addTok(programTokens.globalPointer, token);
        break;
    case PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO:
        addTok(programTokens.constantPointer, token);
        break;
    case PATCH_TOKEN_PROGRAM_SYMBOL_TABLE:
        assignToken(programTokens.symbolTable, token);
        break;
    default: {
        PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "Unknown program-scope Patch Token: %d\n", token->Token);
        DEBUG_BREAK_IF(true);
        out.unhandledTokens.push_back(token);
        break;
    }
    }
    return true;
}
// Returns the number of bytes a token occupies in the patch list of the given decode context
// (kernel or program). Only declared here; each supported context provides a specialization.
// `stream` is the patch list being decoded and is available for bounds checks.
template <typename DecodeContext>
inline size_t getPatchTokenTotalSize(PatchTokensStreamReader stream, const SPatchItemHeader *token);
// Kernel-scope tokens occupy exactly the size declared in their header.
template <>
inline size_t getPatchTokenTotalSize<KernelFromPatchtokens>(PatchTokensStreamReader stream, const SPatchItemHeader *token) {
    const size_t declaredSize = token->Size;
    return declaredSize;
}
// Program-scope tokens occupy the size declared in their header, except for the constant/global
// memory surface tokens, which are followed by InlineDataSize bytes of inline data.
// For those two, the maximum size_t value is returned when the stream is too short to even hold
// the fixed part of the token, so that the caller's bounds check rejects it.
template <>
inline size_t getPatchTokenTotalSize<ProgramFromPatchtokens>(PatchTokensStreamReader stream, const SPatchItemHeader *token) {
    const size_t declaredSize = token->Size;

    if (token->Token == PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO) {
        if (stream.notEnoughDataLeft<SPatchAllocateConstantMemorySurfaceProgramBinaryInfo>(token)) {
            return std::numeric_limits<size_t>::max();
        }
        auto surfaceToken = reinterpret_cast<const SPatchAllocateConstantMemorySurfaceProgramBinaryInfo *>(token);
        return declaredSize + surfaceToken->InlineDataSize;
    }

    if (token->Token == PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO) {
        if (stream.notEnoughDataLeft<SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo>(token)) {
            return std::numeric_limits<size_t>::max();
        }
        auto surfaceToken = reinterpret_cast<const SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo *>(token);
        return declaredSize + surfaceToken->InlineDataSize;
    }

    return declaredSize;
}
// Walks a patch list token by token and decodes each token into `out`.
//
// For every token the total size is obtained from getPatchTokenTotalSize<OutT>() and validated:
// it must be non-zero and must fit in the remaining part of the stream. Decoding stops with
// `false` on the first invalid size or on the first token that decodeToken() rejects.
//
// The decode position is advanced only after a token was validated and decoded. Advancing
// unconditionally (as done previously) performed pointer arithmetic with a size that had just
// been found to be out of range - possibly the size_t maximum - which is undefined behavior.
//
// NOTE(review): the loop condition uses '>' so a tail of at most sizeof(SPatchItemHeader) bytes
// is left undecoded and still reported as success - confirm this is intended.
template <typename OutT>
inline bool decodePatchList(PatchTokensStreamReader patchListStream, OutT &out) {
    auto decodePos = patchListStream.data.begin();
    auto decodeEnd = patchListStream.data.end();
    while (ptrDiff(decodeEnd, decodePos) > sizeof(SPatchItemHeader)) {
        auto token = reinterpret_cast<const SPatchItemHeader *>(decodePos);
        size_t tokenTotalSize = getPatchTokenTotalSize<OutT>(patchListStream, token);
        if (patchListStream.notEnoughDataLeft(decodePos, tokenTotalSize) || (tokenTotalSize == 0U)) {
            return false;
        }
        if (false == decodeToken(token, out)) {
            return false;
        }
        decodePos = ptrOffset(decodePos, tokenTotalSize);
    }
    return true;
}
// Decodes a single kernel from kernelBlob into `out`.
//
// Layout consumed, in order: SKernelBinaryHeaderCommon, kernel name, kernel heap (ISA),
// general state heap, dynamic state heap, surface state heap, patch list. The sizes of all
// sections come from the header. `out` only receives views into kernelBlob; nothing is copied.
//
// On return out.decodeStatus is DecodeError::success (returns true) or
// DecodeError::invalidBinary (returns false) when the blob is too short or the patch list
// fails to decode. kernelBlob may be longer than the kernel; out.blobs.kernelInfo tells how
// much of it was consumed.
bool decodeKernelFromPatchtokensBlob(ArrayRef<const uint8_t> kernelBlob, KernelFromPatchtokens &out) {
    PatchTokensStreamReader stream{kernelBlob};
    auto decodePos = stream.data.begin();
    out.decodeStatus = DecodeError::undefined;
    if (stream.notEnoughDataLeft<SKernelBinaryHeaderCommon>(decodePos)) {
        out.decodeStatus = DecodeError::invalidBinary;
        return false;
    }

    out.header = reinterpret_cast<const SKernelBinaryHeaderCommon *>(decodePos);

    // The section sizes are untrusted. Sum them in 64 bits so that the total cannot wrap on
    // builds where size_t is 32 bits wide and slip past the bounds check below.
    uint64_t totalKernelSize = sizeof(SKernelBinaryHeaderCommon);
    totalKernelSize += out.header->KernelNameSize;
    totalKernelSize += out.header->KernelHeapSize;
    totalKernelSize += out.header->GeneralStateHeapSize;
    totalKernelSize += out.header->DynamicStateHeapSize;
    totalKernelSize += out.header->SurfaceStateHeapSize;
    totalKernelSize += out.header->PatchListSize;
    if (totalKernelSize > stream.getDataSizeLeft(decodePos)) {
        out.decodeStatus = DecodeError::invalidBinary;
        return false;
    }
    // Fits in size_t: it is not larger than the number of bytes available in the blob.
    const auto kernelInfoBlobSize = static_cast<size_t>(totalKernelSize);

    out.blobs.kernelInfo = ArrayRef<const uint8_t>(stream.data.begin(), kernelInfoBlobSize);

    // Carve the individual sections out of the blob, one after another.
    decodePos = ptrOffset(decodePos, sizeof(SKernelBinaryHeaderCommon));
    auto kernelName = reinterpret_cast<const char *>(decodePos);
    out.name = ArrayRef<const char>(kernelName, out.header->KernelNameSize);
    decodePos = ptrOffset(decodePos, out.name.size());

    out.isa = ArrayRef<const uint8_t>(decodePos, out.header->KernelHeapSize);
    decodePos = ptrOffset(decodePos, out.isa.size());

    out.heaps.generalState = ArrayRef<const uint8_t>(decodePos, out.header->GeneralStateHeapSize);
    decodePos = ptrOffset(decodePos, out.heaps.generalState.size());

    out.heaps.dynamicState = ArrayRef<const uint8_t>(decodePos, out.header->DynamicStateHeapSize);
    decodePos = ptrOffset(decodePos, out.heaps.dynamicState.size());

    out.heaps.surfaceState = ArrayRef<const uint8_t>(decodePos, out.header->SurfaceStateHeapSize);
    decodePos = ptrOffset(decodePos, out.heaps.surfaceState.size());

    out.blobs.patchList = ArrayRef<const uint8_t>(decodePos, out.header->PatchListSize);

    if (false == decodePatchList(out.blobs.patchList, out)) {
        out.decodeStatus = DecodeError::invalidBinary;
        return false;
    }
    out.decodeStatus = DecodeError::success;
    return true;
}
// Splits decodedProgram.blobs.programInfo into header, program-scope patch list and kernels data.
// Fills decodedProgram.header, blobs.patchList and blobs.kernelsInfo (everything after the patch list).
// Returns false when the blob is too short for the header or the patch list, or when the
// header magic is not MAGIC_CL. Note that `header` is already assigned when the magic check fails.
inline bool decodeProgramHeader(ProgramFromPatchtokens &decodedProgram) {
    PatchTokensStreamReader stream{decodedProgram.blobs.programInfo};
    auto headerPos = decodedProgram.blobs.programInfo.begin();
    if (stream.notEnoughDataLeft<SProgramBinaryHeader>(headerPos)) {
        return false;
    }

    auto programHeader = reinterpret_cast<const SProgramBinaryHeader *>(headerPos);
    decodedProgram.header = programHeader;
    if (programHeader->Magic != MAGIC_CL) {
        return false;
    }

    auto patchListPos = ptrOffset(headerPos, sizeof(SProgramBinaryHeader));
    const size_t patchListSize = programHeader->PatchListSize;
    if (stream.notEnoughDataLeft(patchListPos, patchListSize)) {
        return false;
    }
    decodedProgram.blobs.patchList = ArrayRef<const uint8_t>(patchListPos, patchListSize);

    auto kernelsPos = ptrOffset(patchListPos, patchListSize);
    const size_t kernelsSize = stream.getDataSizeLeft(kernelsPos);
    decodedProgram.blobs.kernelsInfo = ArrayRef<const uint8_t>(kernelsPos, kernelsSize);
    return true;
}
// Decodes header->NumberOfKernels kernels, stored back to back in blobs.kernelsInfo, and appends
// them to decodedProgram.kernels. Stops at the first kernel that fails to decode; that kernel
// stays in the list (with its decodeStatus set by the kernel decoder). Returns true only when
// all kernels decoded successfully.
inline bool decodeKernels(ProgramFromPatchtokens &decodedProgram) {
    const auto numKernels = decodedProgram.header->NumberOfKernels;
    PatchTokensStreamReader stream{decodedProgram.blobs.kernelsInfo};

    // NumberOfKernels is untrusted. Every kernel needs at least its fixed header, so the blob
    // size bounds how many kernels can really be present; reserving more than that would let a
    // malformed header trigger an arbitrarily large allocation before any kernel is validated.
    const size_t maxKernelsInBlob = stream.data.size() / sizeof(SKernelBinaryHeaderCommon);
    decodedProgram.kernels.reserve(std::min<size_t>(numKernels, maxKernelsInBlob));

    const uint8_t *decodePos = decodedProgram.blobs.kernelsInfo.begin();
    bool decodeSuccess = true;
    for (uint32_t i = 0; (i < numKernels) && decodeSuccess; i++) {
        decodedProgram.kernels.resize(decodedProgram.kernels.size() + 1);
        auto &currKernelInfo = *decodedProgram.kernels.rbegin();
        // Hand the whole remaining range to the kernel decoder; it reports how much it consumed.
        auto kernelDataLeft = ArrayRef<const uint8_t>(decodePos, stream.getDataSizeLeft(decodePos));
        decodeSuccess = decodeKernelFromPatchtokensBlob(kernelDataLeft, currKernelInfo);
        decodePos = ptrOffset(decodePos, currKernelInfo.blobs.kernelInfo.size());
    }
    return decodeSuccess;
}
// Decodes a whole program binary: header first, then all kernels, then the program-scope patch
// list. Later stages are skipped as soon as one stage fails. out.decodeStatus is set to
// DecodeError::success or DecodeError::invalidBinary accordingly, and the same result is returned.
bool decodeProgramFromPatchtokensBlob(ArrayRef<const uint8_t> programBlob, ProgramFromPatchtokens &out) {
    out.blobs.programInfo = programBlob;

    const bool decodeSuccess = decodeProgramHeader(out) &&
                               decodeKernels(out) &&
                               decodePatchList(out.blobs.patchList, out);

    if (decodeSuccess) {
        out.decodeStatus = DecodeError::success;
    } else {
        out.decodeStatus = DecodeError::invalidBinary;
    }
    return decodeSuccess;
}
uint32_t calcKernelChecksum(const ArrayRef<const uint8_t> kernelBlob) {
UNRECOVERABLE_IF(kernelBlob.size() <= sizeof(SKernelBinaryHeaderCommon));
auto dataToHash = ArrayRef<const uint8_t>(ptrOffset(kernelBlob.begin(), sizeof(SKernelBinaryHeaderCommon)), kernelBlob.end());
uint64_t hashValue = Hash::hash(reinterpret_cast<const char *>(dataToHash.begin()), dataToHash.size());
uint32_t checksum = hashValue & 0xFFFFFFFF;
return checksum;
}
// True when the checksum stored in the decoded kernel's header differs from the one
// recomputed over the kernel's blob (decodedKernel.blobs.kernelInfo).
bool hasInvalidChecksum(const KernelFromPatchtokens &decodedKernel) {
    const uint32_t storedChecksum = decodedKernel.header->CheckSum;
    const uint32_t recomputedChecksum = NEO::PatchTokenBinary::calcKernelChecksum(decodedKernel.blobs.kernelInfo);
    return !(storedChecksum == recomputedChecksum);
}
// Extracts the variable-length strings stored right after a SPatchKernelArgumentInfo token.
//
// The strings follow the fixed part of the token in this order: address qualifier, access
// qualifier, argument name, type name, type qualifiers. Each length comes from the token and
// is clamped so that no returned range reaches past the token's declared Size.
//
// The clamping is done with a "bytes left" counter instead of comparing end pointers: when
// Size is smaller than the fixed part of the token the old pointer-based bound lay *before*
// the start of the inline data, producing ranges whose end preceded their begin, and
// `decodePos + length` could be computed past the end of the buffer. Now such a token simply
// yields empty ranges. Results for well-formed tokens are unchanged.
//
// Aborts (UNRECOVERABLE_IF) when ptr is null. The returned ranges are views into the token.
const KernelArgAttributesFromPatchtokens getInlineData(const SPatchKernelArgumentInfo *ptr) {
    KernelArgAttributesFromPatchtokens ret = {};
    UNRECOVERABLE_IF(ptr == nullptr);

    auto decodePos = reinterpret_cast<const char *>(ptr + 1);
    const size_t fixedPartSize = sizeof(SPatchKernelArgumentInfo);
    size_t bytesLeft = (ptr->Size > fixedPartSize) ? (ptr->Size - fixedPartSize) : 0U;

    // Takes up to requestedSize bytes from the inline data and moves the cursor past them.
    auto takeField = [&decodePos, &bytesLeft](size_t requestedSize) {
        const size_t fieldSize = std::min(requestedSize, bytesLeft);
        ArrayRef<const char> field(decodePos, fieldSize);
        decodePos += fieldSize;
        bytesLeft -= fieldSize;
        return field;
    };

    ret.addressQualifier = takeField(ptr->AddressQualifierSize);
    ret.accessQualifier = takeField(ptr->AccessQualifierSize);
    ret.argName = takeField(ptr->ArgumentNameSize);
    ret.typeName = takeField(ptr->TypeNameSize);
    ret.typeQualifiers = takeField(ptr->TypeQualifierSize);
    return ret;
}
// Convenience wrapper: validates the header of programBlob and returns a pointer to it
// (pointing into programBlob), or nullptr when the header cannot be decoded.
const iOpenCL::SProgramBinaryHeader *decodeProgramHeader(const ArrayRef<const uint8_t> programBlob) {
    ProgramFromPatchtokens scratchProgram;
    scratchProgram.blobs.programInfo = programBlob;
    const bool headerDecoded = decodeProgramHeader(scratchProgram);
    return headerDecoded ? scratchProgram.header : nullptr;
}
} // namespace PatchTokenBinary
} // namespace NEO