From 355e8d3e5ab832fae4b77ba3ee1b19cb4b79b071 Mon Sep 17 00:00:00 2001 From: Jaroslaw Chodor Date: Sun, 27 Oct 2019 19:48:26 +0100 Subject: [PATCH] [1/N] Program refactor - decouple from patchokens Change-Id: I63bbf6c31a5db9e788124f22b6105e65c16c86d4 --- core/gen11/hw_cmds_generated.inl | 4 + core/gen12lp/hw_cmds_generated.inl | 4 + core/gen8/hw_cmds_generated.inl | 4 + core/gen9/hw_cmds_generated.inl | 4 + core/helpers/hw_helper.h | 6 +- core/helpers/ptr_math.h | 33 +- .../unit_tests/utilities/containers_tests.cpp | 5 + core/utilities/arrayref.h | 14 + core/utilities/stackvec.h | 4 + runtime/built_ins/sip.cpp | 1 + runtime/compiler_interface/CMakeLists.txt | 5 + .../patchtokens_decoder.cpp | 620 ++++++ .../compiler_interface/patchtokens_decoder.h | 213 ++ .../compiler_interface/patchtokens_dumper.cpp | 889 +++++++++ .../compiler_interface/patchtokens_dumper.h | 26 + .../patchtokens_validator.inl | 130 ++ .../device_queue/device_queue_hw_bdw_plus.inl | 1 + runtime/helpers/hardware_commands_helper.h | 37 +- runtime/helpers/hardware_commands_helper.inl | 38 + runtime/kernel/kernel.cpp | 20 +- runtime/kernel/kernel.h | 11 +- runtime/kernel/kernel.inl | 1 + runtime/program/CMakeLists.txt | 2 + runtime/program/build.cpp | 10 +- runtime/program/create.inl | 1 + runtime/program/heap_info.h | 24 +- runtime/program/kernel_info.cpp | 87 +- runtime/program/kernel_info.h | 78 +- .../program/kernel_info_from_patchtokens.cpp | 185 ++ .../program/kernel_info_from_patchtokens.h | 20 + runtime/program/link.cpp | 1 + runtime/program/patch_info.h | 5 +- runtime/program/process_elf_binary.cpp | 5 +- runtime/program/process_gen_binary.cpp | 1073 ++-------- runtime/program/program.cpp | 12 +- runtime/program/program.h | 50 +- unit_tests/api/cl_build_program_tests.inl | 1 + unit_tests/api/cl_create_kernel_tests.inl | 1 + unit_tests/compiler_interface/CMakeLists.txt | 4 + .../patchtokens_decoder_tests.cpp | 1180 +++++++++++ .../patchtokens_dumper_tests.cpp | 1754 +++++++++++++++++ 
.../compiler_interface/patchtokens_tests.h | 252 +++ .../patchtokens_validator_tests.cpp | 337 ++++ .../device_queue/device_queue_hw_tests.cpp | 2 +- .../enqueue_execution_model_kernel_tests.cpp | 4 +- unit_tests/fixtures/kernel_data_fixture.cpp | 20 +- unit_tests/gtpin/gtpin_tests.cpp | 5 + .../kernel_reflection_surface_tests.cpp | 14 +- unit_tests/kernel/kernel_tests.cpp | 4 +- unit_tests/kernel/parent_kernel_tests.cpp | 6 +- unit_tests/mocks/mock_kernel.h | 3 +- unit_tests/mocks/mock_program.cpp | 1 + unit_tests/mocks/mock_program.h | 25 +- unit_tests/program/CMakeLists.txt | 1 + .../evaluate_unhandled_token_tests.cpp | 23 +- unit_tests/program/kernel_data.cpp | 128 +- .../kernel_info_from_patchtokens_tests.cpp | 27 + .../program/process_elf_binary_tests.cpp | 1 + unit_tests/program/program_data_tests.cpp | 42 +- unit_tests/program/program_tests.cpp | 60 +- .../program_with_block_kernels_tests.cpp | 9 +- 61 files changed, 6159 insertions(+), 1368 deletions(-) create mode 100644 runtime/compiler_interface/patchtokens_decoder.cpp create mode 100644 runtime/compiler_interface/patchtokens_decoder.h create mode 100644 runtime/compiler_interface/patchtokens_dumper.cpp create mode 100644 runtime/compiler_interface/patchtokens_dumper.h create mode 100644 runtime/compiler_interface/patchtokens_validator.inl create mode 100644 runtime/program/kernel_info_from_patchtokens.cpp create mode 100644 runtime/program/kernel_info_from_patchtokens.h create mode 100644 unit_tests/compiler_interface/patchtokens_decoder_tests.cpp create mode 100644 unit_tests/compiler_interface/patchtokens_dumper_tests.cpp create mode 100644 unit_tests/compiler_interface/patchtokens_tests.h create mode 100644 unit_tests/compiler_interface/patchtokens_validator_tests.cpp create mode 100644 unit_tests/program/kernel_info_from_patchtokens_tests.cpp diff --git a/core/gen11/hw_cmds_generated.inl b/core/gen11/hw_cmds_generated.inl index b09b089359..3304a60eba 100644 --- a/core/gen11/hw_cmds_generated.inl 
+++ b/core/gen11/hw_cmds_generated.inl @@ -30,6 +30,10 @@ typedef struct tagBINDING_TABLE_STATE { DEBUG_BREAK_IF(index >= 1); return TheStructure.RawData[index]; } + inline const uint32_t &getRawData(const uint32_t index) const { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } typedef enum tagSURFACESTATEPOINTER { SURFACESTATEPOINTER_BIT_SHIFT = 0x6, SURFACESTATEPOINTER_ALIGN_SIZE = 0x40, diff --git a/core/gen12lp/hw_cmds_generated.inl b/core/gen12lp/hw_cmds_generated.inl index c28c6fe9d0..b5d9e4c2f6 100644 --- a/core/gen12lp/hw_cmds_generated.inl +++ b/core/gen12lp/hw_cmds_generated.inl @@ -30,6 +30,10 @@ typedef struct tagBINDING_TABLE_STATE { DEBUG_BREAK_IF(index >= 1); return TheStructure.RawData[index]; } + inline const uint32_t &getRawData(const uint32_t index) const { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } typedef enum tagSURFACESTATEPOINTER { SURFACESTATEPOINTER_BIT_SHIFT = 0x6, SURFACESTATEPOINTER_ALIGN_SIZE = 0x40, diff --git a/core/gen8/hw_cmds_generated.inl b/core/gen8/hw_cmds_generated.inl index c64d7a1241..2d1beb2e6a 100644 --- a/core/gen8/hw_cmds_generated.inl +++ b/core/gen8/hw_cmds_generated.inl @@ -30,6 +30,10 @@ typedef struct tagBINDING_TABLE_STATE { DEBUG_BREAK_IF(index >= 1); return TheStructure.RawData[index]; } + inline const uint32_t &getRawData(const uint32_t index) const { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } typedef enum tagSURFACESTATEPOINTER { SURFACESTATEPOINTER_BIT_SHIFT = 0x6, SURFACESTATEPOINTER_ALIGN_SIZE = 0x40, diff --git a/core/gen9/hw_cmds_generated.inl b/core/gen9/hw_cmds_generated.inl index 54cc60c013..bdd0c0870d 100644 --- a/core/gen9/hw_cmds_generated.inl +++ b/core/gen9/hw_cmds_generated.inl @@ -30,6 +30,10 @@ typedef struct tagBINDING_TABLE_STATE { DEBUG_BREAK_IF(index >= 1); return TheStructure.RawData[index]; } + inline const uint32_t &getRawData(const uint32_t index) const { + DEBUG_BREAK_IF(index >= 1); + return 
TheStructure.RawData[index]; + } typedef enum tagSURFACESTATEPOINTER { SURFACESTATEPOINTER_BIT_SHIFT = 0x6, SURFACESTATEPOINTER_ALIGN_SIZE = 0x40, diff --git a/core/helpers/hw_helper.h b/core/helpers/hw_helper.h index 93455c5903..e307ddf037 100644 --- a/core/helpers/hw_helper.h +++ b/core/helpers/hw_helper.h @@ -25,7 +25,7 @@ class GmmHelper; class HwHelper { public: static HwHelper &get(GFXCORE_FAMILY gfxCore); - virtual uint32_t getBindingTableStateSurfaceStatePointer(void *pBindingTable, uint32_t index) = 0; + virtual uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) = 0; virtual size_t getBindingTableStateSize() const = 0; virtual uint32_t getBindingTableStateAlignement() const = 0; virtual size_t getInterfaceDescriptorDataSize() const = 0; @@ -88,10 +88,10 @@ class HwHelperHw : public HwHelper { static const aub_stream::EngineType lowPriorityEngineType; - uint32_t getBindingTableStateSurfaceStatePointer(void *pBindingTable, uint32_t index) override { + uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) override { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; - BINDING_TABLE_STATE *bindingTableState = static_cast(pBindingTable); + const BINDING_TABLE_STATE *bindingTableState = static_cast(pBindingTable); return bindingTableState[index].getRawData(0); } diff --git a/core/helpers/ptr_math.h b/core/helpers/ptr_math.h index b9fedde6d4..33ecb3e337 100644 --- a/core/helpers/ptr_math.h +++ b/core/helpers/ptr_math.h @@ -47,14 +47,33 @@ inline void *addrToPtr(IntegerAddressType addr) { return ptrReturn; } -inline void patchWithRequiredSize(void *memoryToBePatched, uint32_t patchSize, uintptr_t patchValue) { - if (patchSize == sizeof(uint64_t)) { - uint64_t *curbeAddress = (uint64_t *)memoryToBePatched; - *curbeAddress = patchValue; - } else { - uint32_t *curbeAddress = (uint32_t *)memoryToBePatched; - *curbeAddress = (uint32_t)patchValue; +struct PatchStoreOperation 
{ + template + void operator()(T *memory, T value) { + *memory = value; } +}; + +struct PatchIncrementOperation { + template + void operator()(T *memory, T value) { + *memory += value; + } +}; + +template +inline void patchWithRequiredSize(void *memoryToBePatched, uint32_t patchSize, uint64_t patchValue) { + if (patchSize == sizeof(uint64_t)) { + uint64_t *curbeAddress = reinterpret_cast(memoryToBePatched); + PatchOperationT{}(curbeAddress, patchValue); + } else { + uint32_t *curbeAddress = reinterpret_cast(memoryToBePatched); + PatchOperationT{}(curbeAddress, static_cast(patchValue)); + } +} + +inline void patchIncrement(void *memoryToBePatched, uint32_t patchSize, uint64_t patchIncrementValue) { + patchWithRequiredSize(memoryToBePatched, patchSize, patchIncrementValue); } inline uint64_t castToUint64(void *address) { diff --git a/core/unit_tests/utilities/containers_tests.cpp b/core/unit_tests/utilities/containers_tests.cpp index e683efaa53..59001c20a7 100644 --- a/core/unit_tests/utilities/containers_tests.cpp +++ b/core/unit_tests/utilities/containers_tests.cpp @@ -1417,14 +1417,17 @@ TEST(StackVec, Clear) { DummyFNode nd2(&destructorCounter); DummyFNode nd3(&destructorCounter); StackVec v; + EXPECT_TRUE(v.empty()); v.push_back(nd1); v.push_back(nd2); v.push_back(nd3); ASSERT_EQ(0U, destructorCounter); ASSERT_EQ(3U, v.size()); + EXPECT_FALSE(v.empty()); v.clear(); ASSERT_EQ(3U, destructorCounter); ASSERT_EQ(0U, v.size()); + EXPECT_TRUE(v.empty()); StackVec v2; v2.push_back(nd1); @@ -1561,11 +1564,13 @@ TEST(ArrayRef, WrapContainers) { ASSERT_EQ(35, sum(carray)); ArrayRef ar2; + EXPECT_TRUE(ar2.empty()); ASSERT_EQ(0U, ar2.size()); ASSERT_EQ(nullptr, ar2.begin()); ASSERT_EQ(nullptr, ar2.end()); ar2 = carray; + EXPECT_FALSE(ar2.empty()); ASSERT_EQ(sizeof(carray) / sizeof(carray[0]), ar2.size()); ASSERT_EQ(35, sum(ar2)); diff --git a/core/utilities/arrayref.h b/core/utilities/arrayref.h index a5f4615275..707ad6bf4e 100644 --- a/core/utilities/arrayref.h +++ 
b/core/utilities/arrayref.h @@ -45,10 +45,24 @@ class ArrayRef { ArrayRef() = default; + ArrayRef(const ArrayRef &src) + : begIt(src.begIt), endIt(src.endIt) { + } + + ArrayRef &operator=(const ArrayRef &src) { + this->begIt = src.begIt; + this->endIt = src.endIt; + return *this; + } + size_t size() const { return endIt - begIt; } + bool empty() const { + return (0U == size()); + } + DataType &operator[](std::size_t idx) { return begIt[idx]; } diff --git a/core/utilities/stackvec.h b/core/utilities/stackvec.h index 35eb253768..4da81324fd 100644 --- a/core/utilities/stackvec.h +++ b/core/utilities/stackvec.h @@ -133,6 +133,10 @@ class StackVec { return onStackSize; } + bool empty() const { + return 0U == size(); + } + size_t capacity() const { if (dynamicMem) { return dynamicMem->capacity(); diff --git a/runtime/built_ins/sip.cpp b/runtime/built_ins/sip.cpp index 9ce1f64d34..27c133d00f 100644 --- a/runtime/built_ins/sip.cpp +++ b/runtime/built_ins/sip.cpp @@ -13,6 +13,7 @@ #include "core/helpers/string.h" #include "core/memory_manager/graphics_allocation.h" #include "runtime/device/device.h" +#include "runtime/program/kernel_info.h" #include "runtime/program/program.h" namespace NEO { diff --git a/runtime/compiler_interface/CMakeLists.txt b/runtime/compiler_interface/CMakeLists.txt index 56b501bd35..0560c9cfa3 100644 --- a/runtime/compiler_interface/CMakeLists.txt +++ b/runtime/compiler_interface/CMakeLists.txt @@ -9,6 +9,11 @@ set(RUNTIME_SRCS_COMPILER_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/compiler_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_options.h ${CMAKE_CURRENT_SOURCE_DIR}/default_cl_cache_config.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_decoder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_decoder.h + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_dumper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_dumper.h + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_validator.inl ) get_property(NEO_COMPILER_INTERFACE GLOBAL PROPERTY NEO_COMPILER_INTERFACE) diff --git 
a/runtime/compiler_interface/patchtokens_decoder.cpp b/runtime/compiler_interface/patchtokens_decoder.cpp new file mode 100644 index 0000000000..85486883c7 --- /dev/null +++ b/runtime/compiler_interface/patchtokens_decoder.cpp @@ -0,0 +1,620 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "patchtokens_decoder.h" + +#include "core/helpers/debug_helpers.h" +#include "core/helpers/hash.h" +#include "core/helpers/ptr_math.h" +#include "runtime/os_interface/debug_settings_manager.h" + +#include + +namespace NEO { + +namespace PatchTokenBinary { + +struct PatchTokensStreamReader { + const ArrayRef data; + PatchTokensStreamReader(ArrayRef data) : data(data) {} + + template + bool notEnoughDataLeft(DecodePosT *decodePos, size_t requestedSize) { + return getDataSizeLeft(decodePos) < requestedSize; + } + + template + constexpr bool notEnoughDataLeft(DecodePosT *decodePos) { + return notEnoughDataLeft(decodePos, sizeof(T)); + } + + template + bool enoughDataLeft(ArgsT &&... args) { + return false == notEnoughDataLeft(std::forward(args)...); + } + + template + bool enoughDataLeft(ArgsT &&... 
args) { + return false == notEnoughDataLeft(std::forward(args)...); + } + + template + size_t getDataSizeLeft(DecodePosT *decodePos) { + auto dataConsumed = ptrDiff(decodePos, data.begin()); + UNRECOVERABLE_IF(dataConsumed > data.size()); + return data.size() - dataConsumed; + } +}; + +template +inline void assignToken(const T *&dst, const SPatchItemHeader *src) { + dst = reinterpret_cast(src); +} + +inline KernelArgFromPatchtokens &getKernelArg(KernelFromPatchtokens &kernel, size_t argNum, ArgObjectType type = ArgObjectType::None, ArgObjectTypeSpecialized typeSpecialized = ArgObjectTypeSpecialized::None) { + if (kernel.tokens.kernelArgs.size() < argNum + 1) { + kernel.tokens.kernelArgs.resize(argNum + 1); + } + auto &arg = kernel.tokens.kernelArgs[argNum]; + if (arg.objectType == ArgObjectType::None) { + arg.objectType = type; + } else if ((arg.objectType != type) && (type != ArgObjectType::None)) { + kernel.decodeStatus = DecoderError::InvalidBinary; + DBG_LOG(LogPatchTokens, "\n Mismatched metadata for kernel arg :", argNum); + DEBUG_BREAK_IF(true); + } + + if (arg.objectTypeSpecialized == ArgObjectTypeSpecialized::None) { + arg.objectTypeSpecialized = typeSpecialized; + } else if (typeSpecialized != ArgObjectTypeSpecialized::None) { + UNRECOVERABLE_IF(arg.objectTypeSpecialized != typeSpecialized); + } + + return arg; +} + +inline void assignArgInfo(KernelFromPatchtokens &kernel, const SPatchItemHeader *src) { + auto argInfoToken = reinterpret_cast(src); + getKernelArg(kernel, argInfoToken->ArgumentNumber, ArgObjectType::None).argInfo = argInfoToken; +} + +template +inline uint32_t getArgNum(const SPatchItemHeader *argToken) { + return reinterpret_cast(argToken)->ArgumentNumber; +} + +inline void assignArg(KernelFromPatchtokens &kernel, const SPatchItemHeader *src) { + uint32_t argNum = 0; + ArgObjectType type = ArgObjectType::Buffer; + switch (src->Token) { + default: + UNRECOVERABLE_IF(src->Token != PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT); + argNum = 
getArgNum(src); + type = ArgObjectType::Sampler; + break; + case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT: + argNum = getArgNum(src); + type = ArgObjectType::Image; + break; + case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: + argNum = getArgNum(src); + break; + case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: + argNum = getArgNum(src); + break; + case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT: + argNum = getArgNum(src); + break; + case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT: + argNum = getArgNum(src); + break; + } + + getKernelArg(kernel, argNum, type).objectArg = src; +} + +inline void assignToken(StackVecStrings &stringVec, const SPatchItemHeader *src) { + auto stringToken = reinterpret_cast(src); + if (stringVec.size() < stringToken->Index + 1) { + stringVec.resize(stringToken->Index + 1); + } + stringVec[stringToken->Index] = stringToken; +} + +template +inline void assignTokenInArray(const SPatchDataParameterBuffer *(&tokensArray)[S], const SPatchDataParameterBuffer *src, StackVecUnhandledTokens &unhandledTokens) { + auto sourceIndex = src->SourceOffset >> 2; + if (sourceIndex >= S) { + DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled sourceIndex ", sourceIndex); + DEBUG_BREAK_IF(true); + unhandledTokens.push_back(src); + return; + } + assignToken(tokensArray[sourceIndex], src); +} + +template +inline void addTok(StackVec &tokensVec, const SPatchItemHeader *src) { + tokensVec.push_back(reinterpret_cast(src)); +} + +inline void decodeKernelDataParameterToken(const SPatchDataParameterBuffer *token, KernelFromPatchtokens &out) { + auto &crossthread = out.tokens.crossThreadPayloadArgs; + auto sourceIndex = token->SourceOffset >> 2; + auto argNum = token->ArgumentNumber; + + switch (token->Type) { + default: + DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled SPatchDataParameterBuffer ", token->Type); + DEBUG_BREAK_IF(true); + out.unhandledTokens.push_back(token); + break; + + case 
DATA_PARAMETER_KERNEL_ARGUMENT: + getKernelArg(out, argNum, ArgObjectType::None).byValMap.push_back(token); + break; + + case DATA_PARAMETER_LOCAL_WORK_SIZE: { + if (sourceIndex >= 3) { + DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled sourceIndex ", sourceIndex); + DEBUG_BREAK_IF(true); + out.unhandledTokens.push_back(token); + return; + } + auto localWorkSizeArray = (crossthread.localWorkSize[sourceIndex] == nullptr) + ? crossthread.localWorkSize + : crossthread.localWorkSize2; + localWorkSizeArray[sourceIndex] = token; + break; + } + + case DATA_PARAMETER_GLOBAL_WORK_OFFSET: + assignTokenInArray(crossthread.globalWorkOffset, token, out.unhandledTokens); + break; + case DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE: + assignTokenInArray(crossthread.enqueuedLocalWorkSize, token, out.unhandledTokens); + break; + case DATA_PARAMETER_GLOBAL_WORK_SIZE: + assignTokenInArray(crossthread.globalWorkSize, token, out.unhandledTokens); + break; + case DATA_PARAMETER_NUM_WORK_GROUPS: + assignTokenInArray(crossthread.numWorkGroups, token, out.unhandledTokens); + break; + case DATA_PARAMETER_MAX_WORKGROUP_SIZE: + crossthread.maxWorkGroupSize = token; + break; + case DATA_PARAMETER_WORK_DIMENSIONS: + crossthread.workDimensions = token; + break; + case DATA_PARAMETER_SIMD_SIZE: + crossthread.simdSize = token; + break; + + case DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE: + crossthread.privateMemoryStatelessSize = token; + break; + case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE: + crossthread.localMemoryStatelessWindowSize = token; + break; + case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS: + crossthread.localMemoryStatelessWindowStartAddress = token; + break; + + case DATA_PARAMETER_OBJECT_ID: + getKernelArg(out, argNum, ArgObjectType::None).objectId = token; + break; + + case DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES: { + auto &kernelArg = getKernelArg(out, argNum, ArgObjectType::Slm); + kernelArg.byValMap.push_back(token); + 
kernelArg.metadata.slm.token = token; + } break; + + case DATA_PARAMETER_BUFFER_OFFSET: + getKernelArg(out, argNum, ArgObjectType::Buffer).metadata.buffer.bufferOffset = token; + break; + case DATA_PARAMETER_BUFFER_STATEFUL: + getKernelArg(out, argNum, ArgObjectType::Buffer).metadata.buffer.pureStateful = token; + break; + + case DATA_PARAMETER_IMAGE_WIDTH: + getKernelArg(out, argNum, ArgObjectType::Image).metadata.image.width = token; + break; + case DATA_PARAMETER_IMAGE_HEIGHT: + getKernelArg(out, argNum, ArgObjectType::Image).metadata.image.height = token; + break; + case DATA_PARAMETER_IMAGE_DEPTH: + getKernelArg(out, argNum, ArgObjectType::Image).metadata.image.depth = token; + break; + case DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE: + getKernelArg(out, argNum, ArgObjectType::Image).metadata.image.channelDataType = token; + break; + case DATA_PARAMETER_IMAGE_CHANNEL_ORDER: + getKernelArg(out, argNum, ArgObjectType::Image).metadata.image.channelOrder = token; + break; + case DATA_PARAMETER_IMAGE_ARRAY_SIZE: + getKernelArg(out, argNum, ArgObjectType::Image).metadata.image.arraySize = token; + break; + case DATA_PARAMETER_IMAGE_NUM_SAMPLES: + getKernelArg(out, argNum, ArgObjectType::Image).metadata.image.numSamples = token; + break; + case DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS: + getKernelArg(out, argNum, ArgObjectType::Image).metadata.image.numMipLevels = token; + break; + + case DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED: + getKernelArg(out, argNum, ArgObjectType::Sampler).metadata.sampler.coordinateSnapWaRequired = token; + break; + case DATA_PARAMETER_SAMPLER_ADDRESS_MODE: + getKernelArg(out, argNum, ArgObjectType::Sampler).metadata.sampler.addressMode = token; + break; + case DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS: + getKernelArg(out, argNum, ArgObjectType::Sampler).metadata.sampler.normalizedCoords = token; + break; + + case DATA_PARAMETER_VME_MB_BLOCK_TYPE: + getKernelArg(out, argNum, ArgObjectType::None, 
ArgObjectTypeSpecialized::Vme).metadataSpecialized.vme.mbBlockType = token; + break; + case DATA_PARAMETER_VME_SUBPIXEL_MODE: + getKernelArg(out, argNum, ArgObjectType::None, ArgObjectTypeSpecialized::Vme).metadataSpecialized.vme.subpixelMode = token; + break; + case DATA_PARAMETER_VME_SAD_ADJUST_MODE: + getKernelArg(out, argNum, ArgObjectType::None, ArgObjectTypeSpecialized::Vme).metadataSpecialized.vme.sadAdjustMode = token; + break; + case DATA_PARAMETER_VME_SEARCH_PATH_TYPE: + getKernelArg(out, argNum, ArgObjectType::None, ArgObjectTypeSpecialized::Vme).metadataSpecialized.vme.searchPathType = token; + break; + + case DATA_PARAMETER_PARENT_EVENT: + crossthread.parentEvent = token; + break; + case DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE: + crossthread.childBlockSimdSize.push_back(token); + break; + case DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE: + crossthread.preferredWorkgroupMultiple = token; + break; + + case DATA_PARAMETER_NUM_HARDWARE_THREADS: + CPP_ATTRIBUTE_FALLTHROUGH; + case DATA_PARAMETER_PRINTF_SURFACE_SIZE: + CPP_ATTRIBUTE_FALLTHROUGH; + case DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER: + CPP_ATTRIBUTE_FALLTHROUGH; + case DATA_PARAMETER_STAGE_IN_GRID_ORIGIN: + CPP_ATTRIBUTE_FALLTHROUGH; + case DATA_PARAMETER_STAGE_IN_GRID_SIZE: + CPP_ATTRIBUTE_FALLTHROUGH; + case DATA_PARAMETER_LOCAL_ID: + CPP_ATTRIBUTE_FALLTHROUGH; + case DATA_PARAMETER_EXECUTION_MASK: + CPP_ATTRIBUTE_FALLTHROUGH; + case DATA_PARAMETER_VME_IMAGE_TYPE: + CPP_ATTRIBUTE_FALLTHROUGH; + case DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE: + // ignored intentionally + break; + } +} + +inline bool decodeToken(const SPatchItemHeader *token, KernelFromPatchtokens &out) { + switch (token->Token) { + default: { + printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Unknown kernel-scope Patch Token: %d\n", token->Token); + DEBUG_BREAK_IF(true); + out.unhandledTokens.push_back(token); + break; + } + case PATCH_TOKEN_SAMPLER_STATE_ARRAY: + assignToken(out.tokens.samplerStateArray, token); + 
break; + case PATCH_TOKEN_BINDING_TABLE_STATE: + assignToken(out.tokens.bindingTableState, token); + break; + case PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE: + assignToken(out.tokens.allocateLocalSurface, token); + break; + case PATCH_TOKEN_MEDIA_VFE_STATE: + assignToken(out.tokens.mediaVfeState[0], token); + break; + case PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1: + assignToken(out.tokens.mediaVfeState[1], token); + break; + case PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD: + assignToken(out.tokens.mediaInterfaceDescriptorLoad, token); + break; + case PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA: + assignToken(out.tokens.interfaceDescriptorData, token); + break; + case PATCH_TOKEN_THREAD_PAYLOAD: + assignToken(out.tokens.threadPayload, token); + break; + case PATCH_TOKEN_EXECUTION_ENVIRONMENT: + assignToken(out.tokens.executionEnvironment, token); + break; + + case PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO: + assignToken(out.tokens.kernelAttributesInfo, token); + break; + case PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY: + assignToken(out.tokens.allocateStatelessPrivateSurface, token); + break; + case PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION: + assignToken(out.tokens.allocateStatelessConstantMemorySurfaceWithInitialization, token); + break; + case PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION: + assignToken(out.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization, token); + break; + case PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE: + assignToken(out.tokens.allocateStatelessPrintfSurface, token); + break; + case PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE: + assignToken(out.tokens.allocateStatelessEventPoolSurface, token); + break; + case PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE: + assignToken(out.tokens.allocateStatelessDefaultDeviceQueueSurface, token); + break; + case PATCH_TOKEN_STRING: + assignToken(out.tokens.strings, token); + break; + case PATCH_TOKEN_INLINE_VME_SAMPLER_INFO: + 
assignToken(out.tokens.inlineVmeSamplerInfo, token); + break; + case PATCH_TOKEN_GTPIN_FREE_GRF_INFO: + assignToken(out.tokens.gtpinFreeGrfInfo, token); + break; + case PATCH_TOKEN_GTPIN_INFO: + assignToken(out.tokens.gtpinInfo, token); + break; + case PATCH_TOKEN_STATE_SIP: + assignToken(out.tokens.stateSip, token); + break; + case PATCH_TOKEN_ALLOCATE_SIP_SURFACE: + assignToken(out.tokens.allocateSystemThreadSurface, token); + break; + case PATCH_TOKEN_PROGRAM_SYMBOL_TABLE: + assignToken(out.tokens.programSymbolTable, token); + break; + case PATCH_TOKEN_PROGRAM_RELOCATION_TABLE: + assignToken(out.tokens.programRelocationTable, token); + break; + case PATCH_TOKEN_KERNEL_ARGUMENT_INFO: + assignArgInfo(out, token); + break; + + case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT: + CPP_ATTRIBUTE_FALLTHROUGH; + case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT: + CPP_ATTRIBUTE_FALLTHROUGH; + case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: + CPP_ATTRIBUTE_FALLTHROUGH; + case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: + CPP_ATTRIBUTE_FALLTHROUGH; + case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT: + CPP_ATTRIBUTE_FALLTHROUGH; + case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT: + assignArg(out, token); + break; + + case PATCH_TOKEN_DATA_PARAMETER_STREAM: + assignToken(out.tokens.dataParameterStream, token); + break; + case PATCH_TOKEN_DATA_PARAMETER_BUFFER: { + auto tokDataP = reinterpret_cast(token); + decodeKernelDataParameterToken(tokDataP, out); + } break; + } + + return out.decodeStatus != DecoderError::InvalidBinary; +} + +inline bool decodeToken(const SPatchItemHeader *token, ProgramFromPatchtokens &out) { + auto &progTok = out.programScopeTokens; + switch (token->Token) { + default: { + printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Unknown program-scope Patch Token: %d\n", token->Token); + DEBUG_BREAK_IF(true); + out.unhandledTokens.push_back(token); + break; + } + case 
PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO: + addTok(progTok.allocateConstantMemorySurface, token); + break; + case PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO: + addTok(progTok.allocateGlobalMemorySurface, token); + break; + case PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO: + addTok(progTok.globalPointer, token); + break; + case PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO: + addTok(progTok.constantPointer, token); + break; + case PATCH_TOKEN_PROGRAM_SYMBOL_TABLE: + assignToken(progTok.symbolTable, token); + break; + } + return true; +} + +template +inline size_t getPatchTokenTotalSize(PatchTokensStreamReader stream, const SPatchItemHeader *token); + +template <> +inline size_t getPatchTokenTotalSize(PatchTokensStreamReader stream, const SPatchItemHeader *token) { + return token->Size; +} + +template <> +inline size_t getPatchTokenTotalSize(PatchTokensStreamReader stream, const SPatchItemHeader *token) { + size_t tokSize = token->Size; + switch (token->Token) { + default: + return tokSize; + case PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO: + return stream.enoughDataLeft(token) + ? tokSize + reinterpret_cast(token)->InlineDataSize + : std::numeric_limits::max(); + case PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO: + return stream.enoughDataLeft(token) + ? 
tokSize + reinterpret_cast(token)->InlineDataSize + : std::numeric_limits::max(); + } +} + +template +inline bool decodePatchList(PatchTokensStreamReader patchListStream, OutT &out) { + auto decodePos = patchListStream.data.begin(); + auto decodeEnd = patchListStream.data.end(); + + bool decodeSuccess = true; + while ((decodePos + sizeof(SPatchItemHeader) <= decodeEnd) && decodeSuccess) { + auto token = reinterpret_cast(decodePos); + size_t tokenTotalSize = getPatchTokenTotalSize(patchListStream, token); + decodeSuccess = patchListStream.enoughDataLeft(decodePos, tokenTotalSize); + decodeSuccess = decodeSuccess && (tokenTotalSize > 0U); + decodeSuccess = decodeSuccess && decodeToken(token, out); + decodePos = ptrOffset(decodePos, tokenTotalSize); + } + + return decodeSuccess; +} + +bool decodeKernelFromPatchtokensBlob(ArrayRef data, KernelFromPatchtokens &out) { + PatchTokensStreamReader stream{data}; + auto decodePos = stream.data.begin(); + out.decodeStatus = DecoderError::Undefined; + if (stream.notEnoughDataLeft(decodePos)) { + out.decodeStatus = DecoderError::InvalidBinary; + return false; + } + + out.header = reinterpret_cast(decodePos); + + auto kernelInfoBlobSize = sizeof(SKernelBinaryHeaderCommon) + out.header->KernelNameSize + out.header->KernelHeapSize + out.header->GeneralStateHeapSize + out.header->DynamicStateHeapSize + out.header->SurfaceStateHeapSize + out.header->PatchListSize; + + if (stream.notEnoughDataLeft(decodePos, kernelInfoBlobSize)) { + out.decodeStatus = DecoderError::InvalidBinary; + return false; + } + + out.blobs.kernelInfo = ArrayRef(stream.data.begin(), kernelInfoBlobSize); + decodePos = ptrOffset(decodePos, sizeof(SKernelBinaryHeaderCommon)); + + auto kernelName = reinterpret_cast(decodePos); + out.name = ArrayRef(kernelName, out.header->KernelNameSize); + decodePos = ptrOffset(decodePos, out.name.size()); + + out.isa = ArrayRef(decodePos, out.header->KernelHeapSize); + decodePos = ptrOffset(decodePos, out.isa.size()); + + 
out.heaps.generalState = ArrayRef(decodePos, out.header->GeneralStateHeapSize); + decodePos = ptrOffset(decodePos, out.heaps.generalState.size()); + + out.heaps.dynamicState = ArrayRef(decodePos, out.header->DynamicStateHeapSize); + decodePos = ptrOffset(decodePos, out.heaps.dynamicState.size()); + + out.heaps.surfaceState = ArrayRef(decodePos, out.header->SurfaceStateHeapSize); + decodePos = ptrOffset(decodePos, out.heaps.surfaceState.size()); + + out.blobs.patchList = ArrayRef(decodePos, out.header->PatchListSize); + + if (false == decodePatchList(out.blobs.patchList, out)) { + out.decodeStatus = DecoderError::InvalidBinary; + return false; + } + + out.decodeStatus = DecoderError::Success; + return true; +} + +inline bool decodeProgramHeader(ProgramFromPatchtokens &decodedProgram) { + auto decodePos = decodedProgram.blobs.programInfo.begin(); + PatchTokensStreamReader stream{decodedProgram.blobs.programInfo}; + if (stream.notEnoughDataLeft(decodePos)) { + return false; + } + + decodedProgram.header = reinterpret_cast(decodePos); + if (decodedProgram.header->Magic != MAGIC_CL) { + return false; + } + decodePos = ptrOffset(decodePos, sizeof(SProgramBinaryHeader)); + + if (stream.notEnoughDataLeft(decodePos, decodedProgram.header->PatchListSize)) { + return false; + } + decodedProgram.blobs.patchList = ArrayRef(decodePos, decodedProgram.header->PatchListSize); + decodePos = ptrOffset(decodePos, decodedProgram.blobs.patchList.size()); + + decodedProgram.blobs.kernelsInfo = ArrayRef(decodePos, stream.getDataSizeLeft(decodePos)); + return true; +} + +inline bool decodeKernels(ProgramFromPatchtokens &decodedProgram) { + auto numKernels = decodedProgram.header->NumberOfKernels; + decodedProgram.kernels.reserve(decodedProgram.header->NumberOfKernels); + const uint8_t *decodePos = decodedProgram.blobs.kernelsInfo.begin(); + bool decodeSuccess = true; + PatchTokensStreamReader stream{decodedProgram.blobs.kernelsInfo}; + for (uint32_t i = 0; (i < numKernels) && 
decodeSuccess; i++) { + decodedProgram.kernels.resize(decodedProgram.kernels.size() + 1); + auto &currKernelInfo = *decodedProgram.kernels.rbegin(); + auto kernelDataLeft = ArrayRef(decodePos, stream.getDataSizeLeft(decodePos)); + decodeSuccess = decodeKernelFromPatchtokensBlob(kernelDataLeft, currKernelInfo); + decodePos = ptrOffset(decodePos, currKernelInfo.blobs.kernelInfo.size()); + } + return decodeSuccess; +} + +bool decodeProgramFromPatchtokensBlob(ArrayRef blob, ProgramFromPatchtokens &out) { + out.blobs.programInfo = blob; + bool decodeSuccess = decodeProgramHeader(out); + decodeSuccess = decodeSuccess && decodeKernels(out); + decodeSuccess = decodeSuccess && decodePatchList(out.blobs.patchList, out); + out.decodeStatus = decodeSuccess ? DecoderError::Success : DecoderError::InvalidBinary; + + return decodeSuccess; +} + +uint32_t calcKernelChecksum(const ArrayRef kernelBlob) { + UNRECOVERABLE_IF(kernelBlob.size() <= sizeof(SKernelBinaryHeaderCommon)); + auto dataToHash = ArrayRef(ptrOffset(kernelBlob.begin(), sizeof(SKernelBinaryHeaderCommon)), kernelBlob.end()); + uint64_t hashValue = Hash::hash(reinterpret_cast(dataToHash.begin()), dataToHash.size()); + uint32_t checksum = hashValue & 0xFFFFFFFF; + return checksum; +} + +bool hasInvalidChecksum(const KernelFromPatchtokens &decodedKernel) { + uint32_t decodedChecksum = decodedKernel.header->CheckSum; + uint32_t calculatedChecksum = NEO::PatchTokenBinary::calcKernelChecksum(decodedKernel.blobs.kernelInfo); + return decodedChecksum != calculatedChecksum; +} + +const KernelArgAttributesFromPatchtokens getInlineData(const SPatchKernelArgumentInfo *ptr) { + KernelArgAttributesFromPatchtokens ret = {}; + UNRECOVERABLE_IF(ptr == nullptr); + auto decodePos = reinterpret_cast(ptr + 1); + auto bounds = reinterpret_cast(ptr) + ptr->Size; + ret.addressQualifier = ArrayRef(decodePos, std::min(decodePos + ptr->AddressQualifierSize, bounds)); + decodePos += ret.addressQualifier.size(); + + ret.accessQualifier = 
ArrayRef(decodePos, std::min(decodePos + ptr->AccessQualifierSize, bounds)); + decodePos += ret.accessQualifier.size(); + + ret.argName = ArrayRef(decodePos, std::min(decodePos + ptr->ArgumentNameSize, bounds)); + decodePos += ret.argName.size(); + + ret.typeName = ArrayRef(decodePos, std::min(decodePos + ptr->TypeNameSize, bounds)); + decodePos += ret.typeName.size(); + + ret.typeQualifiers = ArrayRef(decodePos, std::min(decodePos + ptr->TypeQualifierSize, bounds)); + return ret; +} + +} // namespace PatchTokenBinary + +} // namespace NEO diff --git a/runtime/compiler_interface/patchtokens_decoder.h b/runtime/compiler_interface/patchtokens_decoder.h new file mode 100644 index 0000000000..5caf5971d0 --- /dev/null +++ b/runtime/compiler_interface/patchtokens_decoder.h @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "core/helpers/ptr_math.h" +#include "core/utilities/arrayref.h" +#include "core/utilities/stackvec.h" + +#include "patch_g7.h" +#include "patch_list.h" +#include "patch_shared.h" +#include "program_debug_data.h" + +#include +#include + +namespace NEO { + +namespace PatchTokenBinary { + +using namespace iOpenCL; + +enum class DecoderError { + Success = 0, + Undefined = 1, + InvalidBinary = 2, +}; + +enum class ArgObjectType : uint32_t { + None = 0, + Buffer, + Image, + Sampler, + Slm +}; + +enum class ArgObjectTypeSpecialized : uint32_t { + None = 0, + Vme +}; + +using StackVecUnhandledTokens = StackVec; +using StackVecByValMap = StackVec; +using StackVecStrings = StackVec; + +struct KernelArgFromPatchtokens { + const SPatchKernelArgumentInfo *argInfo = nullptr; + const SPatchItemHeader *objectArg = nullptr; + const SPatchDataParameterBuffer *objectId = nullptr; + ArgObjectType objectType = ArgObjectType::None; + ArgObjectTypeSpecialized objectTypeSpecialized = ArgObjectTypeSpecialized::None; + StackVecByValMap byValMap; + union { + struct { + const 
SPatchDataParameterBuffer *width; + const SPatchDataParameterBuffer *height; + const SPatchDataParameterBuffer *depth; + const SPatchDataParameterBuffer *channelDataType; + const SPatchDataParameterBuffer *channelOrder; + const SPatchDataParameterBuffer *arraySize; + const SPatchDataParameterBuffer *numSamples; + const SPatchDataParameterBuffer *numMipLevels; + } image; + struct { + const SPatchDataParameterBuffer *bufferOffset; + const SPatchDataParameterBuffer *pureStateful; + } buffer; + struct { + const SPatchDataParameterBuffer *coordinateSnapWaRequired; + const SPatchDataParameterBuffer *addressMode; + const SPatchDataParameterBuffer *normalizedCoords; + } sampler; + struct { + const SPatchDataParameterBuffer *token; + } slm; + static_assert((sizeof(image) > sizeof(buffer)) && (sizeof(image) > sizeof(sampler)) && (sizeof(image) > sizeof(slm)), + "Union initialization based on image wont' initialize whole struct"); + } metadata = {}; + + union { + struct { + const SPatchDataParameterBuffer *mbBlockType; + const SPatchDataParameterBuffer *subpixelMode; + const SPatchDataParameterBuffer *sadAdjustMode; + const SPatchDataParameterBuffer *searchPathType; + } vme; + } metadataSpecialized = {}; +}; + +using StackVecKernelArgs = StackVec; + +struct KernelFromPatchtokens { + DecoderError decodeStatus = DecoderError::Undefined; + + const SKernelBinaryHeaderCommon *header = nullptr; + ArrayRef name; + ArrayRef isa; + + struct { + ArrayRef generalState; + ArrayRef dynamicState; + ArrayRef surfaceState; + } heaps; + + struct { + ArrayRef kernelInfo; + ArrayRef patchList; + } blobs; + + struct { + const SPatchSamplerStateArray *samplerStateArray = nullptr; + const SPatchBindingTableState *bindingTableState = nullptr; + const SPatchAllocateLocalSurface *allocateLocalSurface = nullptr; + const SPatchMediaVFEState *mediaVfeState[2] = {nullptr, nullptr}; + const SPatchMediaInterfaceDescriptorLoad *mediaInterfaceDescriptorLoad = nullptr; + const SPatchInterfaceDescriptorData 
*interfaceDescriptorData = nullptr; + const SPatchThreadPayload *threadPayload = nullptr; + const SPatchExecutionEnvironment *executionEnvironment = nullptr; + const SPatchDataParameterStream *dataParameterStream = nullptr; + const SPatchKernelAttributesInfo *kernelAttributesInfo = nullptr; + const SPatchAllocateStatelessPrivateSurface *allocateStatelessPrivateSurface = nullptr; + const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *allocateStatelessConstantMemorySurfaceWithInitialization = nullptr; + const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *allocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; + const SPatchAllocateStatelessPrintfSurface *allocateStatelessPrintfSurface = nullptr; + const SPatchAllocateStatelessEventPoolSurface *allocateStatelessEventPoolSurface = nullptr; + const SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateStatelessDefaultDeviceQueueSurface = nullptr; + const SPatchItemHeader *inlineVmeSamplerInfo = nullptr; + const SPatchGtpinFreeGRFInfo *gtpinFreeGrfInfo = nullptr; + const SPatchStateSIP *stateSip = nullptr; + const SPatchAllocateSystemThreadSurface *allocateSystemThreadSurface = nullptr; + const SPatchItemHeader *gtpinInfo = nullptr; + const SPatchFunctionTableInfo *programSymbolTable = nullptr; + const SPatchFunctionTableInfo *programRelocationTable = nullptr; + StackVecKernelArgs kernelArgs; + StackVecStrings strings; + struct { + const SPatchDataParameterBuffer *localWorkSize[3] = {}; + const SPatchDataParameterBuffer *localWorkSize2[3] = {}; + const SPatchDataParameterBuffer *enqueuedLocalWorkSize[3] = {}; + const SPatchDataParameterBuffer *numWorkGroups[3] = {}; + const SPatchDataParameterBuffer *globalWorkOffset[3] = {}; + const SPatchDataParameterBuffer *globalWorkSize[3] = {}; + const SPatchDataParameterBuffer *maxWorkGroupSize = nullptr; + const SPatchDataParameterBuffer *workDimensions = nullptr; + const SPatchDataParameterBuffer *simdSize = nullptr; + const 
SPatchDataParameterBuffer *parentEvent = nullptr; + const SPatchDataParameterBuffer *privateMemoryStatelessSize = nullptr; + const SPatchDataParameterBuffer *localMemoryStatelessWindowSize = nullptr; + const SPatchDataParameterBuffer *localMemoryStatelessWindowStartAddress = nullptr; + const SPatchDataParameterBuffer *preferredWorkgroupMultiple = nullptr; + StackVec childBlockSimdSize; + } crossThreadPayloadArgs; + } tokens; + + StackVecUnhandledTokens unhandledTokens; +}; + +struct ProgramFromPatchtokens { + DecoderError decodeStatus = DecoderError::Undefined; + + const SProgramBinaryHeader *header = nullptr; + struct { + ArrayRef programInfo; + ArrayRef patchList; + ArrayRef kernelsInfo; + } blobs; + + struct { + StackVec allocateConstantMemorySurface; + StackVec allocateGlobalMemorySurface; + StackVec constantPointer; + StackVec globalPointer; + const SPatchFunctionTableInfo *symbolTable = nullptr; + } programScopeTokens; + StackVec kernels; + StackVec unhandledTokens; +}; + +struct KernelArgAttributesFromPatchtokens { + ArrayRef addressQualifier; + ArrayRef accessQualifier; + ArrayRef argName; + ArrayRef typeName; + ArrayRef typeQualifiers; +}; + +bool decodeKernelFromPatchtokensBlob(ArrayRef blob, KernelFromPatchtokens &out); +bool decodeProgramFromPatchtokensBlob(ArrayRef blob, ProgramFromPatchtokens &out); +uint32_t calcKernelChecksum(const ArrayRef kernelBlob); +bool hasInvalidChecksum(const KernelFromPatchtokens &decodedKernel); + +inline const uint8_t *getInlineData(const SPatchAllocateConstantMemorySurfaceProgramBinaryInfo *ptr) { + return ptrOffset(reinterpret_cast(ptr), sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo)); +} + +inline const uint8_t *getInlineData(const SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo *ptr) { + return ptrOffset(reinterpret_cast(ptr), sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); +} + +inline const uint8_t *getInlineData(const SPatchString *ptr) { + return ptrOffset(reinterpret_cast(ptr), 
sizeof(SPatchString)); +} + +const KernelArgAttributesFromPatchtokens getInlineData(const SPatchKernelArgumentInfo *ptr); + +} // namespace PatchTokenBinary + +} // namespace NEO diff --git a/runtime/compiler_interface/patchtokens_dumper.cpp b/runtime/compiler_interface/patchtokens_dumper.cpp new file mode 100644 index 0000000000..18a0dca142 --- /dev/null +++ b/runtime/compiler_interface/patchtokens_dumper.cpp @@ -0,0 +1,889 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "patchtokens_dumper.h" + +#include "patchtokens_decoder.h" + +#include + +namespace NEO { + +namespace PatchTokenBinary { + +#define CASE_TOK_STR(TOK) \ + case TOK: \ + return std::to_string(TOK) + "(" + #TOK + ")"; \ + break; + +std::string asString(PATCH_TOKEN token) { + switch (token) { + default: + return std::to_string(token); + CASE_TOK_STR(PATCH_TOKEN_UNKNOWN); + CASE_TOK_STR(PATCH_TOKEN_MEDIA_STATE_POINTERS); + CASE_TOK_STR(PATCH_TOKEN_STATE_SIP); + CASE_TOK_STR(PATCH_TOKEN_CS_URB_STATE); + CASE_TOK_STR(PATCH_TOKEN_CONSTANT_BUFFER); + CASE_TOK_STR(PATCH_TOKEN_SAMPLER_STATE_ARRAY); + CASE_TOK_STR(PATCH_TOKEN_INTERFACE_DESCRIPTOR); + CASE_TOK_STR(PATCH_TOKEN_VFE_STATE); + CASE_TOK_STR(PATCH_TOKEN_BINDING_TABLE_STATE); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_SCRATCH_SURFACE); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_SIP_SURFACE); + CASE_TOK_STR(PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT); + CASE_TOK_STR(PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT); + CASE_TOK_STR(PATCH_TOKEN_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_SURFACE_WITH_INITIALIZATION); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE); + CASE_TOK_STR(PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT); + CASE_TOK_STR(PATCH_TOKEN_DATA_PARAMETER_BUFFER); + CASE_TOK_STR(PATCH_TOKEN_MEDIA_VFE_STATE); + CASE_TOK_STR(PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD); + CASE_TOK_STR(PATCH_TOKEN_MEDIA_CURBE_LOAD); + 
CASE_TOK_STR(PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA); + CASE_TOK_STR(PATCH_TOKEN_THREAD_PAYLOAD); + CASE_TOK_STR(PATCH_TOKEN_EXECUTION_ENVIRONMENT); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_PRIVATE_MEMORY); + CASE_TOK_STR(PATCH_TOKEN_DATA_PARAMETER_STREAM); + CASE_TOK_STR(PATCH_TOKEN_KERNEL_ARGUMENT_INFO); + CASE_TOK_STR(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO); + CASE_TOK_STR(PATCH_TOKEN_STRING); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_PRINTF_SURFACE); + CASE_TOK_STR(PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT); + CASE_TOK_STR(PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_STATELESS_SURFACE_WITH_INITIALIZATION); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE); + CASE_TOK_STR(PATCH_TOKEN_CB_MAPPING); + CASE_TOK_STR(PATCH_TOKEN_CB2CR_GATHER_TABLE); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE); + CASE_TOK_STR(PATCH_TOKEN_NULL_SURFACE_LOCATION); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION); + CASE_TOK_STR(PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE); + CASE_TOK_STR(PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT); + CASE_TOK_STR(PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO); + CASE_TOK_STR(PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO); + CASE_TOK_STR(PATCH_TOKEN_CONSTRUCTOR_DESTRUCTOR_KERNEL_PROGRAM_BINARY_INFO); + CASE_TOK_STR(PATCH_TOKEN_INLINE_VME_SAMPLER_INFO); + CASE_TOK_STR(PATCH_TOKEN_GTPIN_FREE_GRF_INFO); + 
CASE_TOK_STR(PATCH_TOKEN_GTPIN_INFO); + CASE_TOK_STR(PATCH_TOKEN_PROGRAM_SYMBOL_TABLE); + CASE_TOK_STR(PATCH_TOKEN_PROGRAM_RELOCATION_TABLE); + CASE_TOK_STR(PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1); + } +} + +std::string asString(DATA_PARAMETER_TOKEN dataParameter) { + switch (dataParameter) { + default: + return std::to_string(dataParameter); + CASE_TOK_STR(DATA_PARAMETER_TOKEN_UNKNOWN); + CASE_TOK_STR(DATA_PARAMETER_KERNEL_ARGUMENT); + CASE_TOK_STR(DATA_PARAMETER_LOCAL_WORK_SIZE); + CASE_TOK_STR(DATA_PARAMETER_GLOBAL_WORK_SIZE); + CASE_TOK_STR(DATA_PARAMETER_NUM_WORK_GROUPS); + CASE_TOK_STR(DATA_PARAMETER_WORK_DIMENSIONS); + CASE_TOK_STR(DATA_PARAMETER_LOCAL_ID); + CASE_TOK_STR(DATA_PARAMETER_EXECUTION_MASK); + CASE_TOK_STR(DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_WIDTH); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_HEIGHT); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_DEPTH); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_CHANNEL_ORDER); + CASE_TOK_STR(DATA_PARAMETER_SAMPLER_ADDRESS_MODE); + CASE_TOK_STR(DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS); + CASE_TOK_STR(DATA_PARAMETER_GLOBAL_WORK_OFFSET); + CASE_TOK_STR(DATA_PARAMETER_NUM_HARDWARE_THREADS); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_ARRAY_SIZE); + CASE_TOK_STR(DATA_PARAMETER_PRINTF_SURFACE_SIZE); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_NUM_SAMPLES); + CASE_TOK_STR(DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED); + CASE_TOK_STR(DATA_PARAMETER_PARENT_EVENT); + CASE_TOK_STR(DATA_PARAMETER_VME_MB_BLOCK_TYPE); + CASE_TOK_STR(DATA_PARAMETER_VME_SUBPIXEL_MODE); + CASE_TOK_STR(DATA_PARAMETER_VME_SAD_ADJUST_MODE); + CASE_TOK_STR(DATA_PARAMETER_VME_SEARCH_PATH_TYPE); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS); + CASE_TOK_STR(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE); + CASE_TOK_STR(DATA_PARAMETER_MAX_WORKGROUP_SIZE); + CASE_TOK_STR(DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE); + 
CASE_TOK_STR(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS); + CASE_TOK_STR(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE); + CASE_TOK_STR(DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE); + CASE_TOK_STR(DATA_PARAMETER_SIMD_SIZE); + CASE_TOK_STR(DATA_PARAMETER_OBJECT_ID); + CASE_TOK_STR(DATA_PARAMETER_VME_IMAGE_TYPE); + CASE_TOK_STR(DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE); + CASE_TOK_STR(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE); + CASE_TOK_STR(DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER); + CASE_TOK_STR(DATA_PARAMETER_STAGE_IN_GRID_ORIGIN); + CASE_TOK_STR(DATA_PARAMETER_STAGE_IN_GRID_SIZE); + CASE_TOK_STR(DATA_PARAMETER_BUFFER_OFFSET); + CASE_TOK_STR(DATA_PARAMETER_BUFFER_STATEFUL); + } +} +#undef CASE_TOK_STR + +void dump(const SProgramBinaryHeader &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SProgramBinaryHeader {\n"; + out << indent << " uint32_t Magic; // = " << value.Magic << "\n"; + out << indent << " uint32_t Version; // = " << value.Version << "\n"; + out << indent << "\n"; + out << indent << " uint32_t Device; // = " << value.Device << "\n"; + out << indent << " uint32_t GPUPointerSizeInBytes; // = " << value.GPUPointerSizeInBytes << "\n"; + out << indent << "\n"; + out << indent << " uint32_t NumberOfKernels; // = " << value.NumberOfKernels << "\n"; + out << indent << "\n"; + out << indent << " uint32_t SteppingId; // = " << value.SteppingId << "\n"; + out << indent << "\n"; + out << indent << " uint32_t PatchListSize; // = " << value.PatchListSize << "\n"; + out << indent << "};\n"; +} + +void dump(const SKernelBinaryHeader &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SKernelBinaryHeader {\n"; + out << indent << " uint32_t CheckSum;// = " << value.CheckSum << "\n"; + out << indent << " uint64_t ShaderHashCode;// = " << value.ShaderHashCode << "\n"; + out << indent << " uint32_t KernelNameSize;// = " << value.KernelNameSize << "\n"; + out << indent << " uint32_t 
PatchListSize;// = " << value.PatchListSize << "\n"; + out << indent << "};\n"; +} + +void dump(const SPatchDataParameterBuffer &value, std::stringstream &out, const std::string &indent); +void dump(const SPatchItemHeader &value, std::stringstream &out, const std::string &indent) { + if (value.Token == iOpenCL::PATCH_TOKEN_DATA_PARAMETER_BUFFER) { + dump(static_cast(value), out, indent); + return; + } + out << indent << "struct SPatchItemHeader {\n"; + out << indent << " uint32_t Token;// = " << asString(static_cast(value.Token)) << "\n"; + out << indent << " uint32_t Size;// = " << value.Size << "\n"; + out << indent << "};\n"; +} + +void dumpPatchItemHeaderInline(const SPatchItemHeader &value, std::stringstream &out, const std::string &indent) { + out << "Token=" << asString(static_cast(value.Token)) << ", Size=" << value.Size; +} + +void dump(const SPatchGlobalMemoryObjectKernelArgument &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchGlobalMemoryObjectKernelArgument :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ArgumentNumber;// = " << value.ArgumentNumber << "\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t LocationIndex;// = " << value.LocationIndex << "\n"; + out << indent << " uint32_t LocationIndex2;// = " << value.LocationIndex2 << "\n"; + out << indent << " uint32_t IsEmulationArgument;// = " << value.IsEmulationArgument << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchImageMemoryObjectKernelArgument &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchImageMemoryObjectKernelArgument :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ArgumentNumber;// = " << value.ArgumentNumber << 
"\n"; + out << indent << " uint32_t Type;// = " << value.Type << "\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t LocationIndex;// = " << value.LocationIndex << "\n"; + out << indent << " uint32_t LocationIndex2;// = " << value.LocationIndex2 << "\n"; + out << indent << " uint32_t Writeable;// = " << value.Writeable << "\n"; + out << indent << " uint32_t Transformable;// = " << value.Transformable << "\n"; + out << indent << " uint32_t needBindlessHandle;// = " << value.needBindlessHandle << "\n"; + out << indent << " uint32_t IsEmulationArgument;// = " << value.IsEmulationArgument << "\n"; + out << indent << " uint32_t btiOffset;// = " << value.btiOffset << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchSamplerKernelArgument &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchSamplerKernelArgument :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ArgumentNumber;// = " << value.ArgumentNumber << "\n"; + out << indent << " uint32_t Type;// = " << value.Type << "\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t LocationIndex;// = " << value.LocationIndex << "\n"; + out << indent << " uint32_t LocationIndex2;// = " << value.LocationIndex2 << "\n"; + out << indent << " uint32_t needBindlessHandle;// = " << value.needBindlessHandle << "\n"; + out << indent << " uint32_t TextureMask;// = " << value.TextureMask << "\n"; + out << indent << " uint32_t IsEmulationArgument;// = " << value.IsEmulationArgument << "\n"; + out << indent << " uint32_t btiOffset;// = " << value.btiOffset << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchDataParameterBuffer &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchDataParameterBuffer :\n"; + out << indent << " 
SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t Type;// = " << asString(static_cast(value.Type)) << "\n"; + out << indent << " uint32_t ArgumentNumber;// = " << value.ArgumentNumber << "\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t DataSize;// = " << value.DataSize << "\n"; + out << indent << " uint32_t SourceOffset;// = " << value.SourceOffset << "\n"; + out << indent << " uint32_t LocationIndex;// = " << value.LocationIndex << "\n"; + out << indent << " uint32_t LocationIndex2;// = " << value.LocationIndex2 << "\n"; + out << indent << " uint32_t IsEmulationArgument;// = " << value.IsEmulationArgument << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchKernelArgumentInfo &value, std::stringstream &out, const std::string &indent) { + auto toStr = [](ArrayRef &src) { return std::string(src.begin(), src.end()); }; + auto inlineData = getInlineData(&value); + out << indent << "struct SPatchKernelArgumentInfo :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ArgumentNumber;// = " << value.ArgumentNumber << "\n"; + out << indent << " uint32_t AddressQualifierSize;// = " << value.AddressQualifierSize << " : [" << toStr(inlineData.addressQualifier) << "]\n"; + out << indent << " uint32_t AccessQualifierSize;// = " << value.AccessQualifierSize << " : [" << toStr(inlineData.accessQualifier) << "]\n"; + out << indent << " uint32_t ArgumentNameSize;// = " << value.ArgumentNameSize << " : [" << toStr(inlineData.argName) << "]\n"; + out << indent << " uint32_t TypeNameSize;// = " << value.TypeNameSize << " : [" << toStr(inlineData.typeName) << "]\n"; + out << indent << " uint32_t TypeQualifierSize;// = " << value.TypeQualifierSize << " : [" << toStr(inlineData.typeQualifiers) << "]\n"; + out << indent << "}\n"; +} + 
+void dump(const SPatchKernelAttributesInfo &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchKernelAttributesInfo :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t AttributesSize;// = " << value.AttributesSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchMediaInterfaceDescriptorLoad &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchMediaInterfaceDescriptorLoad :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t InterfaceDescriptorDataOffset;// = " << value.InterfaceDescriptorDataOffset << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchInterfaceDescriptorData &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchInterfaceDescriptorData :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t SamplerStateOffset;// = " << value.SamplerStateOffset << "\n"; + out << indent << " uint32_t KernelOffset;// = " << value.KernelOffset << "\n"; + out << indent << " uint32_t BindingTableOffset;// = " << value.BindingTableOffset << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchDataParameterStream &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchDataParameterStream :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t DataParameterStreamSize;// = " << value.DataParameterStreamSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchStateSIP &value, std::stringstream 
&out, const std::string &indent) { + out << indent << "struct SPatchStateSIP :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t SystemKernelOffset;// = " << value.SystemKernelOffset << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchSamplerStateArray &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchSamplerStateArray :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t Count;// = " << value.Count << "\n"; + out << indent << " uint32_t BorderColorOffset;// = " << value.BorderColorOffset << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchBindingTableState &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchBindingTableState :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t Count;// = " << value.Count << "\n"; + out << indent << " uint32_t SurfaceStateOffset;// = " << value.SurfaceStateOffset << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateSystemThreadSurface &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateSystemThreadSurface :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t PerThreadSystemThreadSurfaceSize;// = " << value.PerThreadSystemThreadSurfaceSize << "\n"; + out << indent << " uint32_t BTI;// = " << value.BTI << "\n"; + out << indent << "}\n"; +} 
+ +void dump(const SPatchAllocateLocalSurface &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateLocalSurface :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t Offset;// = " << value.Offset << "\n"; + out << indent << " uint32_t TotalInlineLocalMemorySize;// = " << value.TotalInlineLocalMemorySize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchThreadPayload &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchThreadPayload :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t HeaderPresent;// = " << value.HeaderPresent << "\n"; + out << indent << " uint32_t LocalIDXPresent;// = " << value.LocalIDXPresent << "\n"; + out << indent << " uint32_t LocalIDYPresent;// = " << value.LocalIDYPresent << "\n"; + out << indent << " uint32_t LocalIDZPresent;// = " << value.LocalIDZPresent << "\n"; + out << indent << " uint32_t LocalIDFlattenedPresent;// = " << value.LocalIDFlattenedPresent << "\n"; + out << indent << " uint32_t IndirectPayloadStorage;// = " << value.IndirectPayloadStorage << "\n"; + out << indent << " uint32_t UnusedPerThreadConstantPresent;// = " << value.UnusedPerThreadConstantPresent << "\n"; + out << indent << " uint32_t GetLocalIDPresent;// = " << value.GetLocalIDPresent << "\n"; + out << indent << " uint32_t GetGroupIDPresent;// = " << value.GetGroupIDPresent << "\n"; + out << indent << " uint32_t GetGlobalOffsetPresent;// = " << value.GetGlobalOffsetPresent << "\n"; + out << indent << " uint32_t StageInGridOriginPresent;// = " << value.StageInGridOriginPresent << "\n"; + out << indent << " uint32_t StageInGridSizePresent;// = " << value.StageInGridSizePresent << "\n"; + out << indent << " uint32_t OffsetToSkipPerThreadDataLoad;// = " << 
value.OffsetToSkipPerThreadDataLoad << "\n"; + out << indent << " uint32_t OffsetToSkipSetFFIDGP;// = " << value.OffsetToSkipSetFFIDGP << "\n"; + out << indent << " uint32_t PassInlineData;// = " << value.PassInlineData << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchExecutionEnvironment &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchExecutionEnvironment :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t RequiredWorkGroupSizeX;// = " << value.RequiredWorkGroupSizeX << "\n"; + out << indent << " uint32_t RequiredWorkGroupSizeY;// = " << value.RequiredWorkGroupSizeY << "\n"; + out << indent << " uint32_t RequiredWorkGroupSizeZ;// = " << value.RequiredWorkGroupSizeZ << "\n"; + out << indent << " uint32_t LargestCompiledSIMDSize;// = " << value.LargestCompiledSIMDSize << "\n"; + out << indent << " uint32_t CompiledSubGroupsNumber;// = " << value.CompiledSubGroupsNumber << "\n"; + out << indent << " uint32_t HasBarriers;// = " << value.HasBarriers << "\n"; + out << indent << " uint32_t DisableMidThreadPreemption;// = " << value.DisableMidThreadPreemption << "\n"; + out << indent << " uint32_t CompiledSIMD8;// = " << value.CompiledSIMD8 << "\n"; + out << indent << " uint32_t CompiledSIMD16;// = " << value.CompiledSIMD16 << "\n"; + out << indent << " uint32_t CompiledSIMD32;// = " << value.CompiledSIMD32 << "\n"; + out << indent << " uint32_t HasDeviceEnqueue;// = " << value.HasDeviceEnqueue << "\n"; + out << indent << " uint32_t MayAccessUndeclaredResource;// = " << value.MayAccessUndeclaredResource << "\n"; + out << indent << " uint32_t UsesFencesForReadWriteImages;// = " << value.UsesFencesForReadWriteImages << "\n"; + out << indent << " uint32_t UsesStatelessSpillFill;// = " << value.UsesStatelessSpillFill << "\n"; + out << indent << " uint32_t UsesMultiScratchSpaces;// = " << 
value.UsesMultiScratchSpaces << "\n"; + out << indent << " uint32_t IsCoherent;// = " << value.IsCoherent << "\n"; + out << indent << " uint32_t IsInitializer;// = " << value.IsInitializer << "\n"; + out << indent << " uint32_t IsFinalizer;// = " << value.IsFinalizer << "\n"; + out << indent << " uint32_t SubgroupIndependentForwardProgressRequired;// = " << value.SubgroupIndependentForwardProgressRequired << "\n"; + out << indent << " uint32_t CompiledForGreaterThan4GBBuffers;// = " << value.CompiledForGreaterThan4GBBuffers << "\n"; + out << indent << " uint32_t NumGRFRequired;// = " << value.NumGRFRequired << "\n"; + out << indent << " uint32_t WorkgroupWalkOrderDims;// = " << value.WorkgroupWalkOrderDims << "\n"; + out << indent << " uint32_t HasGlobalAtomics;// = " << value.HasGlobalAtomics << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchString &value, std::stringstream &out, const std::string &indent) { + const char *strBeg = reinterpret_cast((&value) + 1); + std::string strValue = std::string(strBeg, strBeg + value.StringSize); + out << indent << "struct SPatchString :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t Index;// = " << value.Index << "\n"; + out << indent << " uint32_t StringSize;// = " << value.StringSize << " : [" << strValue << "]" + << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchStatelessGlobalMemoryObjectKernelArgument &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchStatelessGlobalMemoryObjectKernelArgument :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ArgumentNumber;// = " << value.ArgumentNumber << "\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " uint32_t 
DataParamOffset;// = " << value.DataParamOffset << "\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << " uint32_t LocationIndex;// = " << value.LocationIndex << "\n"; + out << indent << " uint32_t LocationIndex2;// = " << value.LocationIndex2 << "\n"; + out << indent << " uint32_t IsEmulationArgument;// = " << value.IsEmulationArgument << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchStatelessConstantMemoryObjectKernelArgument &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchStatelessConstantMemoryObjectKernelArgument :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ArgumentNumber;// = " << value.ArgumentNumber << "\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " uint32_t DataParamOffset;// = " << value.DataParamOffset << "\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << " uint32_t LocationIndex;// = " << value.LocationIndex << "\n"; + out << indent << " uint32_t LocationIndex2;// = " << value.LocationIndex2 << "\n"; + out << indent << " uint32_t IsEmulationArgument;// = " << value.IsEmulationArgument << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t GlobalBufferIndex;// = " << value.GlobalBufferIndex << "\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " uint32_t 
DataParamOffset;// = " << value.DataParamOffset << "\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateStatelessConstantMemorySurfaceWithInitialization :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ConstantBufferIndex;// = " << value.ConstantBufferIndex << "\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " uint32_t DataParamOffset;// = " << value.DataParamOffset << "\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t Type;// = " << value.Type << "\n"; + out << indent << " uint32_t GlobalBufferIndex;// = " << value.GlobalBufferIndex << "\n"; + out << indent << " uint32_t InlineDataSize;// = " << value.InlineDataSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateConstantMemorySurfaceProgramBinaryInfo &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateConstantMemorySurfaceProgramBinaryInfo :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ConstantBufferIndex;// = " << value.ConstantBufferIndex << "\n"; + out << indent << " 
uint32_t InlineDataSize;// = " << value.InlineDataSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchGlobalPointerProgramBinaryInfo &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchGlobalPointerProgramBinaryInfo :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t GlobalBufferIndex;// = " << value.GlobalBufferIndex << "\n"; + out << indent << " uint64_t GlobalPointerOffset;// = " << value.GlobalPointerOffset << "\n"; + out << indent << " uint32_t BufferType;// = " << value.BufferType << "\n"; + out << indent << " uint32_t BufferIndex;// = " << value.BufferIndex << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchConstantPointerProgramBinaryInfo &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchConstantPointerProgramBinaryInfo :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ConstantBufferIndex;// = " << value.ConstantBufferIndex << "\n"; + out << indent << " uint64_t ConstantPointerOffset;// = " << value.ConstantPointerOffset << "\n"; + out << indent << " uint32_t BufferType;// = " << value.BufferType << "\n"; + out << indent << " uint32_t BufferIndex;// = " << value.BufferIndex << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateStatelessPrintfSurface &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateStatelessPrintfSurface :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t PrintfSurfaceIndex;// = " << value.PrintfSurfaceIndex << "\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " 
uint32_t DataParamOffset;// = " << value.DataParamOffset << "\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateStatelessPrivateSurface &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateStatelessPrivateSurface :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " uint32_t DataParamOffset;// = " << value.DataParamOffset << "\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << " uint32_t PerThreadPrivateMemorySize;// = " << value.PerThreadPrivateMemorySize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchMediaVFEState &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchMediaVFEState :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ScratchSpaceOffset;// = " << value.ScratchSpaceOffset << "\n"; + out << indent << " uint32_t PerThreadScratchSpace;// = " << value.PerThreadScratchSpace << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateStatelessEventPoolSurface &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateStatelessEventPoolSurface :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t EventPoolSurfaceIndex;// = " << value.EventPoolSurfaceIndex << "\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " uint32_t DataParamOffset;// = " << value.DataParamOffset << 
"\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchAllocateStatelessDefaultDeviceQueueSurface &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchAllocateStatelessDefaultDeviceQueueSurface :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " uint32_t DataParamOffset;// = " << value.DataParamOffset << "\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchStatelessDeviceQueueKernelArgument &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchStatelessDeviceQueueKernelArgument :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << "{\n"; + out << indent << " uint32_t ArgumentNumber;// = " << value.ArgumentNumber << "\n"; + out << indent << " uint32_t SurfaceStateHeapOffset;// = " << value.SurfaceStateHeapOffset << "\n"; + out << indent << " uint32_t DataParamOffset;// = " << value.DataParamOffset << "\n"; + out << indent << " uint32_t DataParamSize;// = " << value.DataParamSize << "\n"; + out << indent << " uint32_t LocationIndex;// = " << value.LocationIndex << "\n"; + out << indent << " uint32_t LocationIndex2;// = " << value.LocationIndex2 << "\n"; + out << indent << " uint32_t IsEmulationArgument;// = " << value.IsEmulationArgument << "\n"; + out << indent << "}\n"; +} + +void dump(const SPatchGtpinFreeGRFInfo &value, std::stringstream &out, const std::string &indent) { + out << indent << "struct SPatchGtpinFreeGRFInfo :\n"; + out << indent << " SPatchItemHeader ("; + dumpPatchItemHeaderInline(value, out, ""); + out << ")\n" + << indent << 
// Dumps `*value` through the matching dump() overload when a token is present.
// When `value` is null, writes `indent` followed by `messageIfNull` instead, or nothing at all
// if `messageIfNull` is empty (callers pass "" for tokens that are simply optional).
//
// T is the patch token type; it is only required to have a dump(const T &, std::stringstream &,
// const std::string &) overload available.
template <typename T>
void dumpOrNull(const T *value, const std::string &messageIfNull, std::stringstream &out, const std::string &indent) {
    if (value != nullptr) {
        dump(*value, out, indent);
        return;
    }
    if (false == messageIfNull.empty()) {
        out << indent << messageIfNull;
    }
}
+ } + if (allEmpty) { + return; + } + out << indent << arrayName << " [" << Size << "] :\n"; + for (size_t i = 0; i < Size; ++i) { + if (value[i] == nullptr) { + continue; + } + out << indent << " + [" << i << "]:\n"; + dump(*value[i], out, indent + " | "); + } +} + +template +void dumpVecIfNotEmpty(const T &vector, const std::string &vectorName, std::stringstream &out, const std::string &indent) { + if (vector.size() == 0) { + return; + } + out << indent << vectorName << " [" << vector.size() << "] :\n"; + for (size_t i = 0; i < vector.size(); ++i) { + out << indent << " + [" << i << "]:\n"; + dumpOrNull(vector[i], "DECODER INTERNAL ERROR\n", out, indent + " | "); + } +} + +const char *asString(DecoderError err) { + switch (err) { + default: + DEBUG_BREAK_IF(err != DecoderError::InvalidBinary); + return "with invalid binary"; + break; + case DecoderError::Success: + return "decoded successfully"; + break; + case DecoderError::Undefined: + return "in undefined status"; + break; + } +} + +std::string asString(const ProgramFromPatchtokens &prog) { + std::stringstream stream; + stream << "Program of size : " << prog.blobs.programInfo.size() + << " " << asString(prog.decodeStatus) << "\n"; + dumpOrNull(prog.header, "WARNING : Program header is missing\n", stream, ""); + stream << "Program-scope tokens section size : " << prog.blobs.patchList.size() << "\n"; + dumpVecIfNotEmpty(prog.unhandledTokens, "WARNING : Unhandled program-scope tokens detected", stream, " "); + dumpVecIfNotEmpty(prog.programScopeTokens.allocateConstantMemorySurface, "Inline Costant Surface(s)", stream, " "); + dumpVecIfNotEmpty(prog.programScopeTokens.constantPointer, "Inline Costant Surface - self relocations", stream, " "); + dumpVecIfNotEmpty(prog.programScopeTokens.allocateGlobalMemorySurface, "Inline Global Variable Surface(s)", stream, " "); + dumpVecIfNotEmpty(prog.programScopeTokens.globalPointer, "Inline Global Variable Surface - self relocations", stream, " "); + 
dumpOrNull(prog.programScopeTokens.symbolTable, "", stream, " "); + stream << "Kernels section size : " << prog.blobs.kernelsInfo.size() << "\n"; + for (size_t i = 0; i < prog.kernels.size(); ++i) { + stream << "kernel[" << i << "] " << (prog.kernels[i].name.size() > 0 ? std::string(prog.kernels[i].name.begin(), prog.kernels[i].name.end()).c_str() : "") << ":\n"; + stream << asString(prog.kernels[i]); + } + return stream.str(); +} + +std::string asString(const KernelFromPatchtokens &kern) { + std::stringstream stream; + std::string indentLevel1 = " "; + stream << "Kernel of size : " << kern.blobs.kernelInfo.size() << " " + << " " << asString(kern.decodeStatus) << "\n"; + dumpOrNull(kern.header, "WARNING : Kernel header is missing\n", stream, ""); + stream << "Kernel-scope tokens section size : " << kern.blobs.patchList.size() << "\n"; + dumpVecIfNotEmpty(kern.unhandledTokens, "WARNING : Unhandled kernel-scope tokens detected", stream, indentLevel1); + dumpOrNull(kern.tokens.executionEnvironment, "", stream, indentLevel1); + dumpOrNull(kern.tokens.threadPayload, "", stream, indentLevel1); + dumpOrNull(kern.tokens.samplerStateArray, "", stream, indentLevel1); + dumpOrNull(kern.tokens.bindingTableState, "", stream, indentLevel1); + dumpOrNull(kern.tokens.allocateLocalSurface, "", stream, indentLevel1); + dumpOrNullArrayIfNotEmpty(kern.tokens.mediaVfeState, "mediaVfeState", stream, indentLevel1); + dumpOrNull(kern.tokens.mediaInterfaceDescriptorLoad, "", stream, indentLevel1); + dumpOrNull(kern.tokens.interfaceDescriptorData, "", stream, indentLevel1); + dumpOrNull(kern.tokens.kernelAttributesInfo, "", stream, indentLevel1); + dumpOrNull(kern.tokens.allocateStatelessPrivateSurface, "", stream, indentLevel1); + dumpOrNull(kern.tokens.allocateStatelessConstantMemorySurfaceWithInitialization, "", stream, indentLevel1); + dumpOrNull(kern.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization, "", stream, indentLevel1); + 
dumpOrNull(kern.tokens.allocateStatelessPrintfSurface, "", stream, indentLevel1); + dumpOrNull(kern.tokens.allocateStatelessEventPoolSurface, "", stream, indentLevel1); + dumpOrNull(kern.tokens.allocateStatelessDefaultDeviceQueueSurface, "", stream, indentLevel1); + dumpOrNull(kern.tokens.inlineVmeSamplerInfo, "", stream, indentLevel1); + dumpOrNull(kern.tokens.gtpinFreeGrfInfo, "", stream, indentLevel1); + dumpOrNull(kern.tokens.stateSip, "", stream, indentLevel1); + dumpOrNull(kern.tokens.allocateSystemThreadSurface, "", stream, indentLevel1); + dumpOrNull(kern.tokens.gtpinInfo, "", stream, indentLevel1); + dumpOrNull(kern.tokens.programSymbolTable, "", stream, indentLevel1); + dumpOrNull(kern.tokens.programRelocationTable, "", stream, indentLevel1); + dumpOrNull(kern.tokens.dataParameterStream, "", stream, indentLevel1); + dumpVecIfNotEmpty(kern.tokens.strings, "String literals", stream, indentLevel1); + dumpOrNullArrayIfNotEmpty(kern.tokens.crossThreadPayloadArgs.localWorkSize, "localWorkSize", stream, indentLevel1); + dumpOrNullArrayIfNotEmpty(kern.tokens.crossThreadPayloadArgs.localWorkSize2, "localWorkSize2", stream, indentLevel1); + dumpOrNullArrayIfNotEmpty(kern.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize, "enqueuedLocalWorkSize", stream, indentLevel1); + dumpOrNullArrayIfNotEmpty(kern.tokens.crossThreadPayloadArgs.numWorkGroups, "numWorkGroups", stream, indentLevel1); + dumpOrNullArrayIfNotEmpty(kern.tokens.crossThreadPayloadArgs.globalWorkOffset, "globalWorkOffset", stream, indentLevel1); + dumpOrNullArrayIfNotEmpty(kern.tokens.crossThreadPayloadArgs.globalWorkSize, "globalWorkSize", stream, indentLevel1); + dumpOrNull(kern.tokens.crossThreadPayloadArgs.maxWorkGroupSize, "", stream, indentLevel1); + dumpOrNull(kern.tokens.crossThreadPayloadArgs.workDimensions, "", stream, indentLevel1); + dumpOrNull(kern.tokens.crossThreadPayloadArgs.simdSize, "", stream, indentLevel1); + dumpOrNull(kern.tokens.crossThreadPayloadArgs.parentEvent, "", stream, 
indentLevel1); + dumpOrNull(kern.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize, "", stream, indentLevel1); + dumpOrNull(kern.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize, "", stream, indentLevel1); + dumpOrNull(kern.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress, "", stream, indentLevel1); + dumpOrNull(kern.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple, "", stream, indentLevel1); + dumpVecIfNotEmpty(kern.tokens.crossThreadPayloadArgs.childBlockSimdSize, "Child block simd size(s)", stream, indentLevel1); + + if (kern.tokens.kernelArgs.size() != 0) { + stream << "Kernel arguments [" << kern.tokens.kernelArgs.size() << "] :\n"; + for (size_t i = 0; i < kern.tokens.kernelArgs.size(); ++i) { + stream << " + kernelArg[" << i << "]:\n"; + stream << asString(kern.tokens.kernelArgs[i], indentLevel1 + "| "); + } + } + return stream.str(); +} + +std::string asString(ArgObjectType type, ArgObjectTypeSpecialized typeSpecialized) { + std::string typeAsStr; + switch (type) { + default: + UNRECOVERABLE_IF(ArgObjectType::None != type); + return "unspecified"; + case ArgObjectType::Buffer: + typeAsStr = "BUFFER"; + break; + case ArgObjectType::Image: + typeAsStr = "IMAGE"; + break; + case ArgObjectType::Sampler: + typeAsStr = "SAMPLER"; + break; + case ArgObjectType::Slm: + typeAsStr = "SLM"; + break; + } + + switch (typeSpecialized) { + default: + UNRECOVERABLE_IF(ArgObjectTypeSpecialized::None != typeSpecialized); + break; + case ArgObjectTypeSpecialized::Vme: + typeAsStr += " [ VME ]"; + } + + return typeAsStr; +} + +std::string asString(const KernelArgFromPatchtokens &arg, const std::string &indent) { + std::stringstream stream; + stream << indent << "Kernel argument of type " << asString(arg.objectType, arg.objectTypeSpecialized) << "\n"; + std::string indentLevel1 = indent + " "; + std::string indentLevel2 = indentLevel1 + " "; + dumpOrNull(arg.argInfo, "", stream, indentLevel1); + dumpOrNullObjArg(arg.objectArg, 
stream, indentLevel1); + dumpOrNull(arg.objectId, "", stream, indentLevel1); + switch (arg.objectType) { + default: + break; + case ArgObjectType::Buffer: + stream << indentLevel1 << "Buffer Metadata:\n"; + dumpOrNull(arg.metadata.buffer.bufferOffset, "", stream, indentLevel2); + dumpOrNull(arg.metadata.buffer.pureStateful, "", stream, indentLevel2); + break; + case ArgObjectType::Image: + stream << indentLevel1 << "Image Metadata:\n"; + dumpOrNull(arg.metadata.image.width, "", stream, indentLevel2); + dumpOrNull(arg.metadata.image.height, "", stream, indentLevel2); + dumpOrNull(arg.metadata.image.depth, "", stream, indentLevel2); + dumpOrNull(arg.metadata.image.channelDataType, "", stream, indentLevel2); + dumpOrNull(arg.metadata.image.channelOrder, "", stream, indentLevel2); + dumpOrNull(arg.metadata.image.arraySize, "", stream, indentLevel2); + dumpOrNull(arg.metadata.image.numSamples, "", stream, indentLevel2); + dumpOrNull(arg.metadata.image.numMipLevels, "", stream, indentLevel2); + break; + case ArgObjectType::Sampler: + stream << indentLevel1 << "Sampler Metadata:\n"; + dumpOrNull(arg.metadata.sampler.addressMode, "", stream, indentLevel2); + dumpOrNull(arg.metadata.sampler.coordinateSnapWaRequired, "", stream, indentLevel2); + dumpOrNull(arg.metadata.sampler.normalizedCoords, "", stream, indentLevel2); + break; + case ArgObjectType::Slm: + stream << indentLevel1 << "Slm Metadata:\n"; + dumpOrNull(arg.metadata.slm.token, "", stream, indentLevel2); + break; + } + switch (arg.objectTypeSpecialized) { + default: + break; + case ArgObjectTypeSpecialized::Vme: + stream << indentLevel1 << "Vme Metadata:\n"; + dumpOrNull(arg.metadataSpecialized.vme.mbBlockType, "", stream, indentLevel2); + dumpOrNull(arg.metadataSpecialized.vme.sadAdjustMode, "", stream, indentLevel2); + dumpOrNull(arg.metadataSpecialized.vme.searchPathType, "", stream, indentLevel2); + dumpOrNull(arg.metadataSpecialized.vme.subpixelMode, "", stream, indentLevel2); + break; + } + + 
dumpVecIfNotEmpty(arg.byValMap, " Data passed by value ", stream, indentLevel1); + return stream.str(); +} + +} // namespace PatchTokenBinary + +} // namespace NEO diff --git a/runtime/compiler_interface/patchtokens_dumper.h b/runtime/compiler_interface/patchtokens_dumper.h new file mode 100644 index 0000000000..8a929c5e18 --- /dev/null +++ b/runtime/compiler_interface/patchtokens_dumper.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include + +namespace NEO { + +namespace PatchTokenBinary { + +struct ProgramFromPatchtokens; +struct KernelFromPatchtokens; +struct KernelArgFromPatchtokens; + +std::string asString(const ProgramFromPatchtokens &prog); +std::string asString(const KernelFromPatchtokens &kern); +std::string asString(const KernelArgFromPatchtokens &arg, const std::string &indent); + +} // namespace PatchTokenBinary + +} // namespace NEO diff --git a/runtime/compiler_interface/patchtokens_validator.inl b/runtime/compiler_interface/patchtokens_validator.inl new file mode 100644 index 0000000000..01c48dc0eb --- /dev/null +++ b/runtime/compiler_interface/patchtokens_validator.inl @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "runtime/compiler_interface/patchtokens_decoder.h" +#include "runtime/helpers/hw_info.h" + +#include "igfxfmid.h" + +#include + +namespace NEO { + +namespace PatchTokenBinary { + +enum class ValidatorError { + Success = 0, + Undefined = 1, + InvalidBinary = 2, + NotEnoughSlm = 3, +}; + +constexpr bool isDeviceSupported(GFXCORE_FAMILY device) { + return (device < (sizeof(familyEnabled) / sizeof(familyEnabled[0]))) && familyEnabled[device]; +} + +template +inline ValidatorError validate(const ProgramFromPatchtokens &decodedProgram, + size_t sharedLocalMemorySize, + const UknownTokenValidatorT &tokenValidator, + std::string &outErrReason, std::string &outWarnings) { + if 
(decodedProgram.decodeStatus != PatchTokenBinary::DecoderError::Success) { + outErrReason = "ProgramFromPatchtokens wasn't successfully decoded"; + return ValidatorError::InvalidBinary; + } + + if (decodedProgram.programScopeTokens.allocateConstantMemorySurface.size() > 1) { + outErrReason = "Unhandled number of global constants surfaces > 1"; + return ValidatorError::InvalidBinary; + } + + if (decodedProgram.programScopeTokens.allocateGlobalMemorySurface.size() > 1) { + outErrReason = "Unhandled number of global variables surfaces > 1"; + return ValidatorError::InvalidBinary; + } + + for (const auto &globalConstantPointerToken : decodedProgram.programScopeTokens.constantPointer) { + bool isUnhandled = (globalConstantPointerToken->ConstantBufferIndex != 0); + isUnhandled |= (globalConstantPointerToken->BufferIndex != 0); + isUnhandled |= (globalConstantPointerToken->BufferType != PROGRAM_SCOPE_CONSTANT_BUFFER); + isUnhandled |= (0 == decodedProgram.programScopeTokens.allocateConstantMemorySurface.size()) || decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]->InlineDataSize < globalConstantPointerToken->ConstantPointerOffset + sizeof(uint32_t); + + if (isUnhandled) { + outErrReason = "Unhandled SPatchConstantPointerProgramBinaryInfo"; + return ValidatorError::InvalidBinary; + } + } + + for (const auto &globalVariablePointerToken : decodedProgram.programScopeTokens.globalPointer) { + bool isUnhandled = (globalVariablePointerToken->GlobalBufferIndex != 0); + isUnhandled |= (globalVariablePointerToken->BufferIndex != 0); + isUnhandled |= (globalVariablePointerToken->BufferType != PROGRAM_SCOPE_GLOBAL_BUFFER); + isUnhandled |= (0 == decodedProgram.programScopeTokens.allocateGlobalMemorySurface.size()) || decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]->InlineDataSize < globalVariablePointerToken->GlobalPointerOffset + sizeof(uint32_t); + + if (isUnhandled) { + outErrReason = "Unhandled SPatchGlobalPointerProgramBinaryInfo"; + return 
ValidatorError::InvalidBinary; + } + } + + for (const auto &unhandledToken : decodedProgram.unhandledTokens) { + if (false == tokenValidator.isSafeToSkipUnhandledToken(unhandledToken->Token)) { + outErrReason = "Unhandled required program-scope Patch Token : " + std::to_string(unhandledToken->Token); + return ValidatorError::InvalidBinary; + } else { + outWarnings = "Unknown program-scope Patch Token : " + std::to_string(unhandledToken->Token); + } + } + + UNRECOVERABLE_IF(nullptr == decodedProgram.header); + if (decodedProgram.header->Version != CURRENT_ICBE_VERSION) { + outErrReason = "Unhandled Version of Patchtokens: expected: " + std::to_string(CURRENT_ICBE_VERSION) + ", got: " + std::to_string(decodedProgram.header->Version); + return ValidatorError::InvalidBinary; + } + + if (false == isDeviceSupported(static_cast(decodedProgram.header->Device))) { + outErrReason = "Unsupported device binary, device GFXCORE_FAMILY : " + std::to_string(decodedProgram.header->Device); + return ValidatorError::InvalidBinary; + } + + for (const auto &decodedKernel : decodedProgram.kernels) { + if (decodedKernel.decodeStatus != PatchTokenBinary::DecoderError::Success) { + outErrReason = "KernelFromPatchtokens wasn't successfully decoded"; + return ValidatorError::InvalidBinary; + } + + UNRECOVERABLE_IF(nullptr == decodedKernel.header); + if (hasInvalidChecksum(decodedKernel)) { + outErrReason = "KernelFromPatchtokens has invalid checksum"; + return ValidatorError::InvalidBinary; + } + + if (decodedKernel.tokens.allocateLocalSurface) { + if (sharedLocalMemorySize < decodedKernel.tokens.allocateLocalSurface->TotalInlineLocalMemorySize) { + outErrReason = "KernelFromPatchtokens requires too much SLM"; + return ValidatorError::NotEnoughSlm; + } + } + + for (const auto &unhandledToken : decodedKernel.unhandledTokens) { + if (false == tokenValidator.isSafeToSkipUnhandledToken(unhandledToken->Token)) { + outErrReason = "Unhandled required kernel-scope Patch Token : " + 
std::to_string(unhandledToken->Token); + return ValidatorError::InvalidBinary; + } else { + outWarnings = "Unknown kernel-scope Patch Token : " + std::to_string(unhandledToken->Token); + } + } + } + + return ValidatorError::Success; +} + +} // namespace PatchTokenBinary + +} // namespace NEO diff --git a/runtime/device_queue/device_queue_hw_bdw_plus.inl b/runtime/device_queue/device_queue_hw_bdw_plus.inl index 6ec0a94652..f8640de113 100644 --- a/runtime/device_queue/device_queue_hw_bdw_plus.inl +++ b/runtime/device_queue/device_queue_hw_bdw_plus.inl @@ -6,6 +6,7 @@ */ #include "runtime/device_queue/device_queue_hw_base.inl" +#include "runtime/program/block_kernel_manager.h" namespace NEO { diff --git a/runtime/helpers/hardware_commands_helper.h b/runtime/helpers/hardware_commands_helper.h index b6338cff2f..39a47f4321 100644 --- a/runtime/helpers/hardware_commands_helper.h +++ b/runtime/helpers/hardware_commands_helper.h @@ -164,42 +164,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { static size_t getTotalSizeRequiredSSH( const MultiDispatchInfo &multiDispatchInfo); - static size_t getSizeRequiredForExecutionModel(IndirectHeap::Type heapType, const Kernel &kernel) { - typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; - - size_t totalSize = 0; - BlockKernelManager *blockManager = kernel.getProgram()->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); - uint32_t maxBindingTableCount = 0; - - if (heapType == IndirectHeap::SURFACE_STATE) { - totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; - - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - totalSize += pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize; - totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - - maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count); - } - } - 
- if (heapType == IndirectHeap::INDIRECT_OBJECT || heapType == IndirectHeap::SURFACE_STATE) { - BuiltIns &builtIns = *kernel.getDevice().getExecutionEnvironment()->getBuiltIns(); - SchedulerKernel &scheduler = builtIns.getSchedulerKernel(kernel.getContext()); - - if (heapType == IndirectHeap::INDIRECT_OBJECT) { - totalSize += getSizeRequiredIOH(scheduler); - } else { - totalSize += getSizeRequiredSSH(scheduler); - - totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; - totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - } - } - return totalSize; - } - + static size_t getSizeRequiredForExecutionModel(IndirectHeap::Type heapType, const Kernel &kernel); static void setInterfaceDescriptorOffset( WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex); diff --git a/runtime/helpers/hardware_commands_helper.inl b/runtime/helpers/hardware_commands_helper.inl index b28a3ead92..fb85878634 100644 --- a/runtime/helpers/hardware_commands_helper.inl +++ b/runtime/helpers/hardware_commands_helper.inl @@ -17,6 +17,7 @@ #include "runtime/indirect_heap/indirect_heap.h" #include "runtime/kernel/kernel.h" #include "runtime/os_interface/debug_settings_manager.h" +#include "runtime/program/block_kernel_manager.h" #include @@ -131,6 +132,43 @@ size_t HardwareCommandsHelper::getTotalSizeRequiredSSH( return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); }); } +template +size_t HardwareCommandsHelper::getSizeRequiredForExecutionModel(IndirectHeap::Type heapType, const Kernel &kernel) { + typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; + + size_t totalSize = 0; + BlockKernelManager *blockManager = kernel.getProgram()->getBlockKernelManager(); + uint32_t blockCount = static_cast(blockManager->getCount()); + uint32_t maxBindingTableCount = 0; + + if (heapType == IndirectHeap::SURFACE_STATE) { + 
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; + + for (uint32_t i = 0; i < blockCount; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + totalSize += pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize; + totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + + maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count); + } + } + + if (heapType == IndirectHeap::INDIRECT_OBJECT || heapType == IndirectHeap::SURFACE_STATE) { + BuiltIns &builtIns = *kernel.getDevice().getExecutionEnvironment()->getBuiltIns(); + SchedulerKernel &scheduler = builtIns.getSchedulerKernel(kernel.getContext()); + + if (heapType == IndirectHeap::INDIRECT_OBJECT) { + totalSize += getSizeRequiredIOH(scheduler); + } else { + totalSize += getSizeRequiredSSH(scheduler); + + totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; + totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + } + } + return totalSize; +} + template size_t HardwareCommandsHelper::sendInterfaceDescriptorData( const IndirectHeap &indirectHeap, diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 1ca2c123b0..aba2c954b9 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -38,6 +38,7 @@ #include "runtime/memory_manager/surface.h" #include "runtime/os_interface/debug_settings_manager.h" #include "runtime/platform/platform.h" +#include "runtime/program/block_kernel_manager.h" #include "runtime/program/kernel_info.h" #include "runtime/sampler/sampler.h" @@ -1823,7 +1824,7 @@ void Kernel::ReflectionSurfaceHelper::getCurbeParams(std::vector(kernelInfo.heapInfo.pSsh) + kernelInfo.patchInfo.bindingTableState->Offset; + const void *ssh = static_cast(kernelInfo.heapInfo.pSsh) + kernelInfo.patchInfo.bindingTableState->Offset; for (uint32_t i = 0; i < 
kernelInfo.patchInfo.bindingTableState->Count; i++) { @@ -1869,11 +1870,9 @@ void Kernel::ReflectionSurfaceHelper::getCurbeParams(std::vectorType == DATA_PARAMETER_KERNEL_ARGUMENT) { - curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_KERNEL_ARGUMENT, param->DataSize, param->Offset, param->ArgumentNumber}); - tokenMask |= ((uint64_t)1 << DATA_PARAMETER_KERNEL_ARGUMENT); - } + for (auto param : kernelInfo.patchInfo.dataParameterBuffersKernelArgs) { + curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_KERNEL_ARGUMENT, param->DataSize, param->Offset, param->ArgumentNumber}); + tokenMask |= ((uint64_t)1 << DATA_PARAMETER_KERNEL_ARGUMENT); } for (uint32_t i = 0; i < 3; i++) { @@ -2319,4 +2318,13 @@ void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocati } } +void Kernel::setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset) { + DEBUG_BREAK_IF(blockID >= program->getBlockKernelManager()->getCount()); + ReflectionSurfaceHelper::setKernelAddressDataBtOffset(getKernelReflectionSurface()->getUnderlyingBuffer(), blockID, offset); +} + +bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() { + return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf(); +} + } // namespace NEO diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index dfe67b98fa..fde592acce 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -8,6 +8,7 @@ #pragma once #include "core/helpers/preamble.h" #include "core/unified_memory/unified_memory.h" +#include "core/utilities/stackvec.h" #include "runtime/api/cl_types.h" #include "runtime/command_stream/thread_arbitration_policy.h" #include "runtime/device_queue/device_queue.h" @@ -23,6 +24,7 @@ namespace NEO { struct CompletionStamp; class Buffer; +class CommandStreamReceiver; class GraphicsAllocation; class ImageTransformer; class Surface; @@ -294,10 +296,7 @@ class Kernel : public BaseObject<_cl_kernel> { bool hasPrintfOutput() 
const; - void setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset) { - DEBUG_BREAK_IF(blockID >= program->getBlockKernelManager()->getCount()); - ReflectionSurfaceHelper::setKernelAddressDataBtOffset(getKernelReflectionSurface()->getUnderlyingBuffer(), blockID, offset); - } + void setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset); cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, @@ -353,9 +352,7 @@ class Kernel : public BaseObject<_cl_kernel> { return ThreadArbitrationPolicy::AgeBased; } } - bool checkIfIsParentKernelAndBlocksUsesPrintf() { - return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf(); - } + bool checkIfIsParentKernelAndBlocksUsesPrintf(); bool is32Bit() const { return kernelInfo.gpuPointerSize == 4; diff --git a/runtime/kernel/kernel.inl b/runtime/kernel/kernel.inl index 6f7761dbc2..6fcfc2fab3 100644 --- a/runtime/kernel/kernel.inl +++ b/runtime/kernel/kernel.inl @@ -6,6 +6,7 @@ */ #include "runtime/kernel/kernel.h" +#include "runtime/program/block_kernel_manager.h" #include "runtime/program/printf_handler.h" namespace NEO { diff --git a/runtime/program/CMakeLists.txt b/runtime/program/CMakeLists.txt index 9275c41bc9..81323f7b46 100644 --- a/runtime/program/CMakeLists.txt +++ b/runtime/program/CMakeLists.txt @@ -19,6 +19,8 @@ set(RUNTIME_SRCS_PROGRAM ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_info.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info.h + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_from_patchtokens.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_from_patchtokens.h ${CMAKE_CURRENT_SOURCE_DIR}/link.cpp ${CMAKE_CURRENT_SOURCE_DIR}/patch_info.h ${CMAKE_CURRENT_SOURCE_DIR}/print_formatter.cpp diff --git a/runtime/program/build.cpp b/runtime/program/build.cpp index 6efa6a8ce0..66ed1483bc 100644 --- a/runtime/program/build.cpp +++ b/runtime/program/build.cpp @@ -13,6 +13,7 @@ #include "runtime/helpers/validators.h" #include 
"runtime/os_interface/debug_settings_manager.h" #include "runtime/platform/platform.h" +#include "runtime/program/kernel_info.h" #include "runtime/program/program.h" #include "runtime/source_level_debugger/source_level_debugger.h" @@ -197,15 +198,6 @@ cl_int Program::build(const cl_device_id device, const char *buildOptions, bool return ret; } -cl_int Program::build( - const char *pKernelData, - size_t kernelDataSize) { - cl_int retVal = CL_SUCCESS; - processKernel(pKernelData, 0U, retVal); - - return retVal; -} - void Program::extractInternalOptions(std::string &options) { for (auto &optionString : internalOptionsToExtract) { size_t pos = options.find(optionString); diff --git a/runtime/program/create.inl b/runtime/program/create.inl index 94a6ddf83b..e3f9fee3f6 100644 --- a/runtime/program/create.inl +++ b/runtime/program/create.inl @@ -7,6 +7,7 @@ #include "runtime/context/context.h" #include "runtime/device/device.h" +#include "runtime/helpers/string_helpers.h" #include "runtime/os_interface/debug_settings_manager.h" #include "runtime/program/program.h" diff --git a/runtime/program/heap_info.h b/runtime/program/heap_info.h index 68352927f7..a55195a38e 100644 --- a/runtime/program/heap_info.h +++ b/runtime/program/heap_info.h @@ -13,25 +13,11 @@ namespace NEO { struct HeapInfo { - const SKernelBinaryHeaderCommon *pKernelHeader; - const void *pKernelHeap; - const void *pGsh; - const void *pDsh; - void *pSsh; - const void *pPatchList; - const void *pBlob; - size_t blobSize; - - HeapInfo() { - pKernelHeader = nullptr; - pKernelHeap = nullptr; - pGsh = nullptr; - pDsh = nullptr; - pSsh = nullptr; - pPatchList = nullptr; - pBlob = nullptr; - blobSize = 0; - } + const SKernelBinaryHeaderCommon *pKernelHeader = nullptr; + const void *pKernelHeap = nullptr; + const void *pGsh = nullptr; + const void *pDsh = nullptr; + const void *pSsh = nullptr; }; } // namespace NEO diff --git a/runtime/program/kernel_info.cpp b/runtime/program/kernel_info.cpp index 
d21c129308..882064bc4b 100644 --- a/runtime/program/kernel_info.cpp +++ b/runtime/program/kernel_info.cpp @@ -8,6 +8,7 @@ #include "core/helpers/aligned_memory.h" #include "core/helpers/ptr_math.h" #include "core/helpers/string.h" +#include "runtime/compiler_interface/patchtokens_decoder.h" #include "runtime/device/device.h" #include "runtime/gen_common/hw_cmds.h" #include "runtime/helpers/dispatch_info.h" @@ -202,44 +203,57 @@ KernelInfo::~KernelInfo() { delete[] crossThreadData; } -cl_int KernelInfo::storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo) { - cl_int retVal = CL_SUCCESS; - - if (pkernelArgInfo == nullptr) { - retVal = CL_INVALID_BINARY; - } else { - uint32_t argNum = pkernelArgInfo->ArgumentNumber; - auto pCurArgAttrib = ptrOffset( - reinterpret_cast(pkernelArgInfo), - sizeof(SPatchKernelArgumentInfo)); - - resizeKernelArgInfoAndRegisterParameter(argNum); - - kernelArgInfo[argNum].addressQualifierStr = pCurArgAttrib; - pCurArgAttrib += pkernelArgInfo->AddressQualifierSize; - - kernelArgInfo[argNum].accessQualifierStr = pCurArgAttrib; - pCurArgAttrib += pkernelArgInfo->AccessQualifierSize; - - kernelArgInfo[argNum].name = pCurArgAttrib; - pCurArgAttrib += pkernelArgInfo->ArgumentNameSize; - - { - auto argType = strchr(pCurArgAttrib, ';'); - DEBUG_BREAK_IF(argType == nullptr); - - kernelArgInfo[argNum].typeStr.assign(pCurArgAttrib, argType - pCurArgAttrib); - pCurArgAttrib += pkernelArgInfo->TypeNameSize; - - ++argType; - } - - kernelArgInfo[argNum].typeQualifierStr = pCurArgAttrib; - - patchInfo.kernelArgumentInfo.push_back(pkernelArgInfo); +void KernelInfo::storePatchToken(const SPatchExecutionEnvironment *execEnv) { + this->patchInfo.executionEnvironment = execEnv; + if (execEnv->RequiredWorkGroupSizeX != 0) { + this->reqdWorkGroupSize[0] = execEnv->RequiredWorkGroupSizeX; + this->reqdWorkGroupSize[1] = execEnv->RequiredWorkGroupSizeY; + this->reqdWorkGroupSize[2] = execEnv->RequiredWorkGroupSizeZ; + 
DEBUG_BREAK_IF(!(execEnv->RequiredWorkGroupSizeY > 0)); + DEBUG_BREAK_IF(!(execEnv->RequiredWorkGroupSizeZ > 0)); + } + this->workgroupWalkOrder[0] = 0; + this->workgroupWalkOrder[1] = 1; + this->workgroupWalkOrder[2] = 2; + if (execEnv->WorkgroupWalkOrderDims) { + constexpr auto dimensionMask = 0b11; + constexpr auto dimensionSize = 2; + this->workgroupWalkOrder[0] = execEnv->WorkgroupWalkOrderDims & dimensionMask; + this->workgroupWalkOrder[1] = (execEnv->WorkgroupWalkOrderDims >> dimensionSize) & dimensionMask; + this->workgroupWalkOrder[2] = (execEnv->WorkgroupWalkOrderDims >> dimensionSize * 2) & dimensionMask; + this->requiresWorkGroupOrder = true; } - return retVal; + for (uint32_t i = 0; i < 3; ++i) { + // inverts the walk order mapping (from ORDER_ID->DIM_ID to DIM_ID->ORDER_ID) + this->workgroupDimensionsOrder[this->workgroupWalkOrder[i]] = i; + } + + if (execEnv->CompiledForGreaterThan4GBBuffers == false) { + this->requiresSshForBuffers = true; + } +} + +void KernelInfo::storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo) { + if (pkernelArgInfo == nullptr) { + return; + } + + uint32_t argNum = pkernelArgInfo->ArgumentNumber; + resizeKernelArgInfoAndRegisterParameter(argNum); + + auto inlineData = PatchTokenBinary::getInlineData(pkernelArgInfo); + + kernelArgInfo[argNum].addressQualifierStr = std::string(inlineData.addressQualifier.begin(), inlineData.addressQualifier.end()).c_str(); + kernelArgInfo[argNum].accessQualifierStr = std::string(inlineData.accessQualifier.begin(), inlineData.accessQualifier.end()).c_str(); + kernelArgInfo[argNum].name = std::string(inlineData.argName.begin(), inlineData.argName.end()).c_str(); + + auto argTypeDelim = strchr(inlineData.typeName.begin(), ';'); + DEBUG_BREAK_IF(argTypeDelim == nullptr); + kernelArgInfo[argNum].typeStr = std::string(inlineData.typeName.begin(), ptrDiff(argTypeDelim, inlineData.typeName.begin())).c_str(); + kernelArgInfo[argNum].typeQualifierStr = 
std::string(inlineData.typeQualifiers.begin(), inlineData.typeQualifiers.end()).c_str(); + + patchInfo.kernelArgumentInfo.push_back(pkernelArgInfo); } void KernelInfo::storeKernelArgument( @@ -380,6 +394,7 @@ void KernelInfo::storePatchToken(const SPatchString *pStringArg) { } void KernelInfo::storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo) { + this->patchInfo.pKernelAttributesInfo = pKernelAttributesInfo; attributes = reinterpret_cast(pKernelAttributesInfo) + sizeof(SPatchKernelAttributesInfo); auto start = attributes.find("intel_reqd_sub_group_size("); diff --git a/runtime/program/kernel_info.h b/runtime/program/kernel_info.h index c1869dde2e..d04a2568c0 100644 --- a/runtime/program/kernel_info.h +++ b/runtime/program/kernel_info.h @@ -38,47 +38,24 @@ extern std::unordered_map addressQualifierMap; extern std::map typeSizeMap; struct WorkloadInfo { - uint32_t globalWorkOffsetOffsets[3]; - uint32_t globalWorkSizeOffsets[3]; - uint32_t localWorkSizeOffsets[3]; - uint32_t localWorkSizeOffsets2[3]; - uint32_t enqueuedLocalWorkSizeOffsets[3]; - uint32_t numWorkGroupsOffset[3]; - uint32_t maxWorkGroupSizeOffset; - uint32_t workDimOffset; - uint32_t slmStaticSize = 0; - uint32_t simdSizeOffset; - uint32_t parentEventOffset; - uint32_t preferredWkgMultipleOffset; - static const uint32_t undefinedOffset; static const uint32_t invalidParentEvent; - WorkloadInfo() { - globalWorkOffsetOffsets[0] = undefinedOffset; - globalWorkOffsetOffsets[1] = undefinedOffset; - globalWorkOffsetOffsets[2] = undefinedOffset; - globalWorkSizeOffsets[0] = undefinedOffset; - globalWorkSizeOffsets[1] = undefinedOffset; - globalWorkSizeOffsets[2] = undefinedOffset; - localWorkSizeOffsets[0] = undefinedOffset; - localWorkSizeOffsets[1] = undefinedOffset; - localWorkSizeOffsets[2] = undefinedOffset; - localWorkSizeOffsets2[0] = undefinedOffset; - localWorkSizeOffsets2[1] = undefinedOffset; - localWorkSizeOffsets2[2] = undefinedOffset; - enqueuedLocalWorkSizeOffsets[0] = 
undefinedOffset; - enqueuedLocalWorkSizeOffsets[1] = undefinedOffset; - enqueuedLocalWorkSizeOffsets[2] = undefinedOffset; - numWorkGroupsOffset[0] = undefinedOffset; - numWorkGroupsOffset[1] = undefinedOffset; - numWorkGroupsOffset[2] = undefinedOffset; - maxWorkGroupSizeOffset = undefinedOffset; - workDimOffset = undefinedOffset; - simdSizeOffset = undefinedOffset; - parentEventOffset = undefinedOffset; - preferredWkgMultipleOffset = undefinedOffset; - } + uint32_t globalWorkOffsetOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; + uint32_t globalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; + uint32_t localWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; + uint32_t localWorkSizeOffsets2[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; + uint32_t enqueuedLocalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; + uint32_t numWorkGroupsOffset[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; + uint32_t maxWorkGroupSizeOffset = undefinedOffset; + uint32_t workDimOffset = undefinedOffset; + uint32_t slmStaticSize = 0; + uint32_t simdSizeOffset = undefinedOffset; + uint32_t parentEventOffset = undefinedOffset; + uint32_t preferredWkgMultipleOffset = undefinedOffset; + uint32_t privateMemoryStatelessSizeOffset = undefinedOffset; + uint32_t localMemoryStatelessWindowSizeOffset = undefinedOffset; + uint32_t localMemoryStatelessWindowStartAddressOffset = undefinedOffset; }; static const float YTilingRatioValue = 1.3862943611198906188344642429164f; @@ -115,24 +92,12 @@ struct DebugData { struct KernelInfo { public: - KernelInfo() { - heapInfo = {}; - patchInfo = {}; - workloadInfo = {}; - kernelArgInfo = {}; - kernelNonArgInfo = {}; - childrenKernelsIdOffset = {}; - reqdWorkGroupSize[0] = WorkloadInfo::undefinedOffset; - reqdWorkGroupSize[1] = WorkloadInfo::undefinedOffset; - reqdWorkGroupSize[2] = WorkloadInfo::undefinedOffset; - } - + KernelInfo() = default; 
KernelInfo(const KernelInfo &) = delete; KernelInfo &operator=(const KernelInfo &) = delete; - ~KernelInfo(); - cl_int storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo); + void storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo); void storeKernelArgument(const SPatchDataParameterBuffer *pDataParameterKernelArg); void storeKernelArgument(const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg); void storeKernelArgument(const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg); @@ -140,6 +105,7 @@ struct KernelInfo { void storeKernelArgument(const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg); void storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg); void storeKernelArgument(const SPatchSamplerKernelArgument *pSamplerKernelArg); + void storePatchToken(const SPatchExecutionEnvironment *execEnv); void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg); void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg); void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg); @@ -216,18 +182,18 @@ struct KernelInfo { std::string name; std::string attributes; - HeapInfo heapInfo; - PatchInfo patchInfo; + HeapInfo heapInfo = {}; + PatchInfo patchInfo = {}; std::vector kernelArgInfo; std::vector kernelNonArgInfo; - WorkloadInfo workloadInfo; + WorkloadInfo workloadInfo = {}; std::vector> childrenKernelsIdOffset; bool usesSsh = false; bool requiresSshForBuffers = false; bool isValid = false; bool isVmeWorkload = false; char *crossThreadData = nullptr; - size_t reqdWorkGroupSize[3]; + size_t reqdWorkGroupSize[3] = {WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset}; size_t requiredSubGroupSize = 0; 
std::array workgroupWalkOrder = {{0, 1, 2}}; std::array workgroupDimensionsOrder = {{0, 1, 2}}; diff --git a/runtime/program/kernel_info_from_patchtokens.cpp b/runtime/program/kernel_info_from_patchtokens.cpp new file mode 100644 index 0000000000..8cd1ae99d2 --- /dev/null +++ b/runtime/program/kernel_info_from_patchtokens.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "runtime/program/kernel_info_from_patchtokens.h" + +#include "runtime/compiler_interface/patchtokens_decoder.h" +#include "runtime/program/kernel_info.h" + +namespace NEO { + +using namespace iOpenCL; + +template +inline void storeTokenIfNotNull(KernelInfo &kernelInfo, T *token) { + if (token != nullptr) { + kernelInfo.storePatchToken(token); + } +} + +template +inline uint32_t getOffset(T *token) { + if (token != nullptr) { + return token->Offset; + } + return WorkloadInfo::undefinedOffset; +} + +void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelInfoArg, const PatchTokenBinary::KernelArgFromPatchtokens src) { + dstKernelInfoArg.needPatch = true; + dstKernelInfo.storeArgInfo(src.argInfo); + if (src.objectArg != nullptr) { + switch (src.objectArg->Token) { + default: + UNRECOVERABLE_IF(true); + case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT: + dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); + break; + case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT: + dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); + break; + case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: + dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); + break; + case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: + dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); + break; + case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT: + dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); + break; + case 
PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT: + dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); + break; + } + } + + switch (src.objectType) { + default: + UNRECOVERABLE_IF(PatchTokenBinary::ArgObjectType::None != src.objectType); + break; + case PatchTokenBinary::ArgObjectType::Buffer: + dstKernelInfoArg.offsetBufferOffset = getOffset(src.metadata.buffer.bufferOffset); + dstKernelInfoArg.pureStatefulBufferAccess = (src.metadata.buffer.pureStateful != nullptr); + break; + case PatchTokenBinary::ArgObjectType::Image: + dstKernelInfoArg.offsetImgWidth = getOffset(src.metadata.image.width); + dstKernelInfoArg.offsetImgHeight = getOffset(src.metadata.image.height); + dstKernelInfoArg.offsetImgDepth = getOffset(src.metadata.image.depth); + dstKernelInfoArg.offsetChannelDataType = getOffset(src.metadata.image.channelDataType); + dstKernelInfoArg.offsetChannelOrder = getOffset(src.metadata.image.channelOrder); + dstKernelInfoArg.offsetArraySize = getOffset(src.metadata.image.arraySize); + dstKernelInfoArg.offsetNumSamples = getOffset(src.metadata.image.numSamples); + dstKernelInfoArg.offsetNumMipLevels = getOffset(src.metadata.image.numMipLevels); + break; + case PatchTokenBinary::ArgObjectType::Sampler: + dstKernelInfoArg.offsetSamplerSnapWa = getOffset(src.metadata.sampler.coordinateSnapWaRequired); + dstKernelInfoArg.offsetSamplerAddressingMode = getOffset(src.metadata.sampler.addressMode); + dstKernelInfoArg.offsetSamplerNormalizedCoords = getOffset(src.metadata.sampler.normalizedCoords); + break; + case PatchTokenBinary::ArgObjectType::Slm: + dstKernelInfoArg.slmAlignment = src.metadata.slm.token->SourceOffset; + break; + } + + switch (src.objectTypeSpecialized) { + default: + UNRECOVERABLE_IF(PatchTokenBinary::ArgObjectTypeSpecialized::None != src.objectTypeSpecialized); + break; + case PatchTokenBinary::ArgObjectTypeSpecialized::Vme: + dstKernelInfoArg.offsetVmeMbBlockType = getOffset(src.metadataSpecialized.vme.mbBlockType); + 
dstKernelInfoArg.offsetVmeSubpixelMode = getOffset(src.metadataSpecialized.vme.subpixelMode); + dstKernelInfoArg.offsetVmeSadAdjustMode = getOffset(src.metadataSpecialized.vme.sadAdjustMode); + dstKernelInfoArg.offsetVmeSearchPathType = getOffset(src.metadataSpecialized.vme.searchPathType); + break; + } + + for (auto &byValArg : src.byValMap) { + dstKernelInfo.storeKernelArgument(byValArg); + if (byValArg->Type == DATA_PARAMETER_KERNEL_ARGUMENT) { + dstKernelInfo.patchInfo.dataParameterBuffersKernelArgs.push_back(byValArg); + } + } + + dstKernelInfoArg.offsetObjectId = getOffset(src.objectId); +} + +void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src) { + dst.heapInfo.pKernelHeader = src.header; + dst.name = std::string(src.name.begin(), src.name.end()).c_str(); + dst.heapInfo.pKernelHeap = src.isa.begin(); + dst.heapInfo.pGsh = src.heaps.generalState.begin(); + dst.heapInfo.pDsh = src.heaps.dynamicState.begin(); + dst.heapInfo.pSsh = src.heaps.surfaceState.begin(); + + storeTokenIfNotNull(dst, src.tokens.executionEnvironment); + dst.patchInfo.samplerStateArray = src.tokens.samplerStateArray; + dst.patchInfo.bindingTableState = src.tokens.bindingTableState; + dst.usesSsh = src.tokens.bindingTableState && (src.tokens.bindingTableState->Count > 0); + dst.patchInfo.localsurface = src.tokens.allocateLocalSurface; + dst.workloadInfo.slmStaticSize = src.tokens.allocateLocalSurface ? 
src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U; + dst.patchInfo.mediavfestate = src.tokens.mediaVfeState[0]; + dst.patchInfo.mediaVfeStateSlot1 = src.tokens.mediaVfeState[1]; + dst.patchInfo.interfaceDescriptorDataLoad = src.tokens.mediaInterfaceDescriptorLoad; + dst.patchInfo.interfaceDescriptorData = src.tokens.interfaceDescriptorData; + dst.patchInfo.threadPayload = src.tokens.threadPayload; + dst.patchInfo.dataParameterStream = src.tokens.dataParameterStream; + + dst.patchInfo.kernelArgumentInfo.reserve(src.tokens.kernelArgs.size()); + dst.kernelArgInfo.resize(src.tokens.kernelArgs.size()); + dst.argumentsToPatchNum = static_cast(src.tokens.kernelArgs.size()); + + for (size_t i = 0U; i < src.tokens.kernelArgs.size(); ++i) { + auto &decodedKernelArg = src.tokens.kernelArgs[i]; + auto &kernelInfoArg = dst.kernelArgInfo[i]; + populateKernelInfoArg(dst, kernelInfoArg, decodedKernelArg); + } + + storeTokenIfNotNull(dst, src.tokens.kernelAttributesInfo); + storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrivateSurface); + storeTokenIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization); + storeTokenIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization); + storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrintfSurface); + storeTokenIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface); + storeTokenIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface); + + for (auto &str : src.tokens.strings) { + dst.storePatchToken(str); + } + + dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr); + dst.systemKernelOffset = src.tokens.stateSip ? 
src.tokens.stateSip->SystemKernelOffset : 0U; + storeTokenIfNotNull(dst, src.tokens.allocateSystemThreadSurface); + + for (uint32_t i = 0; i < 3U; ++i) { + dst.workloadInfo.localWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize[i]); + dst.workloadInfo.localWorkSizeOffsets2[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize2[i]); + dst.workloadInfo.globalWorkOffsetOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkOffset[i]); + dst.workloadInfo.enqueuedLocalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[i]); + dst.workloadInfo.globalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkSize[i]); + dst.workloadInfo.numWorkGroupsOffset[i] = getOffset(src.tokens.crossThreadPayloadArgs.numWorkGroups[i]); + } + + dst.workloadInfo.maxWorkGroupSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.maxWorkGroupSize); + dst.workloadInfo.workDimOffset = getOffset(src.tokens.crossThreadPayloadArgs.workDimensions); + dst.workloadInfo.simdSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.simdSize); + dst.workloadInfo.parentEventOffset = getOffset(src.tokens.crossThreadPayloadArgs.parentEvent); + dst.workloadInfo.preferredWkgMultipleOffset = getOffset(src.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple); + dst.workloadInfo.privateMemoryStatelessSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize); + dst.workloadInfo.localMemoryStatelessWindowSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize); + dst.workloadInfo.localMemoryStatelessWindowStartAddressOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress); + for (auto &childSimdSize : src.tokens.crossThreadPayloadArgs.childBlockSimdSize) { + dst.childrenKernelsIdOffset.push_back({childSimdSize->ArgumentNumber, childSimdSize->Offset}); + } + + if (src.tokens.gtpinInfo) { + dst.igcInfoForGtpin = 
reinterpret_cast(src.tokens.gtpinInfo + 1); + } + + dst.isValid = (false == NEO::PatchTokenBinary::hasInvalidChecksum(src)); +} + +} // namespace NEO diff --git a/runtime/program/kernel_info_from_patchtokens.h b/runtime/program/kernel_info_from_patchtokens.h new file mode 100644 index 0000000000..53a77889fa --- /dev/null +++ b/runtime/program/kernel_info_from_patchtokens.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +namespace NEO { + +struct KernelInfo; + +namespace PatchTokenBinary { +struct KernelFromPatchtokens; +} + +void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src); + +} // namespace NEO diff --git a/runtime/program/link.cpp b/runtime/program/link.cpp index 878ee68a7e..b28052956f 100644 --- a/runtime/program/link.cpp +++ b/runtime/program/link.cpp @@ -7,6 +7,7 @@ #include "core/compiler_interface/compiler_interface.h" #include "core/elf/writer.h" +#include "core/utilities/stackvec.h" #include "runtime/compiler_interface/compiler_options.h" #include "runtime/device/device.h" #include "runtime/helpers/validators.h" diff --git a/runtime/program/patch_info.h b/runtime/program/patch_info.h index e8740cc66e..9929c7c9dc 100644 --- a/runtime/program/patch_info.h +++ b/runtime/program/patch_info.h @@ -54,7 +54,7 @@ struct PatchInfo { const SPatchInterfaceDescriptorData *interfaceDescriptorData = nullptr; const SPatchSamplerStateArray *samplerStateArray = nullptr; const SPatchBindingTableState *bindingTableState = nullptr; - ::std::vector dataParameterBuffers; + ::std::vector dataParameterBuffersKernelArgs; ::std::vector statelessGlobalMemObjKernelArgs; ::std::vector @@ -74,9 +74,6 @@ struct PatchInfo { const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr; ::std::unordered_map stringDataMap; ::std::vector kernelArgumentInfo; - - PatchInfo() { - } }; } // namespace NEO diff --git a/runtime/program/process_elf_binary.cpp 
b/runtime/program/process_elf_binary.cpp index d7831c9052..c937e0a3cc 100644 --- a/runtime/program/process_elf_binary.cpp +++ b/runtime/program/process_elf_binary.cpp @@ -9,6 +9,7 @@ #include "core/elf/reader.h" #include "core/elf/writer.h" #include "core/helpers/string.h" +#include "runtime/compiler_interface/patchtokens_decoder.h" #include "program.h" @@ -61,12 +62,12 @@ cl_int Program::processElfBinary( break; case CLElfLib::E_SH_TYPE::SH_TYPE_OPENCL_DEV_BINARY: - if (sectionHeader.DataSize > 0 && validateGenBinaryHeader(reinterpret_cast(elfReader.getSectionData(sectionHeader.DataOffset)))) { + if (sectionHeader.DataSize > 0 && validateGenBinaryHeader(reinterpret_cast(elfReader.getSectionData(sectionHeader.DataOffset)))) { this->genBinary = makeCopy(elfReader.getSectionData(sectionHeader.DataOffset), static_cast(sectionHeader.DataSize)); this->genBinarySize = static_cast(sectionHeader.DataSize); isCreatedFromBinary = true; } else { - getProgramCompilerVersion(reinterpret_cast(elfReader.getSectionData(sectionHeader.DataOffset)), binaryVersion); + binaryVersion = reinterpret_cast(elfReader.getSectionData(sectionHeader.DataOffset))->Version; return CL_INVALID_BINARY; } break; diff --git a/runtime/program/process_gen_binary.cpp b/runtime/program/process_gen_binary.cpp index bf86a92c16..9d8efdf41c 100644 --- a/runtime/program/process_gen_binary.cpp +++ b/runtime/program/process_gen_binary.cpp @@ -7,14 +7,18 @@ #include "core/helpers/aligned_memory.h" #include "core/helpers/debug_helpers.h" -#include "core/helpers/hash.h" #include "core/helpers/ptr_math.h" #include "core/helpers/string.h" #include "core/memory_manager/unified_memory_manager.h" +#include "runtime/compiler_interface/patchtokens_decoder.h" +#include "runtime/compiler_interface/patchtokens_dumper.h" +#include "runtime/compiler_interface/patchtokens_validator.inl" #include "runtime/context/context.h" #include "runtime/device/device.h" #include "runtime/gtpin/gtpin_notify.h" #include 
"runtime/memory_manager/memory_manager.h" +#include "runtime/program/kernel_info.h" +#include "runtime/program/kernel_info_from_patchtokens.h" #include "runtime/program/program.h" #include "patch_list.h" @@ -62,815 +66,73 @@ std::string Program::getKernelNamesString() const { return semiColonDelimitedKernelNameStr; } -size_t Program::processKernel( - const void *pKernelBlob, +void Program::populateKernelInfo( + const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram, uint32_t kernelNum, cl_int &retVal) { - size_t sizeProcessed = 0; - do { - auto pKernelInfo = new KernelInfo(); - if (!pKernelInfo) { - retVal = CL_OUT_OF_HOST_MEMORY; - break; - } + auto kernelInfo = std::make_unique(); + const PatchTokenBinary::KernelFromPatchtokens &decodedKernel = decodedProgram.kernels[kernelNum]; - auto pCurKernelPtr = pKernelBlob; - pKernelInfo->heapInfo.pBlob = pKernelBlob; + NEO::populateKernelInfo(*kernelInfo, decodedKernel); + retVal = kernelInfo->resolveKernelInfo(); + if (retVal != CL_SUCCESS) { + return; + } + kernelInfo->gpuPointerSize = decodedProgram.header->GPUPointerSizeInBytes; - pKernelInfo->heapInfo.pKernelHeader = reinterpret_cast(pCurKernelPtr); - pCurKernelPtr = ptrOffset(pCurKernelPtr, sizeof(SKernelBinaryHeaderCommon)); - - std::string readName{reinterpret_cast(pCurKernelPtr), pKernelInfo->heapInfo.pKernelHeader->KernelNameSize}; - pKernelInfo->name = readName.c_str(); - pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->KernelNameSize); - - pKernelInfo->heapInfo.pKernelHeap = pCurKernelPtr; - pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->KernelHeapSize); - - pKernelInfo->heapInfo.pGsh = pCurKernelPtr; - pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->GeneralStateHeapSize); - - pKernelInfo->heapInfo.pDsh = pCurKernelPtr; - pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->DynamicStateHeapSize); - - pKernelInfo->heapInfo.pSsh = 
const_cast(pCurKernelPtr); - pCurKernelPtr = ptrOffset(pCurKernelPtr, pKernelInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize); - - pKernelInfo->heapInfo.pPatchList = pCurKernelPtr; - - retVal = parsePatchList(*pKernelInfo, kernelNum); - if (retVal != CL_SUCCESS) { - delete pKernelInfo; - sizeProcessed = ptrDiff(pCurKernelPtr, pKernelBlob); - break; - } - - auto pKernelHeader = pKernelInfo->heapInfo.pKernelHeader; - auto pKernel = ptrOffset(pKernelBlob, sizeof(SKernelBinaryHeaderCommon)); - - if (genBinary) - pKernelInfo->gpuPointerSize = reinterpret_cast(genBinary.get())->GPUPointerSizeInBytes; - - uint32_t kernelSize = - pKernelHeader->DynamicStateHeapSize + - pKernelHeader->GeneralStateHeapSize + - pKernelHeader->KernelHeapSize + - pKernelHeader->KernelNameSize + - pKernelHeader->PatchListSize + - pKernelHeader->SurfaceStateHeapSize; - - pKernelInfo->heapInfo.blobSize = kernelSize + sizeof(SKernelBinaryHeaderCommon); - - uint32_t kernelCheckSum = pKernelInfo->heapInfo.pKernelHeader->CheckSum; - - uint64_t hashValue = Hash::hash(reinterpret_cast(pKernel), kernelSize); - - uint32_t calcCheckSum = hashValue & 0xFFFFFFFF; - pKernelInfo->isValid = (calcCheckSum == kernelCheckSum); - - retVal = CL_SUCCESS; - sizeProcessed = sizeof(SKernelBinaryHeaderCommon) + kernelSize; - kernelInfoArray.push_back(pKernelInfo); - if (pKernelInfo->hasDeviceEnqueue()) { - parentKernelInfoArray.push_back(pKernelInfo); - } - if (pKernelInfo->requiresSubgroupIndependentForwardProgress()) { - subgroupKernelInfoArray.push_back(pKernelInfo); - } - } while (false); - - return sizeProcessed; -} - -cl_int Program::parsePatchList(KernelInfo &kernelInfo, uint32_t kernelNum) { - cl_int retVal = CL_SUCCESS; - - auto pPatchList = kernelInfo.heapInfo.pPatchList; - auto patchListSize = kernelInfo.heapInfo.pKernelHeader->PatchListSize; - auto pCurPatchListPtr = pPatchList; - uint32_t PrivateMemoryStatelessSizeOffset = 0xFFffFFff; - uint32_t LocalMemoryStatelessWindowSizeOffset = 0xFFffFFff; - 
uint32_t LocalMemoryStatelessWindowStartAddressOffset = 0xFFffFFff; - - //Speed up containers by giving some pre-allocated storage - kernelInfo.kernelArgInfo.reserve(10); - kernelInfo.patchInfo.kernelArgumentInfo.reserve(10); - kernelInfo.patchInfo.dataParameterBuffers.reserve(20); - - DBG_LOG(LogPatchTokens, "\nPATCH_TOKENs for kernel", kernelInfo.name); - - while (ptrDiff(pCurPatchListPtr, pPatchList) < patchListSize) { - uint32_t index = 0; - uint32_t argNum = 0; - auto pPatch = reinterpret_cast(pCurPatchListPtr); - const SPatchDataParameterBuffer *pDataParameterBuffer = nullptr; - - switch (pPatch->Token) { - case PATCH_TOKEN_SAMPLER_STATE_ARRAY: - kernelInfo.patchInfo.samplerStateArray = - reinterpret_cast(pPatch); - DBG_LOG(LogPatchTokens, - "\n.SAMPLER_STATE_ARRAY", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .Offset", kernelInfo.patchInfo.samplerStateArray->Offset, - "\n .Count", kernelInfo.patchInfo.samplerStateArray->Count, - "\n .BorderColorOffset", kernelInfo.patchInfo.samplerStateArray->BorderColorOffset); - break; - - case PATCH_TOKEN_BINDING_TABLE_STATE: - kernelInfo.patchInfo.bindingTableState = - reinterpret_cast(pPatch); - kernelInfo.usesSsh = (kernelInfo.patchInfo.bindingTableState->Count > 0); - DBG_LOG(LogPatchTokens, - "\n.BINDING_TABLE_STATE", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .Offset", kernelInfo.patchInfo.bindingTableState->Offset, - "\n .Count", kernelInfo.patchInfo.bindingTableState->Count, - "\n .SurfaceStateOffset", kernelInfo.patchInfo.bindingTableState->SurfaceStateOffset); - break; - - case PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE: - kernelInfo.patchInfo.localsurface = - reinterpret_cast(pPatch); - kernelInfo.workloadInfo.slmStaticSize = kernelInfo.patchInfo.localsurface->TotalInlineLocalMemorySize; - DBG_LOG(LogPatchTokens, - "\n.ALLOCATE_LOCAL_SURFACE", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .TotalInlineLocalMemorySize", kernelInfo.patchInfo.localsurface->TotalInlineLocalMemorySize); - break; - - case 
PATCH_TOKEN_MEDIA_VFE_STATE: - kernelInfo.patchInfo.mediavfestate = - reinterpret_cast(pPatch); - DBG_LOG(LogPatchTokens, - "\n.MEDIA_VFE_STATE", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ScratchSpaceOffset", kernelInfo.patchInfo.mediavfestate->ScratchSpaceOffset, - "\n .PerThreadScratchSpace", kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace); - break; - - case PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1: - kernelInfo.patchInfo.mediaVfeStateSlot1 = - reinterpret_cast(pPatch); - DBG_LOG(LogPatchTokens, - "\n.MEDIA_VFE_STATE_SLOT1", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ScratchSpaceOffset", kernelInfo.patchInfo.mediaVfeStateSlot1->ScratchSpaceOffset, - "\n .PerThreadScratchSpace", kernelInfo.patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace); - break; - - case PATCH_TOKEN_DATA_PARAMETER_BUFFER: - DBG_LOG(LogPatchTokens, - "\n.DATA_PARAMETER_BUFFER", pPatch->Token, - "\n .Size", pPatch->Size); - - pDataParameterBuffer = reinterpret_cast(pPatch); - kernelInfo.patchInfo.dataParameterBuffers.push_back( - pDataParameterBuffer); - argNum = pDataParameterBuffer->ArgumentNumber; - switch (pDataParameterBuffer->Type) { - case DATA_PARAMETER_KERNEL_ARGUMENT: - kernelInfo.storeKernelArgument(pDataParameterBuffer); - DBG_LOG(LogPatchTokens, "\n .Type", "KERNEL_ARGUMENT"); - break; - - case DATA_PARAMETER_LOCAL_WORK_SIZE: { - DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_WORK_SIZE"); - index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t); - if (kernelInfo.workloadInfo.localWorkSizeOffsets[2] == WorkloadInfo::undefinedOffset) { - kernelInfo.workloadInfo.localWorkSizeOffsets[index] = - pDataParameterBuffer->Offset; - } else { - kernelInfo.workloadInfo.localWorkSizeOffsets2[index] = - pDataParameterBuffer->Offset; - } - break; - } - - case DATA_PARAMETER_GLOBAL_WORK_OFFSET: - DBG_LOG(LogPatchTokens, "\n .Type", "GLOBAL_WORK_OFFSET"); - index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t); - kernelInfo.workloadInfo.globalWorkOffsetOffsets[index] = 
- pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "ENQUEUED_LOCAL_WORK_SIZE"); - index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t); - kernelInfo.workloadInfo.enqueuedLocalWorkSizeOffsets[index] = - pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_GLOBAL_WORK_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "GLOBAL_WORK_SIZE"); - index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t); - kernelInfo.workloadInfo.globalWorkSizeOffsets[index] = - pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_NUM_WORK_GROUPS: - DBG_LOG(LogPatchTokens, "\n .Type", "NUM_WORK_GROUPS"); - index = pDataParameterBuffer->SourceOffset / sizeof(uint32_t); - kernelInfo.workloadInfo.numWorkGroupsOffset[index] = - pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_MAX_WORKGROUP_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "MAX_WORKGROUP_SIZE"); - kernelInfo.workloadInfo.maxWorkGroupSizeOffset = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_WORK_DIMENSIONS: - DBG_LOG(LogPatchTokens, "\n .Type", "WORK_DIMENSIONS"); - kernelInfo.workloadInfo.workDimOffset = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES: { - DBG_LOG(LogPatchTokens, "\n .Type", "SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - - KernelArgPatchInfo kernelArgPatchInfo; - kernelArgPatchInfo.size = pDataParameterBuffer->DataSize; - kernelArgPatchInfo.crossthreadOffset = pDataParameterBuffer->Offset; - - kernelInfo.kernelArgInfo[argNum].slmAlignment = pDataParameterBuffer->SourceOffset; - kernelInfo.kernelArgInfo[argNum].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); - } break; - - case DATA_PARAMETER_IMAGE_WIDTH: - DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_WIDTH"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - 
kernelInfo.kernelArgInfo[argNum].offsetImgWidth = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_IMAGE_HEIGHT: - DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_HEIGHT"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetImgHeight = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_IMAGE_DEPTH: - DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_DEPTH"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetImgDepth = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED: - DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_COORDINATE_SNAP_WA_REQUIRED"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetSamplerSnapWa = pDataParameterBuffer->Offset; - break; - case DATA_PARAMETER_SAMPLER_ADDRESS_MODE: - DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_ADDRESS_MODE"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetSamplerAddressingMode = pDataParameterBuffer->Offset; - break; - case DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS: - DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_ADDRESS_MODE"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetSamplerNormalizedCoords = pDataParameterBuffer->Offset; - break; - case DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE: - DBG_LOG(LogPatchTokens, "\n .Type", "SAMPLER_ADDRESS_MODE"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetChannelDataType = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_IMAGE_CHANNEL_ORDER: - DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_CHANNEL_ORDER"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetChannelOrder = pDataParameterBuffer->Offset; - break; - case 
DATA_PARAMETER_IMAGE_ARRAY_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_ARRAY_SIZE"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetArraySize = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_OBJECT_ID: - DBG_LOG(LogPatchTokens, "\n .Type", "OBJECT_ID"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetObjectId = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_SIMD_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "SIMD_SIZE"); - kernelInfo.workloadInfo.simdSizeOffset = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_PARENT_EVENT: - DBG_LOG(LogPatchTokens, "\n .Type", "PARENT_EVENT"); - kernelInfo.workloadInfo.parentEventOffset = pDataParameterBuffer->Offset; - break; - - case DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "CHILD_BLOCK_SIMD_SIZE"); - kernelInfo.childrenKernelsIdOffset.push_back({argNum, pDataParameterBuffer->Offset}); - break; - - case DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "PRIVATE_MEMORY_STATELESS_SIZE"); - PrivateMemoryStatelessSizeOffset = pDataParameterBuffer->Offset; - break; - case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_MEMORY_STATELESS_WINDOW_SIZE"); - LocalMemoryStatelessWindowSizeOffset = pDataParameterBuffer->Offset; - break; - case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS: - DBG_LOG(LogPatchTokens, "\n .Type", "LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS"); - LocalMemoryStatelessWindowStartAddressOffset = pDataParameterBuffer->Offset; - break; - case DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE: - DBG_LOG(LogPatchTokens, "\n .Type", "PREFERRED_WORKGROUP_MULTIPLE"); - kernelInfo.workloadInfo.preferredWkgMultipleOffset = pDataParameterBuffer->Offset; - break; - case DATA_PARAMETER_BUFFER_OFFSET: - DBG_LOG(LogPatchTokens, "\n .Type", 
"DATA_PARAMETER_BUFFER_OFFSET"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetBufferOffset = pDataParameterBuffer->Offset; - break; - case DATA_PARAMETER_NUM_HARDWARE_THREADS: - case DATA_PARAMETER_PRINTF_SURFACE_SIZE: - DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled", pDataParameterBuffer->Type); - printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, - "Program::parsePatchList.Unhandled Data parameter: %d\n", pDataParameterBuffer->Type); - break; - - case DATA_PARAMETER_VME_MB_BLOCK_TYPE: - DBG_LOG(LogPatchTokens, "\n .Type", "VME_MB_BLOCK_TYPE"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetVmeMbBlockType = pDataParameterBuffer->Offset; - DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t)); - break; - case DATA_PARAMETER_VME_SUBPIXEL_MODE: - DBG_LOG(LogPatchTokens, "\n .Type", "VME_SUBPIXEL_MODE"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetVmeSubpixelMode = pDataParameterBuffer->Offset; - DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t)); - break; - case DATA_PARAMETER_VME_SAD_ADJUST_MODE: - DBG_LOG(LogPatchTokens, "\n .Type", "VME_SAD_ADJUST_MODE"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetVmeSadAdjustMode = pDataParameterBuffer->Offset; - DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t)); - break; - case DATA_PARAMETER_VME_SEARCH_PATH_TYPE: - DBG_LOG(LogPatchTokens, "\n .Type", "VME_SEARCH_PATH_TYPE"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetVmeSearchPathType = pDataParameterBuffer->Offset; - DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t)); - break; - case DATA_PARAMETER_IMAGE_NUM_SAMPLES: - DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_NUM_SAMPLES"); - 
kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetNumSamples = pDataParameterBuffer->Offset; - DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t)); - break; - - case DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS: - DBG_LOG(LogPatchTokens, "\n .Type", "IMAGE_NUM_MIP_LEVELS"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].offsetNumMipLevels = pDataParameterBuffer->Offset; - DEBUG_BREAK_IF(pDataParameterBuffer->DataSize != sizeof(uint32_t)); - break; - case DATA_PARAMETER_BUFFER_STATEFUL: - DBG_LOG(LogPatchTokens, "\n .Type", "BUFFER_STATEFUL"); - kernelInfo.resizeKernelArgInfoAndRegisterParameter(argNum); - kernelInfo.kernelArgInfo[argNum].pureStatefulBufferAccess = true; - break; - case DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER: - case DATA_PARAMETER_STAGE_IN_GRID_ORIGIN: - case DATA_PARAMETER_STAGE_IN_GRID_SIZE: - break; - - case DATA_PARAMETER_LOCAL_ID: - case DATA_PARAMETER_EXECUTION_MASK: - case DATA_PARAMETER_VME_IMAGE_TYPE: - case DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE: - break; - - default: - kernelInfo.patchInfo.dataParameterBuffers.pop_back(); - - DBG_LOG(LogPatchTokens, "\n .Type", "Unhandled", pDataParameterBuffer->Type); - DEBUG_BREAK_IF(true); - } - - DBG_LOG(LogPatchTokens, - "\n .ArgumentNumber", pDataParameterBuffer->ArgumentNumber, - "\n .Offset", pDataParameterBuffer->Offset, - "\n .DataSize", pDataParameterBuffer->DataSize, - "\n .SourceOffset", pDataParameterBuffer->SourceOffset); - - break; - - case PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD: - kernelInfo.patchInfo.interfaceDescriptorDataLoad = - reinterpret_cast(pPatch); - DBG_LOG(LogPatchTokens, - "\n.MEDIA_INTERFACE_DESCRIPTOR_LOAD", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .InterfaceDescriptorDataOffset", kernelInfo.patchInfo.interfaceDescriptorDataLoad->InterfaceDescriptorDataOffset); - break; - - case PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA: - 
kernelInfo.patchInfo.interfaceDescriptorData = - reinterpret_cast(pPatch); - DBG_LOG(LogPatchTokens, - "\n.INTERFACE_DESCRIPTOR_DATA", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .Offset", kernelInfo.patchInfo.interfaceDescriptorData->Offset, - "\n .SamplerStateOffset", kernelInfo.patchInfo.interfaceDescriptorData->SamplerStateOffset, - "\n .KernelOffset", kernelInfo.patchInfo.interfaceDescriptorData->KernelOffset, - "\n .BindingTableOffset", kernelInfo.patchInfo.interfaceDescriptorData->BindingTableOffset); - break; - - case PATCH_TOKEN_THREAD_PAYLOAD: - kernelInfo.patchInfo.threadPayload = - reinterpret_cast(pPatch); - DBG_LOG(LogPatchTokens, - "\n.THREAD_PAYLOAD", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .HeaderPresent", kernelInfo.patchInfo.threadPayload->HeaderPresent, - "\n .LocalIDXPresent", kernelInfo.patchInfo.threadPayload->LocalIDXPresent, - "\n .LocalIDYPresent", kernelInfo.patchInfo.threadPayload->LocalIDYPresent, - "\n .LocalIDZPresent", kernelInfo.patchInfo.threadPayload->LocalIDZPresent, - "\n .LocalIDFlattenedPresent", kernelInfo.patchInfo.threadPayload->LocalIDFlattenedPresent, - "\n .IndirectPayloadStorage", kernelInfo.patchInfo.threadPayload->IndirectPayloadStorage, - "\n .UnusedPerThreadConstantPresent", kernelInfo.patchInfo.threadPayload->UnusedPerThreadConstantPresent, - "\n .GetLocalIDPresent", kernelInfo.patchInfo.threadPayload->GetLocalIDPresent, - "\n .GetGroupIDPresent", kernelInfo.patchInfo.threadPayload->GetGroupIDPresent, - "\n .GetGlobalOffsetPresent", kernelInfo.patchInfo.threadPayload->GetGlobalOffsetPresent, - "\n .OffsetToSkipPerThreadDataLoad", kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad, - "\n .PassInlineData", kernelInfo.patchInfo.threadPayload->PassInlineData); - break; - - case PATCH_TOKEN_EXECUTION_ENVIRONMENT: - kernelInfo.patchInfo.executionEnvironment = - reinterpret_cast(pPatch); - if (kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX != 0) { - 
kernelInfo.reqdWorkGroupSize[0] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX; - kernelInfo.reqdWorkGroupSize[1] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY; - kernelInfo.reqdWorkGroupSize[2] = kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; - DEBUG_BREAK_IF(!(kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY > 0)); - DEBUG_BREAK_IF(!(kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ > 0)); - } - kernelInfo.workgroupWalkOrder[0] = 0; - kernelInfo.workgroupWalkOrder[1] = 1; - kernelInfo.workgroupWalkOrder[2] = 2; - if (kernelInfo.patchInfo.executionEnvironment->WorkgroupWalkOrderDims) { - constexpr auto dimensionMask = 0b11; - constexpr auto dimensionSize = 2; - kernelInfo.workgroupWalkOrder[0] = kernelInfo.patchInfo.executionEnvironment->WorkgroupWalkOrderDims & dimensionMask; - kernelInfo.workgroupWalkOrder[1] = (kernelInfo.patchInfo.executionEnvironment->WorkgroupWalkOrderDims >> dimensionSize) & dimensionMask; - kernelInfo.workgroupWalkOrder[2] = (kernelInfo.patchInfo.executionEnvironment->WorkgroupWalkOrderDims >> dimensionSize * 2) & dimensionMask; - kernelInfo.requiresWorkGroupOrder = true; - } - - for (uint32_t i = 0; i < 3; ++i) { - // inverts the walk order mapping (from ORDER_ID->DIM_ID to DIM_ID->ORDER_ID) - kernelInfo.workgroupDimensionsOrder[kernelInfo.workgroupWalkOrder[i]] = i; - } - - if (kernelInfo.patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers == false) { - kernelInfo.requiresSshForBuffers = true; - } - DBG_LOG(LogPatchTokens, - "\n.EXECUTION_ENVIRONMENT", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .RequiredWorkGroupSizeX", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeX, - "\n .RequiredWorkGroupSizeY", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeY, - "\n .RequiredWorkGroupSizeZ", kernelInfo.patchInfo.executionEnvironment->RequiredWorkGroupSizeZ, - "\n .LargestCompiledSIMDSize", 
kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize, - "\n .CompiledSubGroupsNumber", kernelInfo.patchInfo.executionEnvironment->CompiledSubGroupsNumber, - "\n .HasBarriers", kernelInfo.patchInfo.executionEnvironment->HasBarriers, - "\n .DisableMidThreadPreemption", kernelInfo.patchInfo.executionEnvironment->DisableMidThreadPreemption, - "\n .CompiledSIMD8", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD8, - "\n .CompiledSIMD16", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD16, - "\n .CompiledSIMD32", kernelInfo.patchInfo.executionEnvironment->CompiledSIMD32, - "\n .HasDeviceEnqueue", kernelInfo.patchInfo.executionEnvironment->HasDeviceEnqueue, - "\n .MayAccessUndeclaredResource", kernelInfo.patchInfo.executionEnvironment->MayAccessUndeclaredResource, - "\n .UsesFencesForReadWriteImages", kernelInfo.patchInfo.executionEnvironment->UsesFencesForReadWriteImages, - "\n .UsesStatelessSpillFill", kernelInfo.patchInfo.executionEnvironment->UsesStatelessSpillFill, - "\n .IsCoherent", kernelInfo.patchInfo.executionEnvironment->IsCoherent, - "\n .SubgroupIndependentForwardProgressRequired", kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired, - "\n .WorkgroupWalkOrderDim0", kernelInfo.workgroupWalkOrder[0], - "\n .WorkgroupWalkOrderDim1", kernelInfo.workgroupWalkOrder[1], - "\n .WorkgroupWalkOrderDim2", kernelInfo.workgroupWalkOrder[2], - "\n .NumGRFRequired", kernelInfo.patchInfo.executionEnvironment->NumGRFRequired); - break; - - case PATCH_TOKEN_DATA_PARAMETER_STREAM: - kernelInfo.patchInfo.dataParameterStream = - reinterpret_cast(pPatch); - DBG_LOG(LogPatchTokens, - "\n.DATA_PARAMETER_STREAM", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .DataParameterStreamSize", kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize); - break; - - case PATCH_TOKEN_KERNEL_ARGUMENT_INFO: { - auto pkernelArgInfo = reinterpret_cast(pPatch); - kernelInfo.storeArgInfo(pkernelArgInfo); - 
DBG_LOG(LogPatchTokens, - "\n.KERNEL_ARGUMENT_INFO", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ArgumentNumber", pkernelArgInfo->ArgumentNumber, - "\n .AddressQualifierSize", pkernelArgInfo->AddressQualifierSize, - "\n .AccessQualifierSize", pkernelArgInfo->AccessQualifierSize, - "\n .ArgumentNameSize", pkernelArgInfo->ArgumentNameSize, - "\n .TypeNameSize", pkernelArgInfo->TypeNameSize, - "\n .TypeQualifierSize", pkernelArgInfo->TypeQualifierSize); - break; - } - - case PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO: - kernelInfo.patchInfo.pKernelAttributesInfo = - reinterpret_cast(pPatch); - kernelInfo.storePatchToken(kernelInfo.patchInfo.pKernelAttributesInfo); - DBG_LOG(LogPatchTokens, - "\n.KERNEL_ATTRIBUTES_INFO", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .AttributesSize", kernelInfo.patchInfo.pKernelAttributesInfo->AttributesSize); - break; - - case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT: { - const SPatchSamplerKernelArgument *pSamplerKernelObjectKernelArg = nullptr; - - pSamplerKernelObjectKernelArg = reinterpret_cast(pPatch); - kernelInfo.storeKernelArgument(pSamplerKernelObjectKernelArg); - DBG_LOG(LogPatchTokens, - "\n.SAMPLER_KERNEL_ARGUMENT", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ArgumentNumber", pSamplerKernelObjectKernelArg->ArgumentNumber, - "\n .Type", pSamplerKernelObjectKernelArg->Type, - "\n .Offset", pSamplerKernelObjectKernelArg->Offset); - }; - break; - - case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT: { - const SPatchImageMemoryObjectKernelArgument *pImageMemObjectKernelArg = nullptr; - - pImageMemObjectKernelArg = - reinterpret_cast(pPatch); - kernelInfo.storeKernelArgument(pImageMemObjectKernelArg); - DBG_LOG(LogPatchTokens, - "\n.IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ArgumentNumber", pImageMemObjectKernelArg->ArgumentNumber, - "\n .Type", pImageMemObjectKernelArg->Type, - "\n .Offset", pImageMemObjectKernelArg->Offset, - "\n .LocationIndex", 
pImageMemObjectKernelArg->LocationIndex, - "\n .LocationIndex2", pImageMemObjectKernelArg->LocationIndex2, - "\n .Transformable", pImageMemObjectKernelArg->Transformable); - }; - break; - - case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: { - const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjectKernelArg = nullptr; - pGlobalMemObjectKernelArg = - reinterpret_cast(pPatch); - kernelInfo.storeKernelArgument(pGlobalMemObjectKernelArg); - DBG_LOG(LogPatchTokens, - "\n.GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ArgumentNumber", pGlobalMemObjectKernelArg->ArgumentNumber, - "\n .Offset", pGlobalMemObjectKernelArg->Offset, - "\n .LocationIndex", pGlobalMemObjectKernelArg->LocationIndex, - "\n .LocationIndex2", pGlobalMemObjectKernelArg->LocationIndex2); - }; - break; - - case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: { - const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalMemObjKernelArg = nullptr; - - pStatelessGlobalMemObjKernelArg = - reinterpret_cast(pPatch); - kernelInfo.storeKernelArgument(pStatelessGlobalMemObjKernelArg); - DBG_LOG(LogPatchTokens, - "\n.STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ArgumentNumber", pStatelessGlobalMemObjKernelArg->ArgumentNumber, - "\n .SurfaceStateHeapOffset", pStatelessGlobalMemObjKernelArg->SurfaceStateHeapOffset, - "\n .DataParamOffset", pStatelessGlobalMemObjKernelArg->DataParamOffset, - "\n .DataParamSize", pStatelessGlobalMemObjKernelArg->DataParamSize, - "\n .LocationIndex", pStatelessGlobalMemObjKernelArg->LocationIndex, - "\n .LocationIndex2", pStatelessGlobalMemObjKernelArg->LocationIndex2); - }; - break; - - case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT: { - const SPatchStatelessConstantMemoryObjectKernelArgument *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storeKernelArgument(pPatchToken); - DBG_LOG(LogPatchTokens, - 
"\n.STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ArgumentNumber", pPatchToken->ArgumentNumber, - "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset, - "\n .DataParamOffset", pPatchToken->DataParamOffset, - "\n .DataParamSize", pPatchToken->DataParamSize); - } break; - - case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT: { - const SPatchStatelessDeviceQueueKernelArgument *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storeKernelArgument(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ArgumentNumber", pPatchToken->ArgumentNumber, - "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset, - "\n .DataParamOffset", pPatchToken->DataParamOffset, - "\n .DataParamSize", pPatchToken->DataParamSize); - } break; - - case PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY: { - const SPatchAllocateStatelessPrivateSurface *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storePatchToken(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.ALLOCATE_STATELESS_PRIVATE_MEMORY", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset, - "\n .DataParamOffset", pPatchToken->DataParamOffset, - "\n .DataParamSize", pPatchToken->DataParamSize, - "\n .PerThreadPrivateMemorySize", pPatchToken->PerThreadPrivateMemorySize); - } break; - - case PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION: { - const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storePatchToken(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ConstantBufferIndex", pPatchToken->ConstantBufferIndex, - "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset, - "\n .DataParamOffset", 
pPatchToken->DataParamOffset, - "\n .DataParamSize", pPatchToken->DataParamSize); - } break; - - case PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION: { - const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storePatchToken(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .GlobalBufferIndex", pPatchToken->GlobalBufferIndex, - "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset, - "\n .DataParamOffset", pPatchToken->DataParamOffset, - "\n .DataParamSize", pPatchToken->DataParamSize); - } break; - - case PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE: { - const SPatchAllocateStatelessPrintfSurface *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storePatchToken(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.ALLOCATE_STATELESS_PRINTF_SURFACE", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .PrintfSurfaceIndex", pPatchToken->PrintfSurfaceIndex, - "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset, - "\n .DataParamOffset", pPatchToken->DataParamOffset, - "\n .DataParamSize", pPatchToken->DataParamSize); - } break; - - case PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE: { - const SPatchAllocateStatelessEventPoolSurface *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storePatchToken(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.ALLOCATE_STATELESS_EVENT_POOL_SURFACE", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .EventPoolSurfaceIndex", pPatchToken->EventPoolSurfaceIndex, - "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset, - "\n .DataParamOffset", pPatchToken->DataParamOffset, - "\n .DataParamSize", pPatchToken->DataParamSize); - } break; - - case PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE: { - const SPatchAllocateStatelessDefaultDeviceQueueSurface *pPatchToken = reinterpret_cast(pPatch); - 
kernelInfo.storePatchToken(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .SurfaceStateHeapOffset", pPatchToken->SurfaceStateHeapOffset, - "\n .DataParamOffset", pPatchToken->DataParamOffset, - "\n .DataParamSize", pPatchToken->DataParamSize); - } break; - - case PATCH_TOKEN_STRING: { - const SPatchString *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storePatchToken(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.STRING", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .Index", pPatchToken->Index, - "\n .StringSize", pPatchToken->StringSize); - } break; - - case PATCH_TOKEN_INLINE_VME_SAMPLER_INFO: - kernelInfo.isVmeWorkload = true; - DBG_LOG(LogPatchTokens, - "\n.INLINE_VME_SAMPLER_INFO", pPatch->Token, - "\n .Size", pPatch->Size); - break; - - case PATCH_TOKEN_GTPIN_FREE_GRF_INFO: { - const SPatchGtpinFreeGRFInfo *pPatchToken = reinterpret_cast(pPatch); - DBG_LOG(LogPatchTokens, - "\n.PATCH_TOKEN_GTPIN_FREE_GRF_INFO", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .BufferSize", pPatchToken->BufferSize); - } break; - - case PATCH_TOKEN_STATE_SIP: { - const SPatchStateSIP *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.systemKernelOffset = pPatchToken->SystemKernelOffset; - DBG_LOG(LogPatchTokens, - "\n.PATCH_TOKEN_STATE_SIP", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .SystemKernelOffset", pPatchToken->SystemKernelOffset); - } break; - - case PATCH_TOKEN_ALLOCATE_SIP_SURFACE: { - auto *pPatchToken = reinterpret_cast(pPatch); - kernelInfo.storePatchToken(pPatchToken); - DBG_LOG(LogPatchTokens, - "\n.PATCH_TOKEN_ALLOCATE_SIP_SURFACE", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .BTI", pPatchToken->BTI, - "\n .Offset", pPatchToken->Offset, - "\n .PerThreadSystemThreadSurfaceSize", pPatchToken->PerThreadSystemThreadSurfaceSize); - } break; - case PATCH_TOKEN_GTPIN_INFO: { - auto igcInfo = ptrOffset(pCurPatchListPtr, sizeof(SPatchItemHeader)); - 
kernelInfo.igcInfoForGtpin = static_cast(igcInfo); - DBG_LOG(LogPatchTokens, - "\n.PATCH_TOKEN_GTPIN_INFO", pPatch->Token, - "\n .Size", pPatch->Size); - break; - } - - case PATCH_TOKEN_PROGRAM_SYMBOL_TABLE: { - const auto patch = reinterpret_cast(pPatch); - prepareLinkerInputStorage(); - linkerInput->decodeExportedFunctionsSymbolTable(patch + 1, patch->NumEntries, kernelNum); - } break; - - case PATCH_TOKEN_PROGRAM_RELOCATION_TABLE: { - const auto patch = reinterpret_cast(pPatch); - prepareLinkerInputStorage(); - linkerInput->decodeRelocationTable(patch + 1, patch->NumEntries, kernelNum); - } break; - - default: - printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, " Program::parsePatchList. Unknown Patch Token: %d\n", pPatch->Token); - if (false == isSafeToSkipUnhandledToken(pPatch->Token)) { - retVal = CL_INVALID_KERNEL; - } - break; - } - - if (retVal != CL_SUCCESS) { - break; - } - pCurPatchListPtr = ptrOffset(pCurPatchListPtr, pPatch->Size); + if (decodedKernel.tokens.programSymbolTable) { + prepareLinkerInputStorage(); + linkerInput->decodeExportedFunctionsSymbolTable(decodedKernel.tokens.programSymbolTable + 1, decodedKernel.tokens.programSymbolTable->NumEntries, kernelNum); } - if (retVal == CL_SUCCESS) { - retVal = kernelInfo.resolveKernelInfo(); + if (decodedKernel.tokens.programRelocationTable) { + prepareLinkerInputStorage(); + linkerInput->decodeRelocationTable(decodedKernel.tokens.programRelocationTable + 1, decodedKernel.tokens.programRelocationTable->NumEntries, kernelNum); } - if (kernelInfo.patchInfo.dataParameterStream && kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize) { - uint32_t crossThreadDataSize = kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize; - kernelInfo.crossThreadData = new char[crossThreadDataSize]; - memset(kernelInfo.crossThreadData, 0x00, crossThreadDataSize); + if (kernelInfo->patchInfo.dataParameterStream && 
kernelInfo->patchInfo.dataParameterStream->DataParameterStreamSize) { + uint32_t crossThreadDataSize = kernelInfo->patchInfo.dataParameterStream->DataParameterStreamSize; + kernelInfo->crossThreadData = new char[crossThreadDataSize]; + memset(kernelInfo->crossThreadData, 0x00, crossThreadDataSize); - if (LocalMemoryStatelessWindowStartAddressOffset != 0xFFffFFff) { - *(uintptr_t *)&(kernelInfo.crossThreadData[LocalMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast(this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment)); + uint32_t privateMemoryStatelessSizeOffset = kernelInfo->workloadInfo.privateMemoryStatelessSizeOffset; + uint32_t localMemoryStatelessWindowSizeOffset = kernelInfo->workloadInfo.localMemoryStatelessWindowSizeOffset; + uint32_t localMemoryStatelessWindowStartAddressOffset = kernelInfo->workloadInfo.localMemoryStatelessWindowStartAddressOffset; + + if (localMemoryStatelessWindowStartAddressOffset != 0xFFffFFff) { + *(uintptr_t *)&(kernelInfo->crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast(this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment)); } - if (LocalMemoryStatelessWindowSizeOffset != 0xFFffFFff) { - *(uint32_t *)&(kernelInfo.crossThreadData[LocalMemoryStatelessWindowSizeOffset]) = (uint32_t)this->pDevice->getDeviceInfo().localMemSize; + if (localMemoryStatelessWindowSizeOffset != 0xFFffFFff) { + *(uint32_t *)&(kernelInfo->crossThreadData[localMemoryStatelessWindowSizeOffset]) = (uint32_t)this->pDevice->getDeviceInfo().localMemSize; } - if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface && (PrivateMemoryStatelessSizeOffset != 0xFFffFFff)) { - *(uint32_t *)&(kernelInfo.crossThreadData[PrivateMemoryStatelessSizeOffset]) = kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * 
this->getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * kernelInfo.getMaxSimdSize(); + if (kernelInfo->patchInfo.pAllocateStatelessPrivateSurface && (privateMemoryStatelessSizeOffset != 0xFFffFFff)) { + *(uint32_t *)&(kernelInfo->crossThreadData[privateMemoryStatelessSizeOffset]) = kernelInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * this->getDevice(0).getDeviceInfo().computeUnitsUsedForScratch * kernelInfo->getMaxSimdSize(); } - if (kernelInfo.workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) { - *(uint32_t *)&(kernelInfo.crossThreadData[kernelInfo.workloadInfo.maxWorkGroupSizeOffset]) = (uint32_t)this->getDevice(0).getDeviceInfo().maxWorkGroupSize; + if (kernelInfo->workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) { + *(uint32_t *)&(kernelInfo->crossThreadData[kernelInfo->workloadInfo.maxWorkGroupSizeOffset]) = (uint32_t)this->getDevice(0).getDeviceInfo().maxWorkGroupSize; } } - if (kernelInfo.heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) { - retVal = kernelInfo.createKernelAllocation(this->pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager()) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY; + if (kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) { + retVal = kernelInfo->createKernelAllocation(this->pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager()) ? 
CL_SUCCESS : CL_OUT_OF_HOST_MEMORY; } - if (this->pDevice && kernelInfo.workloadInfo.slmStaticSize > this->pDevice->getDeviceInfo().localMemSize) { - retVal = CL_OUT_OF_RESOURCES; + DEBUG_BREAK_IF(kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice); + if (retVal != CL_SUCCESS) { + return; } - DEBUG_BREAK_IF(kernelInfo.heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice); - - return retVal; + if (kernelInfo->hasDeviceEnqueue()) { + parentKernelInfoArray.push_back(kernelInfo.get()); + } + if (kernelInfo->requiresSubgroupIndependentForwardProgress()) { + subgroupKernelInfoArray.push_back(kernelInfo.get()); + } + kernelInfoArray.push_back(kernelInfo.release()); } inline uint64_t readMisalignedUint64(const uint64_t *address) { @@ -902,155 +164,67 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::Context *ctx, NEO::Device *devic } } -cl_int Program::parseProgramScopePatchList() { - cl_int retVal = CL_SUCCESS; - size_t globalVariablesSurfaceSize = 0U, globalConstantsSurfaceSize = 0U; - const void *globalVariablesInitData = nullptr, *globalConstantsInitData = nullptr; - - auto pPatchList = programScopePatchList; - auto patchListSize = programScopePatchListSize; - auto pCurPatchListPtr = pPatchList; - cl_uint headerSize = 0; - - std::vector globalVariablesSelfPatches; - std::vector globalConstantsSelfPatches; - - while (ptrDiff(pCurPatchListPtr, pPatchList) < patchListSize) { - auto pPatch = reinterpret_cast(pCurPatchListPtr); - switch (pPatch->Token) { - case PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO: { - auto patch = *(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo *)pPatch; - - if (constantSurface) { - pDevice->getMemoryManager()->freeGraphicsMemory(constantSurface); - } - - globalConstantsSurfaceSize = patch.InlineDataSize; - headerSize = sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo); - - globalConstantsInitData = (cl_char *)pPatch + headerSize; - pCurPatchListPtr = ptrOffset(pCurPatchListPtr, 
globalConstantsSurfaceSize); - DBG_LOG(LogPatchTokens, - "\n .ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ConstantBufferIndex", patch.ConstantBufferIndex, - "\n .InitializationDataSize", patch.InlineDataSize); - }; - break; - - case PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO: { - auto patch = *(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo *)pPatch; - - if (globalSurface) { - pDevice->getMemoryManager()->freeGraphicsMemory(globalSurface); - } - - globalVariablesSurfaceSize = patch.InlineDataSize; - globalVarTotalSize += (size_t)globalVariablesSurfaceSize; - headerSize = sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo); - - globalVariablesInitData = (cl_char *)pPatch + headerSize; - pCurPatchListPtr = ptrOffset(pCurPatchListPtr, globalVariablesSurfaceSize); - DBG_LOG(LogPatchTokens, - "\n .ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .BufferType", patch.Type, - "\n .GlobalBufferIndex", patch.GlobalBufferIndex, - "\n .InitializationDataSize", patch.InlineDataSize); - }; - break; - - case PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO: { - auto patch = *(SPatchGlobalPointerProgramBinaryInfo *)pPatch; - if ((patch.GlobalBufferIndex == 0) && (patch.BufferIndex == 0) && (patch.BufferType == PROGRAM_SCOPE_GLOBAL_BUFFER)) { - globalVariablesSelfPatches.push_back(readMisalignedUint64(&patch.GlobalPointerOffset)); - } else { - printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. 
Unhandled Data parameter: %d\n", pPatch->Token); - } - DBG_LOG(LogPatchTokens, - "\n .GLOBAL_POINTER_PROGRAM_BINARY_INFO", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .GlobalBufferIndex", patch.GlobalBufferIndex, - "\n .GlobalPointerOffset", patch.GlobalPointerOffset, - "\n .BufferType", patch.BufferType, - "\n .BufferIndex", patch.BufferIndex); - } break; - - case PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO: { - auto patch = *(SPatchConstantPointerProgramBinaryInfo *)pPatch; - if ((patch.ConstantBufferIndex == 0) && (patch.BufferIndex == 0) && (patch.BufferType == PROGRAM_SCOPE_CONSTANT_BUFFER)) { - globalConstantsSelfPatches.push_back(readMisalignedUint64(&patch.ConstantPointerOffset)); - } else { - printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. Unhandled Data parameter: %d\n", pPatch->Token); - } - DBG_LOG(LogPatchTokens, - "\n .CONSTANT_POINTER_PROGRAM_BINARY_INFO", pPatch->Token, - "\n .Size", pPatch->Size, - "\n .ConstantBufferIndex", patch.ConstantBufferIndex, - "\n .ConstantPointerOffset", patch.ConstantPointerOffset, - "\n .BufferType", patch.BufferType, - "\n .BufferIndex", patch.BufferIndex); - } break; - - case PATCH_TOKEN_PROGRAM_SYMBOL_TABLE: { - const auto patch = reinterpret_cast(pPatch); - prepareLinkerInputStorage(); - linkerInput->decodeGlobalVariablesSymbolTable(patch + 1, patch->NumEntries); - } break; - +cl_int Program::isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) const { + std::string validatorErrMessage; + std::string validatorWarnings; + auto availableSlm = this->pDevice ? 
static_cast(this->pDevice->getDeviceInfo().localMemSize) : 0U; + auto validatorErr = PatchTokenBinary::validate(decodedProgram, availableSlm, *this, validatorErrMessage, validatorWarnings); + if (validatorWarnings.empty() == false) { + printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", validatorWarnings.c_str()); + } + if (validatorErr != PatchTokenBinary::ValidatorError::Success) { + printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", validatorErrMessage.c_str()); + switch (validatorErr) { default: - if (false == isSafeToSkipUnhandledToken(pPatch->Token)) { - retVal = CL_INVALID_BINARY; - } - printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, " Program::parseProgramScopePatchList. Unknown Patch Token: %d\n", pPatch->Token); - DBG_LOG(LogPatchTokens, - "\n .Program Unknown Patch Token", pPatch->Token, - "\n .Size", pPatch->Size); + return CL_INVALID_BINARY; + case PatchTokenBinary::ValidatorError::NotEnoughSlm: + return CL_OUT_OF_RESOURCES; } + } + return CL_SUCCESS; +} - if (retVal != CL_SUCCESS) { - break; - } - pCurPatchListPtr = ptrOffset(pCurPatchListPtr, pPatch->Size); +void Program::processProgramScopeMetadata(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) { + if (decodedProgram.programScopeTokens.symbolTable != nullptr) { + const auto patch = decodedProgram.programScopeTokens.symbolTable; + this->prepareLinkerInputStorage(); + this->linkerInput->decodeGlobalVariablesSymbolTable(patch + 1, patch->NumEntries); } - if (globalConstantsSurfaceSize != 0) { + if (decodedProgram.programScopeTokens.allocateConstantMemorySurface.size() != 0) { + pDevice->getMemoryManager()->freeGraphicsMemory(this->constantSurface); + auto exportsGlobals = (linkerInput && linkerInput->getTraits().exportsGlobalConstants); - constantSurface = allocateGlobalsSurface(context, pDevice, globalConstantsSurfaceSize, true, exportsGlobals, globalConstantsInitData); + size_t globalConstantsSurfaceSize = 
decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]->InlineDataSize; + const void *globalConstantsInitData = NEO::PatchTokenBinary::getInlineData(decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]); + this->constantSurface = allocateGlobalsSurface(context, pDevice, globalConstantsSurfaceSize, true, exportsGlobals, globalConstantsInitData); } - if (globalVariablesSurfaceSize != 0) { + if (decodedProgram.programScopeTokens.allocateGlobalMemorySurface.size() != 0) { + pDevice->getMemoryManager()->freeGraphicsMemory(this->globalSurface); + auto exportsGlobals = (linkerInput && linkerInput->getTraits().exportsGlobalVariables); - globalSurface = allocateGlobalsSurface(context, pDevice, globalVariablesSurfaceSize, false, exportsGlobals, globalVariablesInitData); + size_t globalVariablesSurfaceSize = decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]->InlineDataSize; + const void *globalVariablesInitData = NEO::PatchTokenBinary::getInlineData(decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]); + this->globalVarTotalSize = globalVariablesSurfaceSize; + this->globalSurface = allocateGlobalsSurface(context, pDevice, globalVariablesSurfaceSize, false, exportsGlobals, globalVariablesInitData); } - for (auto offset : globalVariablesSelfPatches) { - if (globalSurface == nullptr) { - retVal = CL_INVALID_BINARY; - } else { - void *pPtr = ptrOffset(globalSurface->getUnderlyingBuffer(), static_cast(offset)); - if (globalSurface->is32BitAllocation()) { - *reinterpret_cast(pPtr) += static_cast(globalSurface->getGpuAddressToPatch()); - } else { - *reinterpret_cast(pPtr) += static_cast(globalSurface->getGpuAddressToPatch()); - } - } + for (const auto &globalConstantPointerToken : decodedProgram.programScopeTokens.constantPointer) { + UNRECOVERABLE_IF(this->constantSurface == nullptr); + auto offset = readMisalignedUint64(&globalConstantPointerToken->ConstantPointerOffset); + 
/* the whole patched slot [offset, offset + pointer size) must fit inside the constant surface */ UNRECOVERABLE_IF(this->constantSurface->getUnderlyingBufferSize() < (offset + (constantSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t)))); + void *patchOffset = ptrOffset(this->constantSurface->getUnderlyingBuffer(), static_cast(offset)); + patchIncrement(patchOffset, constantSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t), constantSurface->getGpuAddressToPatch()); } - for (auto offset : globalConstantsSelfPatches) { - if (constantSurface == nullptr) { - retVal = CL_INVALID_BINARY; - } else { - void *pPtr = ptrOffset(constantSurface->getUnderlyingBuffer(), static_cast(offset)); - if (constantSurface->is32BitAllocation()) { - *reinterpret_cast(pPtr) += static_cast(constantSurface->getGpuAddressToPatch()); - } else { - *reinterpret_cast(pPtr) += static_cast(constantSurface->getGpuAddressToPatch()); - } - } + for (const auto &globalVariablePointerToken : decodedProgram.programScopeTokens.globalPointer) { + UNRECOVERABLE_IF(this->globalSurface == nullptr); + auto offset = readMisalignedUint64(&globalVariablePointerToken->GlobalPointerOffset); + /* the whole patched slot [offset, offset + pointer size) must fit inside the global surface */ UNRECOVERABLE_IF(this->globalSurface->getUnderlyingBufferSize() < (offset + (globalSurface->is32BitAllocation() ? 4 : sizeof(uintptr_t)))); + void *patchOffset = ptrOffset(this->globalSurface->getUnderlyingBuffer(), static_cast(offset)); + patchIncrement(patchOffset, globalSurface->is32BitAllocation() ? 
4 : sizeof(uintptr_t), globalSurface->getGpuAddressToPatch()); } - - return retVal; } cl_int Program::linkBinary() { @@ -1113,45 +287,30 @@ cl_int Program::linkBinary() { } cl_int Program::processGenBinary() { - cl_int retVal = CL_SUCCESS; - cleanCurrentKernelInfo(); - do { - if (!genBinary || genBinarySize == 0) { - retVal = CL_INVALID_BINARY; - break; - } - - auto pCurBinaryPtr = genBinary.get(); - auto pGenBinaryHeader = reinterpret_cast(pCurBinaryPtr); - if (!validateGenBinaryHeader(pGenBinaryHeader)) { - retVal = CL_INVALID_BINARY; - break; - } - - pCurBinaryPtr = ptrOffset(pCurBinaryPtr, sizeof(SProgramBinaryHeader)); - programScopePatchList = pCurBinaryPtr; - programScopePatchListSize = pGenBinaryHeader->PatchListSize; - - pCurBinaryPtr = ptrOffset(pCurBinaryPtr, pGenBinaryHeader->PatchListSize); - - auto numKernels = pGenBinaryHeader->NumberOfKernels; - for (uint32_t i = 0; i < numKernels && retVal == CL_SUCCESS; i++) { - - size_t bytesProcessed = processKernel(pCurBinaryPtr, i, retVal); - pCurBinaryPtr = ptrOffset(pCurBinaryPtr, bytesProcessed); - } - - if (programScopePatchListSize != 0u) { - retVal = parseProgramScopePatchList(); - } - } while (false); - - if (retVal == CL_SUCCESS) { - retVal = linkBinary(); + auto blob = ArrayRef(reinterpret_cast(genBinary.get()), genBinarySize); + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram = {}; + NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(blob, decodedProgram); + DBG_LOG(LogPatchTokens, NEO::PatchTokenBinary::asString(decodedProgram).c_str()); + cl_int retVal = this->isHandled(decodedProgram); + if (CL_SUCCESS != retVal) { + return retVal; } + auto numKernels = decodedProgram.header->NumberOfKernels; + for (uint32_t i = 0; i < numKernels && retVal == CL_SUCCESS; i++) { + populateKernelInfo(decodedProgram, i, retVal); + } + + if (retVal != CL_SUCCESS) { + return retVal; + } + + processProgramScopeMetadata(decodedProgram); + + retVal = linkBinary(); + return retVal; } diff --git 
a/runtime/program/program.cpp b/runtime/program/program.cpp index 53d20e47d8..494a8c4809 100644 --- a/runtime/program/program.cpp +++ b/runtime/program/program.cpp @@ -18,6 +18,8 @@ #include "runtime/device/device.h" #include "runtime/memory_manager/memory_manager.h" #include "runtime/os_interface/os_context.h" +#include "runtime/program/block_kernel_manager.h" +#include "runtime/program/kernel_info.h" #include @@ -53,8 +55,6 @@ Program::Program(ExecutionEnvironment &executionEnvironment, Context *context, b constantSurface = nullptr; globalSurface = nullptr; globalVarTotalSize = 0; - programScopePatchListSize = 0; - programScopePatchList = nullptr; programOptionVersion = 12u; allowNonUniform = false; char paramValue[32] = {}; @@ -254,14 +254,6 @@ cl_int Program::updateSpecializationConstant(cl_uint specId, size_t specSize, co return CL_INVALID_SPEC_ID; } -void Program::getProgramCompilerVersion( - SProgramBinaryHeader *pSectionData, - uint32_t &binaryVersion) const { - if (pSectionData != nullptr) { - binaryVersion = pSectionData->Version; - } -} - bool Program::isValidLlvmBinary( const void *pBinary, size_t binarySize) { diff --git a/runtime/program/program.h b/runtime/program/program.h index 01ed468340..67c5edfa8a 100644 --- a/runtime/program/program.h +++ b/runtime/program/program.h @@ -8,29 +8,28 @@ #pragma once #include "core/compiler_interface/compiler_interface.h" #include "core/compiler_interface/linker.h" -#include "core/elf/reader.h" #include "core/elf/writer.h" -#include "core/helpers/stdio.h" #include "runtime/api/cl_types.h" #include "runtime/helpers/base_object.h" -#include "runtime/helpers/string_helpers.h" -#include "runtime/program/block_kernel_manager.h" -#include "runtime/program/kernel_info.h" #include "cif/builtins/memory/buffer/buffer.h" -#include "igfxfmid.h" #include "patch_list.h" #include #include #include -#define OCLRT_ALIGN(a, b) ((((a) % (b)) != 0) ? 
((a) - ((a) % (b)) + (b)) : (a)) - namespace NEO { +namespace PatchTokenBinary { +struct ProgramFromPatchtokens; +} + +class BlockKernelManager; +class BuiltinDispatchInfoBuilder; class Context; class CompilerInterface; class ExecutionEnvironment; +struct KernelInfo; template <> struct OpenCLObjectMapper<_cl_program> { typedef class Program DerivedType; @@ -53,8 +52,6 @@ constexpr cl_int asClError(TranslationOutput::ErrorCode err) { } } -bool isSafeToSkipUnhandledToken(unsigned int token); - class Program : public BaseObject<_cl_program> { public: static const cl_ulong objectMagic = 0x5651C89100AAACFELL; @@ -122,8 +119,6 @@ class Program : public BaseObject<_cl_program> { cl_int build(const cl_device_id device, const char *buildOptions, bool enableCaching, std::unordered_map &builtinsMap); - cl_int build(const char *pKernelData, size_t kernelDataSize); - MOCKABLE_VIRTUAL cl_int processGenBinary(); cl_int compile(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions, @@ -191,10 +186,6 @@ class Program : public BaseObject<_cl_program> { return isSpirV; } - size_t getProgramScopePatchListSize() const { - return programScopePatchListSize; - } - GraphicsAllocation *getConstantSurface() const { return constantSurface; } @@ -255,32 +246,23 @@ class Program : public BaseObject<_cl_program> { return this->linkerInput.get(); } + MOCKABLE_VIRTUAL bool isSafeToSkipUnhandledToken(unsigned int token) const; + protected: Program(ExecutionEnvironment &executionEnvironment); - MOCKABLE_VIRTUAL bool isSafeToSkipUnhandledToken(unsigned int token) const; - MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize); - bool optionsAreNew(const char *options) const; - - cl_int processElfHeader(const CLElfLib::SElf64Header *pElfHeader, - cl_program_binary_type &binaryType, uint32_t &numSections); - - void getProgramCompilerVersion(SProgramBinaryHeader *pSectionData, uint32_t &binaryVersion) const; - cl_int resolveProgramBinary(); 
MOCKABLE_VIRTUAL cl_int linkBinary(); - cl_int parseProgramScopePatchList(); + MOCKABLE_VIRTUAL cl_int isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) const; + void processProgramScopeMetadata(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram); + void populateKernelInfo(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram, uint32_t kernelNum, cl_int &retVal); MOCKABLE_VIRTUAL cl_int rebuildProgramFromIr(); - cl_int parsePatchList(KernelInfo &pKernelInfo, uint32_t kernelNum); - - size_t processKernel(const void *pKernelBlob, uint32_t kernelNum, cl_int &retVal); - bool validateGenBinaryDevice(GFXCORE_FAMILY device) const; bool validateGenBinaryHeader(const iOpenCL::SProgramBinaryHeader *pGenBinaryHeader) const; @@ -321,10 +303,6 @@ class Program : public BaseObject<_cl_program> { std::vector kernelInfoArray; std::vector parentKernelInfoArray; std::vector subgroupKernelInfoArray; - BlockKernelManager *blockKernelManager; - - const void *programScopePatchList; - size_t programScopePatchListSize; GraphicsAllocation *constantSurface; GraphicsAllocation *globalSurface; @@ -340,8 +318,6 @@ class Program : public BaseObject<_cl_program> { std::string options; std::string internalOptions; static const std::vector internalOptionsToExtract; - std::string hashFileName; - std::string hashFilePath; uint32_t programOptionVersion; bool allowNonUniform; @@ -356,6 +332,7 @@ class Program : public BaseObject<_cl_program> { CIF::RAII::UPtr_t specConstantsSizes; CIF::RAII::UPtr_t specConstantsValues; + BlockKernelManager *blockKernelManager; ExecutionEnvironment &executionEnvironment; Context *context; Device *pDevice; @@ -363,6 +340,5 @@ class Program : public BaseObject<_cl_program> { bool isBuiltIn; bool kernelDebugEnabled = false; - friend class OfflineCompiler; }; } // namespace NEO diff --git a/unit_tests/api/cl_build_program_tests.inl b/unit_tests/api/cl_build_program_tests.inl index ac9c442090..5182661d28 100644 --- 
a/unit_tests/api/cl_build_program_tests.inl +++ b/unit_tests/api/cl_build_program_tests.inl @@ -9,6 +9,7 @@ #include "core/helpers/file_io.h" #include "runtime/context/context.h" #include "runtime/device/device.h" +#include "runtime/program/kernel_info.h" #include "runtime/program/program.h" #include "unit_tests/helpers/kernel_binary_helper.h" #include "unit_tests/helpers/test_files.h" diff --git a/unit_tests/api/cl_create_kernel_tests.inl b/unit_tests/api/cl_create_kernel_tests.inl index e5988ad567..cb5040e8c2 100644 --- a/unit_tests/api/cl_create_kernel_tests.inl +++ b/unit_tests/api/cl_create_kernel_tests.inl @@ -7,6 +7,7 @@ #include "core/helpers/file_io.h" #include "runtime/context/context.h" +#include "runtime/program/kernel_info.h" #include "unit_tests/helpers/test_files.h" #include "unit_tests/mocks/mock_program.h" diff --git a/unit_tests/compiler_interface/CMakeLists.txt b/unit_tests/compiler_interface/CMakeLists.txt index 97eceae01e..bd6864c797 100644 --- a/unit_tests/compiler_interface/CMakeLists.txt +++ b/unit_tests/compiler_interface/CMakeLists.txt @@ -7,6 +7,10 @@ set(IGDRCL_SRCS_tests_compiler_interface ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/default_cl_cache_config_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_decoder_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_dumper_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_validator_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/patchtokens_tests.h ) get_property(NEO_CORE_COMPILER_INTERFACE_TESTS GLOBAL PROPERTY NEO_CORE_COMPILER_INTERFACE_TESTS) diff --git a/unit_tests/compiler_interface/patchtokens_decoder_tests.cpp b/unit_tests/compiler_interface/patchtokens_decoder_tests.cpp new file mode 100644 index 0000000000..f932755108 --- /dev/null +++ b/unit_tests/compiler_interface/patchtokens_decoder_tests.cpp @@ -0,0 +1,1180 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "core/helpers/hash.h" +#include 
"runtime/compiler_interface/patchtokens_decoder.h" +#include "test.h" + +#include "patchtokens_tests.h" + +#include + +bool hasEmptyHeaps(const NEO::PatchTokenBinary::KernelFromPatchtokens &kernel) { + return kernel.heaps.generalState.empty() && kernel.heaps.dynamicState.empty() && kernel.heaps.surfaceState.empty(); +} + +bool hasEmptyTokensInfo(const NEO::PatchTokenBinary::KernelFromPatchtokens &kernel) { + auto &toks = kernel.tokens; + bool empty = true; + empty &= nullptr == toks.samplerStateArray; + empty &= nullptr == toks.bindingTableState; + empty &= nullptr == toks.allocateLocalSurface; + empty &= nullptr == toks.mediaVfeState[0]; + empty &= nullptr == toks.mediaVfeState[1]; + empty &= nullptr == toks.mediaInterfaceDescriptorLoad; + empty &= nullptr == toks.interfaceDescriptorData; + empty &= nullptr == toks.threadPayload; + empty &= nullptr == toks.executionEnvironment; + empty &= nullptr == toks.dataParameterStream; + empty &= nullptr == toks.kernelAttributesInfo; + empty &= nullptr == toks.allocateStatelessPrivateSurface; + empty &= nullptr == toks.allocateStatelessConstantMemorySurfaceWithInitialization; + empty &= nullptr == toks.allocateStatelessGlobalMemorySurfaceWithInitialization; + empty &= nullptr == toks.allocateStatelessPrintfSurface; + empty &= nullptr == toks.allocateStatelessEventPoolSurface; + empty &= nullptr == toks.allocateStatelessDefaultDeviceQueueSurface; + empty &= nullptr == toks.inlineVmeSamplerInfo; + empty &= nullptr == toks.gtpinFreeGrfInfo; + empty &= nullptr == toks.stateSip; + empty &= nullptr == toks.allocateSystemThreadSurface; + empty &= nullptr == toks.gtpinInfo; + empty &= nullptr == toks.programSymbolTable; + empty &= nullptr == toks.programRelocationTable; + empty &= toks.kernelArgs.empty(); + empty &= toks.strings.empty(); + for (int i = 0; i < 3; ++i) { + empty &= nullptr == toks.crossThreadPayloadArgs.localWorkSize[i]; + empty &= nullptr == toks.crossThreadPayloadArgs.localWorkSize[i]; + empty &= nullptr == 
toks.crossThreadPayloadArgs.localWorkSize2[i]; + empty &= nullptr == toks.crossThreadPayloadArgs.enqueuedLocalWorkSize[i]; + empty &= nullptr == toks.crossThreadPayloadArgs.numWorkGroups[i]; + empty &= nullptr == toks.crossThreadPayloadArgs.globalWorkOffset[i]; + empty &= nullptr == toks.crossThreadPayloadArgs.globalWorkSize[i]; + } + empty &= nullptr == toks.crossThreadPayloadArgs.maxWorkGroupSize; + empty &= nullptr == toks.crossThreadPayloadArgs.workDimensions; + empty &= nullptr == toks.crossThreadPayloadArgs.simdSize; + empty &= nullptr == toks.crossThreadPayloadArgs.parentEvent; + empty &= nullptr == toks.crossThreadPayloadArgs.privateMemoryStatelessSize; + empty &= nullptr == toks.crossThreadPayloadArgs.localMemoryStatelessWindowSize; + empty &= nullptr == toks.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress; + empty &= nullptr == toks.crossThreadPayloadArgs.preferredWorkgroupMultiple; + empty &= toks.crossThreadPayloadArgs.childBlockSimdSize.empty(); + return empty; +} + +bool hasEmptyTokensInfo(const NEO::PatchTokenBinary::ProgramFromPatchtokens &program) { + auto &toks = program.programScopeTokens; + bool empty = true; + empty &= toks.allocateConstantMemorySurface.empty(); + empty &= toks.allocateGlobalMemorySurface.empty(); + empty &= toks.constantPointer.empty(); + empty &= toks.globalPointer.empty(); + empty &= nullptr == toks.symbolTable; + return empty; +} + +template +uint32_t pushBackToken(iOpenCL::PATCH_TOKEN token, std::vector &storage) { + auto offset = storage.size(); + TokenT tok = PatchTokensTestData::initToken(token); + storage.insert(storage.end(), reinterpret_cast(&tok), reinterpret_cast((&tok) + 1)); + return static_cast(offset); +} + +template +uint32_t pushBackToken(const TokenT &token, std::vector &storage) { + auto offset = storage.size(); + storage.insert(storage.end(), reinterpret_cast(&token), reinterpret_cast(&token) + token.Size); + return static_cast(offset); +} + +uint32_t 
pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_TOKEN type, std::vector &storage, uint32_t sourceIndex = 0, uint32_t argNum = 0) { + iOpenCL::SPatchDataParameterBuffer tok = PatchTokensTestData::initDataParameterBufferToken(type, sourceIndex, argNum); + + auto offset = storage.size(); + storage.insert(storage.end(), reinterpret_cast(&tok), reinterpret_cast(&tok) + tok.Size); + return static_cast(offset); +} + +bool tokenOffsetMatched(const uint8_t *base, size_t tokenOffset, const iOpenCL::SPatchItemHeader *expectedToken) { + return (base + tokenOffset) == reinterpret_cast(expectedToken); +} + +TEST(GetInlineData, GivenConstantMemorySurfaceTokenThenReturnProperOffsetToInlineData) { + iOpenCL::SPatchAllocateConstantMemorySurfaceProgramBinaryInfo surfTok[2]; + EXPECT_EQ(reinterpret_cast(&surfTok[1]), NEO::PatchTokenBinary::getInlineData(&surfTok[0])); +} + +TEST(GetInlineData, GivenGlobalMemorySurfaceTokenThenReturnProperOffsetToInlineData) { + iOpenCL::SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo surfTok[2]; + EXPECT_EQ(reinterpret_cast(&surfTok[1]), NEO::PatchTokenBinary::getInlineData(&surfTok[0])); +} + +TEST(GetInlineData, GivenStringTokenThenReturnProperOffsetToInlineData) { + iOpenCL::SPatchString surfTok[2]; + EXPECT_EQ(reinterpret_cast(&surfTok[1]), NEO::PatchTokenBinary::getInlineData(&surfTok[0])); +} + +TEST(GetInlineData, GivenKernelArgumentInfoTokenThenReturnDecodedInlineData) { + std::vector storage; + std::string addressQualifier = "__global"; + std::string accessQualifier = "read_write"; + std::string argName = "custom_arg"; + std::string typeName = "int*;"; + std::string typeQualifier = "const"; + + PatchTokensTestData::pushBackArgInfoToken(storage, 0U, addressQualifier, accessQualifier, argName, typeName, typeQualifier); + + auto inlineData = NEO::PatchTokenBinary::getInlineData(reinterpret_cast(storage.data())); + EXPECT_STREQ(addressQualifier.c_str(), std::string(inlineData.addressQualifier.begin(), 
inlineData.addressQualifier.end()).c_str()); + EXPECT_STREQ(accessQualifier.c_str(), std::string(inlineData.accessQualifier.begin(), inlineData.accessQualifier.end()).c_str()); + EXPECT_STREQ(argName.c_str(), std::string(inlineData.argName.begin(), inlineData.argName.end()).c_str()); + EXPECT_STREQ(typeName.c_str(), std::string(inlineData.typeName.begin(), inlineData.typeName.end()).c_str()); + EXPECT_STREQ(typeQualifier.c_str(), std::string(inlineData.typeQualifiers.begin(), inlineData.typeQualifiers.end()).c_str()); +} + +TEST(GetInlineData, GivenKernelArgumentInfoTokenWhenNotEnoughDataThenArrayIsBoundsProtected) { + iOpenCL::SPatchKernelArgumentInfo tokInline = {}; + tokInline.AddressQualifierSize = 4; + tokInline.AccessQualifierSize = 8; + tokInline.ArgumentNameSize = 32; + tokInline.TypeNameSize = 16; + tokInline.TypeQualifierSize = 6; + tokInline.Size = sizeof(iOpenCL::SPatchKernelArgumentInfo); + auto inlineData = NEO::PatchTokenBinary::getInlineData(&tokInline); + EXPECT_EQ(0U, inlineData.addressQualifier.size()); + EXPECT_EQ(0U, inlineData.accessQualifier.size()); + EXPECT_EQ(0U, inlineData.argName.size()); + EXPECT_EQ(0U, inlineData.typeName.size()); + EXPECT_EQ(0U, inlineData.typeQualifiers.size()); +} + +TEST(KernelChecksum, GivenKernelBlobThenChecksumIsCalculatedBasedOnDataAfterKernelHeader) { + std::vector storage; + auto kernel = PatchTokensTestData::ValidEmptyKernel::create(storage); + auto calculatedChecksum = NEO::PatchTokenBinary::calcKernelChecksum(kernel.blobs.kernelInfo); + + auto dataToHash = ArrayRef(ptrOffset(storage.data(), sizeof(iOpenCL::SKernelBinaryHeaderCommon)), ptrOffset(storage.data(), storage.size())); + uint64_t hashValue = NEO::Hash::hash(reinterpret_cast(dataToHash.begin()), dataToHash.size()); + uint32_t expectedChecksum = hashValue & 0xFFFFFFFF; + EXPECT_EQ(expectedChecksum, calculatedChecksum); +} + +TEST(KernelChecksum, GivenKernelWithProperChecksumThenValidationSucceeds) { + std::vector storage; + auto kernel = 
PatchTokensTestData::ValidEmptyKernel::create(storage); + auto calculatedChecksum = NEO::PatchTokenBinary::calcKernelChecksum(kernel.blobs.kernelInfo); + EXPECT_EQ(kernel.header->CheckSum, calculatedChecksum); + EXPECT_FALSE(NEO::PatchTokenBinary::hasInvalidChecksum(kernel)); +} + +TEST(KernelChecksum, GivenKernelWithInvalidChecksumThenValidationFails) { + std::vector storage; + auto kernel = PatchTokensTestData::ValidEmptyKernel::create(storage); + auto calculatedChecksum = NEO::PatchTokenBinary::calcKernelChecksum(kernel.blobs.kernelInfo); + EXPECT_EQ(kernel.header->CheckSum, calculatedChecksum); + + ASSERT_EQ(storage.data(), kernel.blobs.kernelInfo.begin()); + reinterpret_cast(storage.data())->CheckSum += 1; + + EXPECT_TRUE(NEO::PatchTokenBinary::hasInvalidChecksum(kernel)); +} + +TEST(KernelDecoder, GivenValidEmptyKernelThenDecodingOfHeaderSucceeds) { + std::vector storage; + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelToEncode.blobs.kernelInfo, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + ASSERT_NE(nullptr, decodedKernel.header); + EXPECT_EQ(kernelToEncode.header, decodedKernel.header); + EXPECT_EQ(kernelToEncode.name, decodedKernel.name); + EXPECT_EQ(kernelToEncode.blobs.kernelInfo, decodedKernel.blobs.kernelInfo); + EXPECT_EQ(0U, decodedKernel.isa.size()); + EXPECT_TRUE(hasEmptyHeaps(decodedKernel)); + EXPECT_EQ(0U, decodedKernel.unhandledTokens.size()); + EXPECT_TRUE(hasEmptyTokensInfo(decodedKernel)); +} + +TEST(KernelDecoder, GivenEmptyKernelWhenBlobSmallerThanKernelHeaderThenDecodingFails) { + std::vector storage; + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + auto brokenBlob = 
ArrayRef(kernelToEncode.blobs.kernelInfo.begin(), + kernelToEncode.blobs.kernelInfo.begin() + sizeof(iOpenCL::SKernelBinaryHeader) - 1); + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(brokenBlob, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); +} + +TEST(KernelDecoder, GivenValidKernelWithHeapsThenDecodingSucceedsAndHeapsAreProperlySet) { + std::vector storage; + storage.reserve(512); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + + size_t isaOffset = storage.size(); + kernelHeader->KernelHeapSize = 16U; + storage.resize(storage.size() + kernelHeader->KernelHeapSize); + + size_t generalStateHeapOffset = storage.size(); + kernelHeader->GeneralStateHeapSize = 24U; + storage.resize(storage.size() + kernelHeader->GeneralStateHeapSize); + + size_t dynamicStateHeapOffset = storage.size(); + kernelHeader->DynamicStateHeapSize = 8U; + storage.resize(storage.size() + kernelHeader->DynamicStateHeapSize); + + size_t surfaceStateHeapOffset = storage.size(); + kernelHeader->SurfaceStateHeapSize = 32U; + storage.resize(storage.size() + kernelHeader->SurfaceStateHeapSize); + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + + EXPECT_EQ(kernelToEncode.header, decodedKernel.header); + EXPECT_EQ(0U, decodedKernel.unhandledTokens.size()); + EXPECT_TRUE(hasEmptyTokensInfo(decodedKernel)); + + EXPECT_EQ(kernelToEncode.name, decodedKernel.name); + EXPECT_EQ(ArrayRef(storage.data() + isaOffset, kernelHeader->KernelHeapSize), decodedKernel.isa); + 
EXPECT_EQ(ArrayRef(storage.data() + generalStateHeapOffset, kernelHeader->GeneralStateHeapSize), decodedKernel.heaps.generalState); + EXPECT_EQ(ArrayRef(storage.data() + dynamicStateHeapOffset, kernelHeader->DynamicStateHeapSize), decodedKernel.heaps.dynamicState); + EXPECT_EQ(ArrayRef(storage.data() + surfaceStateHeapOffset, kernelHeader->SurfaceStateHeapSize), decodedKernel.heaps.surfaceState); +} + +TEST(KernelDecoder, GivenEmptyKernelWhenBlobDoesntHaveEnoughSpaceForHeaderDataThenDecodingFails) { + std::vector storage; + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + iOpenCL::SKernelBinaryHeaderCommon originalHeader = *kernelToEncode.header; + uint32_t outOfBoundsSize = static_cast(storage.size()); + + decodedKernel = {}; + kernelHeader->KernelNameSize = outOfBoundsSize; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelToEncode.blobs.kernelInfo, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + kernelHeader->KernelNameSize = originalHeader.KernelNameSize; + + kernelHeader->KernelHeapSize = outOfBoundsSize; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelToEncode.blobs.kernelInfo, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + kernelHeader->KernelHeapSize = originalHeader.KernelHeapSize; + + kernelHeader->GeneralStateHeapSize = outOfBoundsSize; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelToEncode.blobs.kernelInfo, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + 
kernelHeader->GeneralStateHeapSize = originalHeader.GeneralStateHeapSize; + + kernelHeader->DynamicStateHeapSize = outOfBoundsSize; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelToEncode.blobs.kernelInfo, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + kernelHeader->DynamicStateHeapSize = originalHeader.DynamicStateHeapSize; + + kernelHeader->SurfaceStateHeapSize = outOfBoundsSize; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelToEncode.blobs.kernelInfo, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + kernelHeader->SurfaceStateHeapSize = originalHeader.SurfaceStateHeapSize; + + kernelHeader->PatchListSize = outOfBoundsSize; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelToEncode.blobs.kernelInfo, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + kernelHeader->PatchListSize = originalHeader.PatchListSize; +} + +TEST(KernelDecoder, GivenKernelWithValidKernelPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssigned) { + using namespace iOpenCL; + + std::vector storage; + storage.reserve(1024); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + auto samplerStateArrayOff = pushBackToken(PATCH_TOKEN_SAMPLER_STATE_ARRAY, storage); + auto bindingTableStateOff = pushBackToken(PATCH_TOKEN_BINDING_TABLE_STATE, storage); + auto allocateLocalSurfaceOff = pushBackToken(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE, storage); + auto mediaVfeState0Off = pushBackToken(PATCH_TOKEN_MEDIA_VFE_STATE, storage); + auto mediaVfeState1Off = pushBackToken(PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1, storage); + auto mediaInterfaceDescriptorLoadOff = 
pushBackToken(PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD, storage); + auto interfaceDescriptorDataOff = pushBackToken(PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA, storage); + auto threadPayloadOff = pushBackToken(PATCH_TOKEN_THREAD_PAYLOAD, storage); + auto executionEnvironmentOff = pushBackToken(PATCH_TOKEN_EXECUTION_ENVIRONMENT, storage); + auto kernelAttributesInfoOff = pushBackToken(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO, storage); + auto allocatedStatelessPrivateMemoryOff = pushBackToken(PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY, storage); + auto allocateStatelessConstantMemorySurfaceWithInitializationOff = pushBackToken(PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION, storage); + auto allocateStatelessGlobalMemorySurfaceWithInitializationOff = pushBackToken(PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION, storage); + auto allocateStatelessPrintfSurfaceOff = pushBackToken(PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE, storage); + auto allocateStatelessEventPoolSurfaceOff = pushBackToken(PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE, storage); + auto allocateStatelessDefaultDeviceQueueSurfaceOff = pushBackToken(PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE, storage); + auto inlineVmeSamplerInfoOff = pushBackToken(PATCH_TOKEN_INLINE_VME_SAMPLER_INFO, storage); + auto gtpinFreeGrfInfoOff = pushBackToken(PATCH_TOKEN_GTPIN_FREE_GRF_INFO, storage); + auto gtpinInfoOff = pushBackToken(PATCH_TOKEN_GTPIN_INFO, storage); + auto stateSipOff = pushBackToken(PATCH_TOKEN_STATE_SIP, storage); + auto programSymbolTableOff = pushBackToken(PATCH_TOKEN_PROGRAM_SYMBOL_TABLE, storage); + auto programRelocationTableOff = pushBackToken(PATCH_TOKEN_PROGRAM_RELOCATION_TABLE, storage); + auto dataParameterStreamOff = pushBackToken(PATCH_TOKEN_DATA_PARAMETER_STREAM, storage); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + 
kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + EXPECT_EQ(ptrOffset(storage.data(), patchListOffset), decodedKernel.blobs.patchList.begin()); + EXPECT_EQ(ptrOffset(storage.data(), storage.size()), decodedKernel.blobs.patchList.end()); + + auto base = storage.data(); + EXPECT_TRUE(tokenOffsetMatched(base, samplerStateArrayOff, decodedKernel.tokens.samplerStateArray)); + EXPECT_TRUE(tokenOffsetMatched(base, bindingTableStateOff, decodedKernel.tokens.bindingTableState)); + EXPECT_TRUE(tokenOffsetMatched(base, allocateLocalSurfaceOff, decodedKernel.tokens.allocateLocalSurface)); + EXPECT_TRUE(tokenOffsetMatched(base, mediaVfeState0Off, decodedKernel.tokens.mediaVfeState[0])); + EXPECT_TRUE(tokenOffsetMatched(base, mediaVfeState1Off, decodedKernel.tokens.mediaVfeState[1])); + EXPECT_TRUE(tokenOffsetMatched(base, mediaInterfaceDescriptorLoadOff, decodedKernel.tokens.mediaInterfaceDescriptorLoad)); + EXPECT_TRUE(tokenOffsetMatched(base, interfaceDescriptorDataOff, decodedKernel.tokens.interfaceDescriptorData)); + EXPECT_TRUE(tokenOffsetMatched(base, threadPayloadOff, decodedKernel.tokens.threadPayload)); + EXPECT_TRUE(tokenOffsetMatched(base, executionEnvironmentOff, decodedKernel.tokens.executionEnvironment)); + EXPECT_TRUE(tokenOffsetMatched(base, kernelAttributesInfoOff, decodedKernel.tokens.kernelAttributesInfo)); + EXPECT_TRUE(tokenOffsetMatched(base, allocatedStatelessPrivateMemoryOff, decodedKernel.tokens.allocateStatelessPrivateSurface)); + EXPECT_TRUE(tokenOffsetMatched(base, allocateStatelessConstantMemorySurfaceWithInitializationOff, 
decodedKernel.tokens.allocateStatelessConstantMemorySurfaceWithInitialization)); + EXPECT_TRUE(tokenOffsetMatched(base, allocateStatelessGlobalMemorySurfaceWithInitializationOff, decodedKernel.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization)); + EXPECT_TRUE(tokenOffsetMatched(base, allocateStatelessPrintfSurfaceOff, decodedKernel.tokens.allocateStatelessPrintfSurface)); + EXPECT_TRUE(tokenOffsetMatched(base, allocateStatelessEventPoolSurfaceOff, decodedKernel.tokens.allocateStatelessEventPoolSurface)); + EXPECT_TRUE(tokenOffsetMatched(base, allocateStatelessDefaultDeviceQueueSurfaceOff, decodedKernel.tokens.allocateStatelessDefaultDeviceQueueSurface)); + EXPECT_TRUE(tokenOffsetMatched(base, inlineVmeSamplerInfoOff, decodedKernel.tokens.inlineVmeSamplerInfo)); + EXPECT_TRUE(tokenOffsetMatched(base, gtpinFreeGrfInfoOff, decodedKernel.tokens.gtpinFreeGrfInfo)); + EXPECT_TRUE(tokenOffsetMatched(base, gtpinInfoOff, decodedKernel.tokens.gtpinInfo)); + EXPECT_TRUE(tokenOffsetMatched(base, stateSipOff, decodedKernel.tokens.stateSip)); + EXPECT_TRUE(tokenOffsetMatched(base, programSymbolTableOff, decodedKernel.tokens.programSymbolTable)); + EXPECT_TRUE(tokenOffsetMatched(base, programRelocationTableOff, decodedKernel.tokens.programRelocationTable)); + EXPECT_TRUE(tokenOffsetMatched(base, dataParameterStreamOff, decodedKernel.tokens.dataParameterStream)); +} + +TEST(KernelDecoder, GivenKernelWithValidStringPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssigned) { + std::vector storage; + storage.reserve(512); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchString stringTok = {}; + stringTok.Token = iOpenCL::PATCH_TOKEN::PATCH_TOKEN_STRING; + + auto patchListOffset = static_cast(storage.size()); + auto string1Off = PatchTokensTestData::pushBackStringToken("str1", 1, storage); + auto string2Off = PatchTokensTestData::pushBackStringToken("str2", 2, storage); + auto string0Off = 
PatchTokensTestData::pushBackStringToken("str0", 0, storage); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + + auto base = storage.data(); + ASSERT_EQ(3U, decodedKernel.tokens.strings.size()); + EXPECT_TRUE(tokenOffsetMatched(base, string0Off, decodedKernel.tokens.strings[0])); + EXPECT_TRUE(tokenOffsetMatched(base, string1Off, decodedKernel.tokens.strings[1])); + EXPECT_TRUE(tokenOffsetMatched(base, string2Off, decodedKernel.tokens.strings[2])); +} + +TEST(KernelDecoder, GivenKernelWithValidArgInfoPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssigned) { + std::vector storage; + storage.reserve(512); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchKernelArgumentInfo argInfoTok = {}; + argInfoTok.Token = iOpenCL::PATCH_TOKEN::PATCH_TOKEN_KERNEL_ARGUMENT_INFO; + + auto patchListOffset = static_cast(storage.size()); + + auto arg1Off = static_cast(storage.size()); + argInfoTok.ArgumentNumber = 1; + auto additionalDataSize = 8; + argInfoTok.Size = sizeof(argInfoTok) + additionalDataSize; + storage.insert(storage.end(), reinterpret_cast(&argInfoTok), reinterpret_cast((&argInfoTok) + 1)); + storage.resize(storage.size() + additionalDataSize); + + auto arg2Off = static_cast(storage.size()); + argInfoTok.ArgumentNumber = 2; + additionalDataSize = 16; + argInfoTok.Size = sizeof(argInfoTok) + additionalDataSize; + storage.insert(storage.end(), reinterpret_cast(&argInfoTok), reinterpret_cast((&argInfoTok) + 1)); + 
storage.resize(storage.size() + additionalDataSize); + + auto arg0Off = static_cast(storage.size()); + argInfoTok.ArgumentNumber = 0; + additionalDataSize = 24; + argInfoTok.Size = sizeof(argInfoTok) + additionalDataSize; + storage.insert(storage.end(), reinterpret_cast(&argInfoTok), reinterpret_cast((&argInfoTok) + 1)); + storage.resize(storage.size() + additionalDataSize); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + + auto base = storage.data(); + ASSERT_EQ(3U, decodedKernel.tokens.kernelArgs.size()); + EXPECT_TRUE(tokenOffsetMatched(base, arg0Off, decodedKernel.tokens.kernelArgs[0].argInfo)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1Off, decodedKernel.tokens.kernelArgs[1].argInfo)); + EXPECT_TRUE(tokenOffsetMatched(base, arg2Off, decodedKernel.tokens.kernelArgs[2].argInfo)); +} + +TEST(KernelDecoder, GivenKernelWithValidObjectArgPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssigned) { + std::vector storage; + storage.reserve(512); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + + iOpenCL::SPatchSamplerKernelArgument samplerTok = {}; + samplerTok.Token = iOpenCL::PATCH_TOKEN::PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT; + samplerTok.Size = sizeof(samplerTok); + samplerTok.ArgumentNumber = 3; + + iOpenCL::SPatchImageMemoryObjectKernelArgument imageTok = {}; + imageTok.Token = iOpenCL::PATCH_TOKEN::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; + imageTok.Size = sizeof(imageTok); + 
imageTok.ArgumentNumber = 1; + + iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemTok = {}; + globalMemTok.Token = iOpenCL::PATCH_TOKEN::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; + globalMemTok.Size = sizeof(globalMemTok); + globalMemTok.ArgumentNumber = 2; + + iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument statelessGlobalMemTok = {}; + statelessGlobalMemTok.Token = iOpenCL::PATCH_TOKEN::PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; + statelessGlobalMemTok.Size = sizeof(statelessGlobalMemTok); + statelessGlobalMemTok.ArgumentNumber = 0; + + iOpenCL::SPatchStatelessConstantMemoryObjectKernelArgument statelessConstantMemTok = {}; + statelessConstantMemTok.Token = iOpenCL::PATCH_TOKEN::PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT; + statelessConstantMemTok.Size = sizeof(statelessConstantMemTok); + statelessConstantMemTok.ArgumentNumber = 5; + + iOpenCL::SPatchStatelessDeviceQueueKernelArgument statelessDeviceQueueTok = {}; + statelessDeviceQueueTok.Token = iOpenCL::PATCH_TOKEN::PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT; + statelessDeviceQueueTok.Size = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); + statelessDeviceQueueTok.ArgumentNumber = 4; + + auto samplerOff = pushBackToken(samplerTok, storage); + auto imageOff = pushBackToken(imageTok, storage); + auto globalMemOff = pushBackToken(globalMemTok, storage); + auto statelessGlobalMemOff = pushBackToken(statelessGlobalMemTok, storage); + auto statelessConstantMemOff = pushBackToken(statelessConstantMemTok, storage); + auto statelessDeviceQueueOff = pushBackToken(statelessDeviceQueueTok, storage); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = 
NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + + auto base = storage.data(); + ASSERT_EQ(6U, decodedKernel.tokens.kernelArgs.size()); + EXPECT_TRUE(tokenOffsetMatched(base, samplerOff, decodedKernel.tokens.kernelArgs[samplerTok.ArgumentNumber].objectArg)); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectType::Sampler, decodedKernel.tokens.kernelArgs[samplerTok.ArgumentNumber].objectType); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[samplerTok.ArgumentNumber].objectTypeSpecialized); + + EXPECT_TRUE(tokenOffsetMatched(base, imageOff, decodedKernel.tokens.kernelArgs[imageTok.ArgumentNumber].objectArg)); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectType::Image, decodedKernel.tokens.kernelArgs[imageTok.ArgumentNumber].objectType); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[imageTok.ArgumentNumber].objectTypeSpecialized); + + EXPECT_TRUE(tokenOffsetMatched(base, globalMemOff, decodedKernel.tokens.kernelArgs[globalMemTok.ArgumentNumber].objectArg)); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectType::Buffer, decodedKernel.tokens.kernelArgs[globalMemTok.ArgumentNumber].objectType); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[globalMemTok.ArgumentNumber].objectTypeSpecialized); + + EXPECT_TRUE(tokenOffsetMatched(base, statelessGlobalMemOff, decodedKernel.tokens.kernelArgs[statelessGlobalMemTok.ArgumentNumber].objectArg)); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectType::Buffer, decodedKernel.tokens.kernelArgs[statelessGlobalMemTok.ArgumentNumber].objectType); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, 
decodedKernel.tokens.kernelArgs[statelessGlobalMemTok.ArgumentNumber].objectTypeSpecialized); + + EXPECT_TRUE(tokenOffsetMatched(base, statelessConstantMemOff, decodedKernel.tokens.kernelArgs[statelessConstantMemTok.ArgumentNumber].objectArg)); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectType::Buffer, decodedKernel.tokens.kernelArgs[statelessConstantMemTok.ArgumentNumber].objectType); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[statelessConstantMemTok.ArgumentNumber].objectTypeSpecialized); + + EXPECT_TRUE(tokenOffsetMatched(base, statelessDeviceQueueOff, decodedKernel.tokens.kernelArgs[statelessDeviceQueueTok.ArgumentNumber].objectArg)); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectType::Buffer, decodedKernel.tokens.kernelArgs[statelessDeviceQueueTok.ArgumentNumber].objectType); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[statelessDeviceQueueTok.ArgumentNumber].objectTypeSpecialized); + + for (int i = 0; i < 6; ++i) { + EXPECT_EQ(nullptr, decodedKernel.tokens.kernelArgs[i].argInfo); + EXPECT_EQ(0U, decodedKernel.tokens.kernelArgs[i].byValMap.size()); + EXPECT_EQ(nullptr, decodedKernel.tokens.kernelArgs[i].objectId); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[i].objectTypeSpecialized); + } +} + +TEST(KernelDecoder, GivenKernelWithValidNonArgCrossThreadDataPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssigned) { + using namespace iOpenCL; + + std::vector storage; + storage.reserve(2048); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + auto localWorkSize0Off = pushBackDataParameterToken(DATA_PARAMETER_LOCAL_WORK_SIZE, storage, 0U); + auto localWorkSize20Off = pushBackDataParameterToken(DATA_PARAMETER_LOCAL_WORK_SIZE, storage, 0U); + auto localWorkSize1Off = 
pushBackDataParameterToken(DATA_PARAMETER_LOCAL_WORK_SIZE, storage, 1U); + auto localWorkSize2Off = pushBackDataParameterToken(DATA_PARAMETER_LOCAL_WORK_SIZE, storage, 2U); + auto localWorkSize21Off = pushBackDataParameterToken(DATA_PARAMETER_LOCAL_WORK_SIZE, storage, 1U); + auto localWorkSize22Off = pushBackDataParameterToken(DATA_PARAMETER_LOCAL_WORK_SIZE, storage, 2U); + auto globalWorkOffset0Off = pushBackDataParameterToken(DATA_PARAMETER_GLOBAL_WORK_OFFSET, storage, 0U); + auto globalWorkOffset1Off = pushBackDataParameterToken(DATA_PARAMETER_GLOBAL_WORK_OFFSET, storage, 1U); + auto globalWorkOffset2Off = pushBackDataParameterToken(DATA_PARAMETER_GLOBAL_WORK_OFFSET, storage, 2U); + auto enqueuedLocalWorkSize0Off = pushBackDataParameterToken(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, storage, 0U); + auto enqueuedLocalWorkSize1Off = pushBackDataParameterToken(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, storage, 1U); + auto enqueuedLocalWorkSize2Off = pushBackDataParameterToken(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, storage, 2U); + auto globalWorkSize0Off = pushBackDataParameterToken(DATA_PARAMETER_GLOBAL_WORK_SIZE, storage, 0U); + auto globalWorkSize1Off = pushBackDataParameterToken(DATA_PARAMETER_GLOBAL_WORK_SIZE, storage, 1U); + auto globalWorkSize2Off = pushBackDataParameterToken(DATA_PARAMETER_GLOBAL_WORK_SIZE, storage, 2U); + auto numWorkGroups0Off = pushBackDataParameterToken(DATA_PARAMETER_NUM_WORK_GROUPS, storage, 0U); + auto numWorkGroups1Off = pushBackDataParameterToken(DATA_PARAMETER_NUM_WORK_GROUPS, storage, 1U); + auto numWorkGroups2Off = pushBackDataParameterToken(DATA_PARAMETER_NUM_WORK_GROUPS, storage, 2U); + auto maxWorkGroupsOff = pushBackDataParameterToken(DATA_PARAMETER_MAX_WORKGROUP_SIZE, storage); + auto workDimensionsOff = pushBackDataParameterToken(DATA_PARAMETER_WORK_DIMENSIONS, storage); + auto simdSizeOff = pushBackDataParameterToken(DATA_PARAMETER_SIMD_SIZE, storage); + auto privateMemoryStatelessSizeOff = 
pushBackDataParameterToken(DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE, storage); + auto localMemoryStatelessWindowSizeOff = pushBackDataParameterToken(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE, storage); + auto localMemoryStatelessWindowStartAddrOff = pushBackDataParameterToken(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS, storage); + auto parentEventOff = pushBackDataParameterToken(DATA_PARAMETER_PARENT_EVENT, storage); + auto preferredWorkgroupMultipleOff = pushBackDataParameterToken(DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE, storage); + auto childBlockSimdSize0Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage); + auto childBlockSimdSize1Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage); + auto childBlockSimdSize2Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + + auto base = storage.data(); + EXPECT_TRUE(tokenOffsetMatched(base, localWorkSize0Off, decodedKernel.tokens.crossThreadPayloadArgs.localWorkSize[0])); + EXPECT_TRUE(tokenOffsetMatched(base, localWorkSize20Off, decodedKernel.tokens.crossThreadPayloadArgs.localWorkSize2[0])); + EXPECT_TRUE(tokenOffsetMatched(base, localWorkSize1Off, decodedKernel.tokens.crossThreadPayloadArgs.localWorkSize[1])); + EXPECT_TRUE(tokenOffsetMatched(base, localWorkSize2Off, decodedKernel.tokens.crossThreadPayloadArgs.localWorkSize[2])); + EXPECT_TRUE(tokenOffsetMatched(base, 
localWorkSize21Off, decodedKernel.tokens.crossThreadPayloadArgs.localWorkSize2[1])); + EXPECT_TRUE(tokenOffsetMatched(base, localWorkSize22Off, decodedKernel.tokens.crossThreadPayloadArgs.localWorkSize2[2])); + EXPECT_TRUE(tokenOffsetMatched(base, globalWorkOffset0Off, decodedKernel.tokens.crossThreadPayloadArgs.globalWorkOffset[0])); + EXPECT_TRUE(tokenOffsetMatched(base, globalWorkOffset1Off, decodedKernel.tokens.crossThreadPayloadArgs.globalWorkOffset[1])); + EXPECT_TRUE(tokenOffsetMatched(base, globalWorkOffset2Off, decodedKernel.tokens.crossThreadPayloadArgs.globalWorkOffset[2])); + EXPECT_TRUE(tokenOffsetMatched(base, enqueuedLocalWorkSize0Off, decodedKernel.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[0])); + EXPECT_TRUE(tokenOffsetMatched(base, enqueuedLocalWorkSize1Off, decodedKernel.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[1])); + EXPECT_TRUE(tokenOffsetMatched(base, enqueuedLocalWorkSize2Off, decodedKernel.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[2])); + EXPECT_TRUE(tokenOffsetMatched(base, globalWorkSize0Off, decodedKernel.tokens.crossThreadPayloadArgs.globalWorkSize[0])); + EXPECT_TRUE(tokenOffsetMatched(base, globalWorkSize1Off, decodedKernel.tokens.crossThreadPayloadArgs.globalWorkSize[1])); + EXPECT_TRUE(tokenOffsetMatched(base, globalWorkSize2Off, decodedKernel.tokens.crossThreadPayloadArgs.globalWorkSize[2])); + EXPECT_TRUE(tokenOffsetMatched(base, numWorkGroups0Off, decodedKernel.tokens.crossThreadPayloadArgs.numWorkGroups[0])); + EXPECT_TRUE(tokenOffsetMatched(base, numWorkGroups1Off, decodedKernel.tokens.crossThreadPayloadArgs.numWorkGroups[1])); + EXPECT_TRUE(tokenOffsetMatched(base, numWorkGroups2Off, decodedKernel.tokens.crossThreadPayloadArgs.numWorkGroups[2])); + EXPECT_TRUE(tokenOffsetMatched(base, maxWorkGroupsOff, decodedKernel.tokens.crossThreadPayloadArgs.maxWorkGroupSize)); + EXPECT_TRUE(tokenOffsetMatched(base, workDimensionsOff, decodedKernel.tokens.crossThreadPayloadArgs.workDimensions)); + 
EXPECT_TRUE(tokenOffsetMatched(base, simdSizeOff, decodedKernel.tokens.crossThreadPayloadArgs.simdSize)); + EXPECT_TRUE(tokenOffsetMatched(base, privateMemoryStatelessSizeOff, decodedKernel.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize)); + EXPECT_TRUE(tokenOffsetMatched(base, localMemoryStatelessWindowSizeOff, decodedKernel.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize)); + EXPECT_TRUE(tokenOffsetMatched(base, localMemoryStatelessWindowStartAddrOff, decodedKernel.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress)); + EXPECT_TRUE(tokenOffsetMatched(base, parentEventOff, decodedKernel.tokens.crossThreadPayloadArgs.parentEvent)); + EXPECT_TRUE(tokenOffsetMatched(base, preferredWorkgroupMultipleOff, decodedKernel.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple)); + ASSERT_EQ(3U, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize.size()); + EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize0Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[0])); + EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize1Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[1])); + EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize2Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[2])); +} + +TEST(KernelDecoder, GivenKernelWithArgCrossThreadDataPatchtokensWhenSourceIndexIsGreaterThan2ThenThenDecodingSucceedsButTokenIsMarkedAsUnhandled) { + std::vector storage; + storage.reserve(128); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + + auto localWorkSize3Off = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, storage, 3U); + auto globalWorkOffset3Off = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_GLOBAL_WORK_OFFSET, storage, 3U); + auto enqueuedLocalWorkSize3Off = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, storage, 3U); + 
auto globalWorkSize3Off = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE, storage, 3U); + auto numWorkGroups3Off = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS, storage, 3U); + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + ASSERT_EQ(5U, decodedKernel.unhandledTokens.size()); + + auto base = storage.data(); + EXPECT_TRUE(tokenOffsetMatched(base, localWorkSize3Off, decodedKernel.unhandledTokens[0])); + EXPECT_TRUE(tokenOffsetMatched(base, globalWorkOffset3Off, decodedKernel.unhandledTokens[1])); + EXPECT_TRUE(tokenOffsetMatched(base, enqueuedLocalWorkSize3Off, decodedKernel.unhandledTokens[2])); + EXPECT_TRUE(tokenOffsetMatched(base, globalWorkSize3Off, decodedKernel.unhandledTokens[3])); + EXPECT_TRUE(tokenOffsetMatched(base, numWorkGroups3Off, decodedKernel.unhandledTokens[4])); +} + +TEST(KernelDecoder, GivenKernelWithUnkownPatchtokensThenDecodingSucceedsButTokenIsMarkedAsUnhandled) { + std::vector storage; + storage.reserve(128); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + auto unknownTokOff = pushBackToken(iOpenCL::NUM_PATCH_TOKENS, storage); + auto unknownCrossThreadTokOff = pushBackDataParameterToken(iOpenCL::NUM_DATA_PARAMETER_TOKENS, storage); + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + 
NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + ASSERT_EQ(2U, decodedKernel.unhandledTokens.size()); + + auto base = storage.data(); + EXPECT_TRUE(tokenOffsetMatched(base, unknownTokOff, decodedKernel.unhandledTokens[0])); + EXPECT_TRUE(tokenOffsetMatched(base, unknownCrossThreadTokOff, decodedKernel.unhandledTokens[1])); +} + +TEST(KernelDecoder, GivenKernelWithValidObjectArgMetadataPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssigned) { + std::vector storage; + storage.reserve(1024); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + + auto arg0ObjectIdOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_OBJECT_ID, storage, 0U, 0U); + auto arg0BufferOffsetOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_BUFFER_OFFSET, storage, 0U, 0U); + auto arg0BufferStatefulOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL, storage, 0U, 0U); + + auto arg1ObjectIdOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_OBJECT_ID, storage, 0U, 1U); + auto arg1ImageWidthOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_WIDTH, storage, 0U, 1U); + auto arg1ImageHeightOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_HEIGHT, storage, 0U, 1U); + auto arg1ImageDepthOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_DEPTH, storage, 0U, 1U); + auto arg1ImageChannelDataTypeOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE, storage, 0U, 1U); + auto arg1ImageChannelOrderOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_ORDER, storage, 0U, 1U); + auto arg1ImageArraySizeOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_ARRAY_SIZE, 
storage, 0U, 1U); + auto arg1ImageNumSamplesOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_NUM_SAMPLES, storage, 0U, 1U); + auto arg1ImageNumMipLevelOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS, storage, 0U, 1U); + + auto arg2SamplerCoordinateSnapWaRequiredOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED, storage, 0U, 2U); + auto arg2SamplerAddressModeOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE, storage, 0U, 2U); + auto arg2SamplerNormalizedCoordsOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS, storage, 0U, 2U); + + auto arg3SlmTokenOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES, storage, 0U, 3U); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + + ASSERT_EQ(4U, decodedKernel.tokens.kernelArgs.size()); + ASSERT_EQ(NEO::PatchTokenBinary::ArgObjectType::Buffer, decodedKernel.tokens.kernelArgs[0].objectType); + ASSERT_EQ(NEO::PatchTokenBinary::ArgObjectType::Image, decodedKernel.tokens.kernelArgs[1].objectType); + ASSERT_EQ(NEO::PatchTokenBinary::ArgObjectType::Sampler, decodedKernel.tokens.kernelArgs[2].objectType); + ASSERT_EQ(NEO::PatchTokenBinary::ArgObjectType::Slm, decodedKernel.tokens.kernelArgs[3].objectType); + + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[0].objectTypeSpecialized); + 
EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[1].objectTypeSpecialized); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[2].objectTypeSpecialized); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::None, decodedKernel.tokens.kernelArgs[3].objectTypeSpecialized); + + auto base = storage.data(); + EXPECT_TRUE(tokenOffsetMatched(base, arg0ObjectIdOff, decodedKernel.tokens.kernelArgs[0].objectId)); + EXPECT_TRUE(tokenOffsetMatched(base, arg0BufferOffsetOff, decodedKernel.tokens.kernelArgs[0].metadata.buffer.bufferOffset)); + EXPECT_TRUE(tokenOffsetMatched(base, arg0BufferStatefulOff, decodedKernel.tokens.kernelArgs[0].metadata.buffer.pureStateful)); + + EXPECT_TRUE(tokenOffsetMatched(base, arg1ObjectIdOff, decodedKernel.tokens.kernelArgs[1].objectId)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1ImageWidthOff, decodedKernel.tokens.kernelArgs[1].metadata.image.width)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1ImageHeightOff, decodedKernel.tokens.kernelArgs[1].metadata.image.height)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1ImageDepthOff, decodedKernel.tokens.kernelArgs[1].metadata.image.depth)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1ImageChannelDataTypeOff, decodedKernel.tokens.kernelArgs[1].metadata.image.channelDataType)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1ImageChannelOrderOff, decodedKernel.tokens.kernelArgs[1].metadata.image.channelOrder)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1ImageArraySizeOff, decodedKernel.tokens.kernelArgs[1].metadata.image.arraySize)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1ImageNumSamplesOff, decodedKernel.tokens.kernelArgs[1].metadata.image.numSamples)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1ImageNumMipLevelOff, decodedKernel.tokens.kernelArgs[1].metadata.image.numMipLevels)); + + EXPECT_TRUE(tokenOffsetMatched(base, arg2SamplerCoordinateSnapWaRequiredOff, 
decodedKernel.tokens.kernelArgs[2].metadata.sampler.coordinateSnapWaRequired)); + EXPECT_TRUE(tokenOffsetMatched(base, arg2SamplerAddressModeOff, decodedKernel.tokens.kernelArgs[2].metadata.sampler.addressMode)); + EXPECT_TRUE(tokenOffsetMatched(base, arg2SamplerNormalizedCoordsOff, decodedKernel.tokens.kernelArgs[2].metadata.sampler.normalizedCoords)); + + EXPECT_TRUE(tokenOffsetMatched(base, arg3SlmTokenOff, decodedKernel.tokens.kernelArgs[3].metadata.slm.token)); +} + +TEST(KernelDecoder, GivenKernelWithMismatchedArgMetadataPatchtokensThenDecodingFails) { + std::vector storage; + storage.reserve(128); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + + auto arg0Metadata0Off = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL, storage, 0U, 0U); + auto arg0Metadata1Off = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_BUFFER_OFFSET, storage, 0U, 0U); + + iOpenCL::SPatchDataParameterBuffer *arg0Metadata0 = reinterpret_cast(storage.data() + arg0Metadata0Off); + iOpenCL::SPatchDataParameterBuffer *arg0Metadata1 = reinterpret_cast(storage.data() + arg0Metadata1Off); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_IMAGE_WIDTH; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + 
EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_IMAGE_WIDTH; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_IMAGE_WIDTH; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_IMAGE_WIDTH; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + 
EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_IMAGE_WIDTH; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_IMAGE_WIDTH; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + 
EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + + decodedKernel = {}; + arg0Metadata0->Type = iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES; + arg0Metadata1->Type = iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL; + decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); +} + +TEST(KernelDecoder, GivenKernelWithMismatchedArgMetadataPatchtokensThenDecodingFailsAndStops) { + std::vector storage; + storage.reserve(128); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + + pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL, storage, 0U, 0U); + pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_IMAGE_DEPTH, storage, 0U, 0U); + auto unhandledTokenAfterInvalidOff = pushBackDataParameterToken(iOpenCL::NUM_DATA_PARAMETER_TOKENS, storage, 0U, 0U); + (void)unhandledTokenAfterInvalidOff; + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); +} + +TEST(KernelDecoder, GivenKernelWithByValArgMetadataPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssigned) { + std::vector storage; + storage.reserve(128); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + + auto arg0Val0Off = 
pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT, storage, 0U, 0U); + auto arg0Val1Off = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT, storage, 0U, 0U); + auto arg1SlmOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES, storage, 0U, 1U); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + + ASSERT_EQ(2U, decodedKernel.tokens.kernelArgs.size()); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectType::None, decodedKernel.tokens.kernelArgs[0].objectType); + ASSERT_EQ(NEO::PatchTokenBinary::ArgObjectType::Slm, decodedKernel.tokens.kernelArgs[1].objectType); + + ASSERT_EQ(2U, decodedKernel.tokens.kernelArgs[0].byValMap.size()); + ASSERT_EQ(1U, decodedKernel.tokens.kernelArgs[1].byValMap.size()); + auto base = storage.data(); + EXPECT_TRUE(tokenOffsetMatched(base, arg0Val0Off, decodedKernel.tokens.kernelArgs[0].byValMap[0])); + EXPECT_TRUE(tokenOffsetMatched(base, arg0Val1Off, decodedKernel.tokens.kernelArgs[0].byValMap[1])); + EXPECT_TRUE(tokenOffsetMatched(base, arg1SlmOff, decodedKernel.tokens.kernelArgs[1].metadata.slm.token)); + EXPECT_TRUE(tokenOffsetMatched(base, arg1SlmOff, decodedKernel.tokens.kernelArgs[1].byValMap[0])); +} + +TEST(KernelDecoder, GivenKernelWithVmeMetadataPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssigned) { + std::vector storage; + storage.reserve(128); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = 
static_cast(storage.size()); + + auto arg0VmeBlockTypeOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_VME_MB_BLOCK_TYPE, storage, 0U, 0U); + auto arg0VmeSubpixelModeOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_VME_SUBPIXEL_MODE, storage, 0U, 0U); + auto arg0VmeSadAdjustModeOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_VME_SAD_ADJUST_MODE, storage, 0U, 0U); + auto arg0VmeSearchPathTypeOff = pushBackDataParameterToken(iOpenCL::DATA_PARAMETER_VME_SEARCH_PATH_TYPE, storage, 0U, 0U); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + + ASSERT_EQ(1U, decodedKernel.tokens.kernelArgs.size()); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectType::None, decodedKernel.tokens.kernelArgs[0].objectType); + EXPECT_EQ(NEO::PatchTokenBinary::ArgObjectTypeSpecialized::Vme, decodedKernel.tokens.kernelArgs[0].objectTypeSpecialized); + ; + + auto base = storage.data(); + EXPECT_TRUE(tokenOffsetMatched(base, arg0VmeBlockTypeOff, decodedKernel.tokens.kernelArgs[0].metadataSpecialized.vme.mbBlockType)); + EXPECT_TRUE(tokenOffsetMatched(base, arg0VmeSubpixelModeOff, decodedKernel.tokens.kernelArgs[0].metadataSpecialized.vme.subpixelMode)); + EXPECT_TRUE(tokenOffsetMatched(base, arg0VmeSadAdjustModeOff, decodedKernel.tokens.kernelArgs[0].metadataSpecialized.vme.sadAdjustMode)); + EXPECT_TRUE(tokenOffsetMatched(base, arg0VmeSearchPathTypeOff, decodedKernel.tokens.kernelArgs[0].metadataSpecialized.vme.searchPathType)); +} + +TEST(KernelDecoder, 
GivenKernelWithOutOfBoundsTokenThenDecodingFails) { + std::vector storage; + storage.reserve(128); + auto kernelToEncode = PatchTokensTestData::ValidEmptyKernel::create(storage); + + auto patchListOffset = static_cast(storage.size()); + pushBackToken(iOpenCL::PATCH_TOKEN_SAMPLER_STATE_ARRAY, storage); + + ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); + auto kernelHeader = reinterpret_cast(storage.data()); + kernelHeader->PatchListSize = static_cast(storage.size()) - patchListOffset; + kernelHeader->PatchListSize -= 1; + + NEO::PatchTokenBinary::KernelFromPatchtokens decodedKernel; + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(storage, decodedKernel); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedKernel.decodeStatus); +} + +TEST(ProgramDecoder, GivenValidEmptyProgramThenDecodingOfHeaderSucceeds) { + std::vector storage; + PatchTokensTestData::ValidEmptyProgram programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + ASSERT_NE(nullptr, decodedProgram.header); + EXPECT_EQ(programToEncode.header, decodedProgram.header); + EXPECT_EQ(programToEncode.blobs.programInfo, decodedProgram.blobs.programInfo); + EXPECT_TRUE(decodedProgram.blobs.kernelsInfo.empty()); + EXPECT_TRUE(decodedProgram.blobs.patchList.empty()); + EXPECT_TRUE(decodedProgram.kernels.empty()); + EXPECT_TRUE(hasEmptyTokensInfo(decodedProgram)); +} + +TEST(ProgramDecoder, GivenProgramWhenBlobSmallerThanProgramHeaderThenDecodingFails) { + PatchTokensTestData::ValidEmptyProgram programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + auto brokenBlob = 
ArrayRef(programToEncode.blobs.programInfo.begin(), + programToEncode.blobs.programInfo.begin() + sizeof(iOpenCL::SProgramBinaryHeader) - 1); + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(brokenBlob, decodedProgram); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedProgram.decodeStatus); +} + +TEST(ProgramDecoder, GivenProgramWithInvaidProgramMagicThenDecodingFails) { + PatchTokensTestData::ValidEmptyProgram programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + programToEncode.headerMutable->Magic += 1; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedProgram.decodeStatus); +} + +TEST(ProgramDecoder, GivenProgramWhenBlobDoesntHaveEnoughSpaceForPatchListThenDecodingFails) { + PatchTokensTestData::ValidEmptyProgram programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + programToEncode.headerMutable->PatchListSize = static_cast(programToEncode.blobs.patchList.size() + 1); + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedProgram.decodeStatus); +} + +TEST(ProgramDecoder, GivenValidProgramWithConstantSurfacesThenDecodingSucceedsAndTokensAreProperlyAssigned) { + PatchTokensTestData::ValidProgramWithConstantSurface programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + 
EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + ASSERT_EQ(1U, decodedProgram.programScopeTokens.allocateConstantMemorySurface.size()); + EXPECT_EQ(programToEncode.programScopeTokens.allocateConstantMemorySurface[0], decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]); + + decodedProgram = {}; + auto inlineSize = programToEncode.programScopeTokens.allocateConstantMemorySurface[0]->InlineDataSize; + auto secondConstantSurfaceOff = programToEncode.storage.size(); + programToEncode.storage.insert(programToEncode.storage.end(), reinterpret_cast(programToEncode.constSurfMutable), + reinterpret_cast(programToEncode.constSurfMutable + 1)); + programToEncode.storage.resize(programToEncode.storage.size() + inlineSize); + programToEncode.recalcTokPtr(); + decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + + auto base = programToEncode.storage.data(); + ASSERT_EQ(2U, decodedProgram.programScopeTokens.allocateConstantMemorySurface.size()); + EXPECT_EQ(programToEncode.programScopeTokens.allocateConstantMemorySurface[0], decodedProgram.programScopeTokens.allocateConstantMemorySurface[0]); + EXPECT_TRUE(tokenOffsetMatched(base, secondConstantSurfaceOff, decodedProgram.programScopeTokens.allocateConstantMemorySurface[1])); +} + +TEST(ProgramDecoder, GivenProgramWithConstantSurfaceWhenBlobSmallerThanNeededForInlineDataThenDecodingFails) { + PatchTokensTestData::ValidProgramWithConstantSurface programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + programToEncode.headerMutable->PatchListSize -= 1; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_FALSE(decodeSuccess); + 
EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedProgram.decodeStatus); +} + +TEST(ProgramDecoder, GivenValidProgramWithGlobalSurfacesThenDecodingSucceedsAndTokensAreProperlyAssigned) { + PatchTokensTestData::ValidProgramWithGlobalSurface programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + ASSERT_EQ(1U, decodedProgram.programScopeTokens.allocateGlobalMemorySurface.size()); + EXPECT_EQ(programToEncode.programScopeTokens.allocateGlobalMemorySurface[0], decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]); + + decodedProgram = {}; + auto inlineSize = programToEncode.programScopeTokens.allocateGlobalMemorySurface[0]->InlineDataSize; + auto secondGlobalSurfaceOff = programToEncode.storage.size(); + programToEncode.storage.insert(programToEncode.storage.end(), reinterpret_cast(programToEncode.globalSurfMutable), + reinterpret_cast(programToEncode.globalSurfMutable + 1)); + programToEncode.storage.resize(programToEncode.storage.size() + inlineSize); + programToEncode.recalcTokPtr(); + decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + + auto base = programToEncode.storage.data(); + ASSERT_EQ(2U, decodedProgram.programScopeTokens.allocateGlobalMemorySurface.size()); + EXPECT_EQ(programToEncode.programScopeTokens.allocateGlobalMemorySurface[0], decodedProgram.programScopeTokens.allocateGlobalMemorySurface[0]); + EXPECT_TRUE(tokenOffsetMatched(base, 
secondGlobalSurfaceOff, decodedProgram.programScopeTokens.allocateGlobalMemorySurface[1])); +} + +TEST(ProgramDecoder, GivenProgramWithGlobalSurfaceWhenBlobSmallerThanNeededForInlineDataThenDecodingFails) { + PatchTokensTestData::ValidProgramWithGlobalSurface programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + programToEncode.headerMutable->PatchListSize -= 1; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedProgram.decodeStatus); +} + +TEST(ProgramDecoder, GivenValidProgramWithPatchtokensThenDecodingSucceedsAndTokensAreProperlyAssinged) { + using namespace iOpenCL; + + PatchTokensTestData::ValidProgramWithConstantSurfaceAndPointer programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + auto constPointer1Off = pushBackToken(PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO, programToEncode.storage); + auto constPointer2Off = pushBackToken(PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO, programToEncode.storage); + auto globalPointer0Off = pushBackToken(PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO, programToEncode.storage); + auto globalPointer1Off = pushBackToken(PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO, programToEncode.storage); + auto symbolTableOff = pushBackToken(PATCH_TOKEN_PROGRAM_SYMBOL_TABLE, programToEncode.storage); + programToEncode.recalcTokPtr(); + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + auto base = programToEncode.storage.data(); + + EXPECT_EQ(1U, programToEncode.programScopeTokens.constantPointer.size()); + ASSERT_EQ(3U, 
decodedProgram.programScopeTokens.constantPointer.size()); + ASSERT_EQ(2U, decodedProgram.programScopeTokens.globalPointer.size()); + EXPECT_TRUE(tokenOffsetMatched(base, constPointer1Off, decodedProgram.programScopeTokens.constantPointer[1])); + EXPECT_TRUE(tokenOffsetMatched(base, constPointer2Off, decodedProgram.programScopeTokens.constantPointer[2])); + EXPECT_TRUE(tokenOffsetMatched(base, globalPointer0Off, decodedProgram.programScopeTokens.globalPointer[0])); + EXPECT_TRUE(tokenOffsetMatched(base, globalPointer1Off, decodedProgram.programScopeTokens.globalPointer[1])); + EXPECT_TRUE(tokenOffsetMatched(base, symbolTableOff, decodedProgram.programScopeTokens.symbolTable)); +} + +TEST(ProgramDecoder, GivenProgramWithUnkownPatchtokensThenDecodingSucceedsButTokenIsMarkedAsUnhandled) { + PatchTokensTestData::ValidProgramWithConstantSurface programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + + programToEncode.constSurfMutable->Token = iOpenCL::NUM_PATCH_TOKENS; + programToEncode.constSurfMutable->Size += programToEncode.constSurfMutable->InlineDataSize; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + ASSERT_EQ(1U, decodedProgram.unhandledTokens.size()); + EXPECT_EQ(programToEncode.constSurfMutable, decodedProgram.unhandledTokens[0]); +} + +TEST(ProgramDecoder, GivenValidProgramWithKernelThenDecodingSucceedsAndTokensAreProperlyAssigned) { + PatchTokensTestData::ValidProgramWithKernelUsingSlm programToEncode; + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.blobs.programInfo, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + 
EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + ASSERT_EQ(1U, decodedProgram.header->NumberOfKernels); + ASSERT_EQ(1U, decodedProgram.kernels.size()); + auto decodedKernel = decodedProgram.kernels[0]; + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel.decodeStatus); + EXPECT_TRUE(decodedKernel.unhandledTokens.empty()); + EXPECT_NE(nullptr, decodedKernel.tokens.allocateLocalSurface); +} + +TEST(ProgramDecoder, GivenValidProgramWithTwoKernelsWhenThenDecodingSucceeds) { + PatchTokensTestData::ValidProgramWithKernelUsingSlm programToEncode; + programToEncode.headerMutable->NumberOfKernels = 2; + programToEncode.storage.insert(programToEncode.storage.end(), programToEncode.kernels[0].blobs.kernelInfo.begin(), programToEncode.kernels[0].blobs.kernelInfo.end()); + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.storage, decodedProgram); + EXPECT_TRUE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedProgram.decodeStatus); + EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + EXPECT_EQ(2U, decodedProgram.header->NumberOfKernels); + ASSERT_EQ(2U, decodedProgram.kernels.size()); + + auto decodedKernel0 = decodedProgram.kernels[0]; + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel0.decodeStatus); + EXPECT_TRUE(decodedKernel0.unhandledTokens.empty()); + EXPECT_NE(nullptr, decodedKernel0.tokens.allocateLocalSurface); + + auto decodedKernel1 = decodedProgram.kernels[1]; /* second decoded kernel; index 0 is already verified through decodedKernel0 */ + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::Success, decodedKernel1.decodeStatus); + EXPECT_TRUE(decodedKernel1.unhandledTokens.empty()); + EXPECT_NE(nullptr, decodedKernel1.tokens.allocateLocalSurface); +} + +TEST(ProgramDecoder, GivenPatchTokenWithZeroSizeThenDecodingFailsAndStops) { + PatchTokensTestData::ValidProgramWithKernelUsingSlm programToEncode; + programToEncode.slmMutable->Size = 0U; + 
NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.storage, decodedProgram); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedProgram.decodeStatus); +} + +TEST(ProgramDecoder, GivenProgramWithMultipleKernelsWhenFailsToDecodeKernelThenDecodingFailsAndStops) { + PatchTokensTestData::ValidProgramWithKernelUsingSlm programToEncode; + programToEncode.slmMutable->Size = 0U; + programToEncode.headerMutable->NumberOfKernels = 2; + programToEncode.storage.insert(programToEncode.storage.end(), programToEncode.kernels[0].blobs.kernelInfo.begin(), programToEncode.kernels[0].blobs.kernelInfo.end()); + NEO::PatchTokenBinary::ProgramFromPatchtokens decodedProgram; + bool decodeSuccess = NEO::PatchTokenBinary::decodeProgramFromPatchtokensBlob(programToEncode.storage, decodedProgram); + EXPECT_FALSE(decodeSuccess); + EXPECT_EQ(NEO::PatchTokenBinary::DecoderError::InvalidBinary, decodedProgram.decodeStatus); + EXPECT_TRUE(decodedProgram.unhandledTokens.empty()); + EXPECT_EQ(2U, decodedProgram.header->NumberOfKernels); + EXPECT_EQ(1U, decodedProgram.kernels.size()); +} diff --git a/unit_tests/compiler_interface/patchtokens_dumper_tests.cpp b/unit_tests/compiler_interface/patchtokens_dumper_tests.cpp new file mode 100644 index 0000000000..38ff32cf14 --- /dev/null +++ b/unit_tests/compiler_interface/patchtokens_dumper_tests.cpp @@ -0,0 +1,1754 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "runtime/compiler_interface/patchtokens_decoder.h" +#include "runtime/compiler_interface/patchtokens_dumper.h" +#include "test.h" + +#include "patchtokens_tests.h" + +#include + +TEST(ProgramDumper, GivenEmptyProgramThenProperlyCreatesDumpStringWithWarnig) { + NEO::PatchTokenBinary::ProgramFromPatchtokens emptyProgram = {}; + emptyProgram.decodeStatus = 
NEO::PatchTokenBinary::DecoderError::Undefined; + std::string generated = NEO::PatchTokenBinary::asString(emptyProgram); + const char *expected = + R"===(Program of size : 0 in undefined status +WARNING : Program header is missing +Program-scope tokens section size : 0 +Kernels section size : 0 +)==="; + EXPECT_STREQ(expected, generated.c_str()); + + emptyProgram.decodeStatus = NEO::PatchTokenBinary::DecoderError::InvalidBinary; + generated = NEO::PatchTokenBinary::asString(emptyProgram); + expected = + R"===(Program of size : 0 with invalid binary +WARNING : Program header is missing +Program-scope tokens section size : 0 +Kernels section size : 0 +)==="; + EXPECT_STREQ(expected, generated.c_str()); + + emptyProgram.decodeStatus = NEO::PatchTokenBinary::DecoderError::Success; + generated = NEO::PatchTokenBinary::asString(emptyProgram); + expected = + R"===(Program of size : 0 decoded successfully +WARNING : Program header is missing +Program-scope tokens section size : 0 +Kernels section size : 0 +)==="; + EXPECT_STREQ(expected, generated.c_str()); +} + +TEST(KernelDumper, GivenEmptyKernelThenProperlyCreatesDumpStringWithWarnig) { + NEO::PatchTokenBinary::KernelFromPatchtokens emptyKernel = {}; + emptyKernel.decodeStatus = NEO::PatchTokenBinary::DecoderError::Undefined; + std::string generated = NEO::PatchTokenBinary::asString(emptyKernel); + const char *expected = + R"===(Kernel of size : 0 in undefined status +WARNING : Kernel header is missing +Kernel-scope tokens section size : 0 +)==="; + EXPECT_STREQ(expected, generated.c_str()); + + emptyKernel.decodeStatus = NEO::PatchTokenBinary::DecoderError::InvalidBinary; + generated = NEO::PatchTokenBinary::asString(emptyKernel); + expected = + R"===(Kernel of size : 0 with invalid binary +WARNING : Kernel header is missing +Kernel-scope tokens section size : 0 +)==="; + EXPECT_STREQ(expected, generated.c_str()); + + emptyKernel.decodeStatus = NEO::PatchTokenBinary::DecoderError::Success; + generated = 
NEO::PatchTokenBinary::asString(emptyKernel); + expected = + R"===(Kernel of size : 0 decoded successfully +WARNING : Kernel header is missing +Kernel-scope tokens section size : 0 +)==="; + EXPECT_STREQ(expected, generated.c_str()); +} + +TEST(KernelArgDumper, GivenEmptyKernelArgThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens emptyKernelArg = {}; + std::string generated = NEO::PatchTokenBinary::asString(emptyKernelArg, ""); + const char *expected = + R"===(Kernel argument of type unspecified +)==="; + EXPECT_STREQ(expected, generated.c_str()); +} + +TEST(ProgramDumper, GivenProgramWithPatchtokensThenProperlyCreatesDump) { + using namespace iOpenCL; + PatchTokensTestData::ValidProgramWithConstantSurfaceAndPointer progWithConst = {}; + PatchTokensTestData::ValidProgramWithGlobalSurfaceAndPointer progWithGlobal = {}; + SPatchAllocateConstantMemorySurfaceProgramBinaryInfo constSurf2 = *progWithConst.programScopeTokens.allocateConstantMemorySurface[0]; + constSurf2.ConstantBufferIndex += 1; + constSurf2.InlineDataSize *= 2; + progWithConst.programScopeTokens.allocateConstantMemorySurface.push_back(&constSurf2); + SPatchConstantPointerProgramBinaryInfo constPointer2 = *progWithConst.programScopeTokens.constantPointer[0]; + constPointer2.BufferIndex = 1; + constPointer2.ConstantPointerOffset += 8; + constPointer2.ConstantBufferIndex = 1; + progWithConst.programScopeTokens.constantPointer.push_back(&constPointer2); + + progWithConst.programScopeTokens.allocateGlobalMemorySurface.push_back(progWithGlobal.programScopeTokens.allocateGlobalMemorySurface[0]); + SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo globSurf2 = *progWithGlobal.programScopeTokens.allocateGlobalMemorySurface[0]; + globSurf2.GlobalBufferIndex += 2; + globSurf2.InlineDataSize *= 3; + progWithConst.programScopeTokens.allocateGlobalMemorySurface.push_back(&globSurf2); + 
progWithConst.programScopeTokens.globalPointer.push_back(progWithGlobal.programScopeTokens.globalPointer[0]); + SPatchGlobalPointerProgramBinaryInfo globPointer2 = *progWithGlobal.programScopeTokens.globalPointer[0]; + globPointer2.GlobalPointerOffset += 8; + progWithConst.programScopeTokens.globalPointer.push_back(&globPointer2); + SPatchGlobalPointerProgramBinaryInfo globPointer3 = globPointer2; + globPointer3.GlobalPointerOffset += 8; + progWithConst.programScopeTokens.globalPointer.push_back(&globPointer3); + iOpenCL::SPatchFunctionTableInfo symbolTable; + symbolTable.Token = iOpenCL::PATCH_TOKEN_PROGRAM_SYMBOL_TABLE; + symbolTable.Size = sizeof(iOpenCL::SPatchFunctionTableInfo); + symbolTable.NumEntries = 7; + progWithConst.programScopeTokens.symbolTable = &symbolTable; + + auto unknownToken0 = globPointer2; + unknownToken0.Token = NUM_PATCH_TOKENS; + progWithConst.unhandledTokens.push_back(&unknownToken0); + + auto unknownToken1 = globPointer2; + unknownToken1.Token = NUM_PATCH_TOKENS; + progWithConst.unhandledTokens.push_back(&unknownToken1); + + std::string generated = NEO::PatchTokenBinary::asString(progWithConst); + std::stringstream expected; + expected << + R"===(Program of size : 200 decoded successfully +struct SProgramBinaryHeader { + uint32_t Magic; // = 1229870147 + uint32_t Version; // = )===" + << CURRENT_ICBE_VERSION << R"===( + + uint32_t Device; // = )===" + << renderCoreFamily << R"===( + uint32_t GPUPointerSizeInBytes; // = 0 + + uint32_t NumberOfKernels; // = 0 + + uint32_t SteppingId; // = 0 + + uint32_t PatchListSize; // = 172 +}; +Program-scope tokens section size : 172 + WARNING : Unhandled program-scope tokens detected [2] : + + [0]: + | struct SPatchItemHeader { + | uint32_t Token;// = )===" + << NUM_PATCH_TOKENS << R"===( + | uint32_t Size;// = 28 + | }; + + [1]: + | struct SPatchItemHeader { + | uint32_t Token;// = )===" + << (NUM_PATCH_TOKENS) << R"===( + | uint32_t Size;// = 28 + | }; + Inline Costant Surface(s) [2] : + + [0]: + | 
struct SPatchAllocateConstantMemorySurfaceProgramBinaryInfo : + | SPatchItemHeader (Token=42(PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo) << R"===() + | { + | uint32_t ConstantBufferIndex;// = 0 + | uint32_t InlineDataSize;// = 128 + | } + + [1]: + | struct SPatchAllocateConstantMemorySurfaceProgramBinaryInfo : + | SPatchItemHeader (Token=42(PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo) << R"===() + | { + | uint32_t ConstantBufferIndex;// = 1 + | uint32_t InlineDataSize;// = 256 + | } + Inline Costant Surface - self relocations [2] : + + [0]: + | struct SPatchConstantPointerProgramBinaryInfo : + | SPatchItemHeader (Token=48(PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchConstantPointerProgramBinaryInfo) << R"===() + | { + | uint32_t ConstantBufferIndex;// = 0 + | uint64_t ConstantPointerOffset;// = 96 + | uint32_t BufferType;// = 1 + | uint32_t BufferIndex;// = 0 + | } + + [1]: + | struct SPatchConstantPointerProgramBinaryInfo : + | SPatchItemHeader (Token=48(PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchConstantPointerProgramBinaryInfo) << R"===() + | { + | uint32_t ConstantBufferIndex;// = 1 + | uint64_t ConstantPointerOffset;// = 104 + | uint32_t BufferType;// = 1 + | uint32_t BufferIndex;// = 1 + | } + Inline Global Variable Surface(s) [2] : + + [0]: + | struct SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo : + | SPatchItemHeader (Token=41(PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo) << R"===() + | { + | uint32_t Type;// = 0 + | uint32_t GlobalBufferIndex;// = 0 + | uint32_t InlineDataSize;// = 256 + | } + + [1]: + | struct SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo : + | SPatchItemHeader 
(Token=41(PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo) << R"===() + | { + | uint32_t Type;// = 0 + | uint32_t GlobalBufferIndex;// = 2 + | uint32_t InlineDataSize;// = 768 + | } + Inline Global Variable Surface - self relocations [3] : + + [0]: + | struct SPatchGlobalPointerProgramBinaryInfo : + | SPatchItemHeader (Token=47(PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchGlobalPointerProgramBinaryInfo) << R"===() + | { + | uint32_t GlobalBufferIndex;// = 0 + | uint64_t GlobalPointerOffset;// = 48 + | uint32_t BufferType;// = 0 + | uint32_t BufferIndex;// = 0 + | } + + [1]: + | struct SPatchGlobalPointerProgramBinaryInfo : + | SPatchItemHeader (Token=47(PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchGlobalPointerProgramBinaryInfo) << R"===() + | { + | uint32_t GlobalBufferIndex;// = 0 + | uint64_t GlobalPointerOffset;// = 56 + | uint32_t BufferType;// = 0 + | uint32_t BufferIndex;// = 0 + | } + + [2]: + | struct SPatchGlobalPointerProgramBinaryInfo : + | SPatchItemHeader (Token=47(PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO), Size=)===" + << sizeof(SPatchGlobalPointerProgramBinaryInfo) << R"===() + | { + | uint32_t GlobalBufferIndex;// = 0 + | uint64_t GlobalPointerOffset;// = 64 + | uint32_t BufferType;// = 0 + | uint32_t BufferIndex;// = 0 + | } + struct SPatchFunctionTableInfo : + SPatchItemHeader (Token=53(PATCH_TOKEN_PROGRAM_SYMBOL_TABLE), Size=)===" + << sizeof(SPatchFunctionTableInfo) << R"===() + { + uint32_t NumEntries;// = 7 + } +Kernels section size : 0 +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} + +TEST(ProgramDumper, GivenProgramWithKernelThenProperlyCreatesDump) { + PatchTokensTestData::ValidProgramWithKernelUsingSlm program; + std::string generated = NEO::PatchTokenBinary::asString(program); + std::stringstream expected; + expected << + R"===(Program of size : 96 
decoded successfully +struct SProgramBinaryHeader { + uint32_t Magic; // = 1229870147 + uint32_t Version; // = )===" + << iOpenCL::CURRENT_ICBE_VERSION << R"===( + + uint32_t Device; // = )===" + << renderCoreFamily << R"===( + uint32_t GPUPointerSizeInBytes; // = 0 + + uint32_t NumberOfKernels; // = 1 + + uint32_t SteppingId; // = 0 + + uint32_t PatchListSize; // = 0 +}; +Program-scope tokens section size : 0 +Kernels section size : 0 +kernel[0] test_kernel: +Kernel of size : 68 decoded successfully +struct SKernelBinaryHeader { + uint32_t CheckSum;// = 2446215414 + uint64_t ShaderHashCode;// = 0 + uint32_t KernelNameSize;// = 12 + uint32_t PatchListSize;// = 16 +}; +Kernel-scope tokens section size : 16 + struct SPatchAllocateLocalSurface : + SPatchItemHeader (Token=15(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE), Size=)===" + << sizeof(iOpenCL::SPatchAllocateLocalSurface) << R"===() + { + uint32_t Offset;// = 0 + uint32_t TotalInlineLocalMemorySize;// = 16 + } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} + +TEST(ProgramDumper, GivenProgramWithMultipleKerneslThenProperlyCreatesDump) { + PatchTokensTestData::ValidProgramWithKernelUsingSlm program; + program.kernels.push_back(program.kernels[0]); + program.kernels[1].tokens.allocateLocalSurface = nullptr; + program.kernels[1].name = ArrayRef("different_kernel"); + program.kernels[1].blobs.patchList = ArrayRef(); + program.kernels.push_back(program.kernels[1]); + program.kernels[2].name = ArrayRef(); + std::string generated = NEO::PatchTokenBinary::asString(program); + std::stringstream expected; + expected << + R"===(Program of size : 96 decoded successfully +struct SProgramBinaryHeader { + uint32_t Magic; // = 1229870147 + uint32_t Version; // = )===" + << iOpenCL::CURRENT_ICBE_VERSION << R"===( + + uint32_t Device; // = )===" + << renderCoreFamily << R"===( + uint32_t GPUPointerSizeInBytes; // = 0 + + uint32_t NumberOfKernels; // = 1 + + uint32_t SteppingId; // = 0 + + uint32_t PatchListSize; // = 
0 +}; +Program-scope tokens section size : 0 +Kernels section size : 0 +kernel[0] test_kernel: +Kernel of size : 68 decoded successfully +struct SKernelBinaryHeader { + uint32_t CheckSum;// = 2446215414 + uint64_t ShaderHashCode;// = 0 + uint32_t KernelNameSize;// = 12 + uint32_t PatchListSize;// = 16 +}; +Kernel-scope tokens section size : 16 + struct SPatchAllocateLocalSurface : + SPatchItemHeader (Token=15(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE), Size=)===" + << sizeof(iOpenCL::SPatchAllocateLocalSurface) << R"===() + { + uint32_t Offset;// = 0 + uint32_t TotalInlineLocalMemorySize;// = 16 + } +kernel[1] different_kernel: +Kernel of size : 68 decoded successfully +struct SKernelBinaryHeader { + uint32_t CheckSum;// = 2446215414 + uint64_t ShaderHashCode;// = 0 + uint32_t KernelNameSize;// = 12 + uint32_t PatchListSize;// = 16 +}; +Kernel-scope tokens section size : 0 +kernel[2] : +Kernel of size : 68 decoded successfully +struct SKernelBinaryHeader { + uint32_t CheckSum;// = 2446215414 + uint64_t ShaderHashCode;// = 0 + uint32_t KernelNameSize;// = 12 + uint32_t PatchListSize;// = 16 +}; +Kernel-scope tokens section size : 0 +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} + +TEST(KernelDumper, GivenKernelWithNonCrossthreadDataPatchtokensThenProperlyCreatesDump) { + using namespace iOpenCL; + using namespace PatchTokensTestData; + std::vector stream; + auto kernel = ValidEmptyKernel::create(stream); + auto samplerStateArray = initToken(PATCH_TOKEN_SAMPLER_STATE_ARRAY); + auto bindingTableState = initToken(PATCH_TOKEN_BINDING_TABLE_STATE); + auto allocateLocalSurface = initToken(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE); + SPatchMediaVFEState mediaVfeState[2] = {initToken(PATCH_TOKEN_MEDIA_VFE_STATE), initToken(PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1)}; + auto mediaInterfaceDescriptorLoad = initToken(PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD); + auto interfaceDescriptorData = initToken(PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA); + auto threadPayload = 
initToken(PATCH_TOKEN_THREAD_PAYLOAD); + auto executionEnvironment = initToken(PATCH_TOKEN_EXECUTION_ENVIRONMENT); + auto dataParameterStream = initToken(PATCH_TOKEN_DATA_PARAMETER_STREAM); + auto kernelAttributesInfo = initToken(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO); + auto allocateStatelessPrivateSurface = initToken(PATCH_TOKEN_ALLOCATE_PRIVATE_MEMORY); + auto allocateStatelessConstantMemorySurfaceWithInitialization = initToken(PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION); + auto allocateStatelessGlobalMemorySurfaceWithInitialization = initToken(PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION); + auto allocateStatelessPrintfSurface = initToken(PATCH_TOKEN_ALLOCATE_PRINTF_SURFACE); + auto allocateStatelessEventPoolSurface = initToken(PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE); + auto allocateStatelessDefaultDeviceQueueSurface = initToken(PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT); + auto inlineVmeSamplerInfo = initToken(PATCH_TOKEN_INLINE_VME_SAMPLER_INFO); + auto gtpinFreeGrfInfo = initToken(PATCH_TOKEN_GTPIN_FREE_GRF_INFO); + auto stateSip = initToken(PATCH_TOKEN_STATE_SIP); + auto allocateSystemThreadSurface = initToken(PATCH_TOKEN_ALLOCATE_SIP_SURFACE); + auto gtpinInfo = initToken(PATCH_TOKEN_GTPIN_INFO); + auto programSymbolTable = initToken(PATCH_TOKEN_PROGRAM_SYMBOL_TABLE); + auto programRelocationTable = initToken(PATCH_TOKEN_PROGRAM_RELOCATION_TABLE); + auto unknownToken0 = initToken(NUM_PATCH_TOKENS); + auto unknownToken1 = initToken(NUM_PATCH_TOKENS); + + kernel.tokens.samplerStateArray = &samplerStateArray; + kernel.tokens.bindingTableState = &bindingTableState; + kernel.tokens.allocateLocalSurface = &allocateLocalSurface; + kernel.tokens.mediaVfeState[0] = &mediaVfeState[0]; + kernel.tokens.mediaVfeState[1] = &mediaVfeState[1]; + kernel.tokens.mediaInterfaceDescriptorLoad = &mediaInterfaceDescriptorLoad; + kernel.tokens.interfaceDescriptorData = &interfaceDescriptorData; + kernel.tokens.threadPayload 
= &threadPayload; + kernel.tokens.executionEnvironment = &executionEnvironment; + kernel.tokens.dataParameterStream = &dataParameterStream; + kernel.tokens.kernelAttributesInfo = &kernelAttributesInfo; + kernel.tokens.allocateStatelessPrivateSurface = &allocateStatelessPrivateSurface; + kernel.tokens.allocateStatelessConstantMemorySurfaceWithInitialization = &allocateStatelessConstantMemorySurfaceWithInitialization; + kernel.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization = &allocateStatelessGlobalMemorySurfaceWithInitialization; + kernel.tokens.allocateStatelessPrintfSurface = &allocateStatelessPrintfSurface; + kernel.tokens.allocateStatelessEventPoolSurface = &allocateStatelessEventPoolSurface; + kernel.tokens.allocateStatelessDefaultDeviceQueueSurface = &allocateStatelessDefaultDeviceQueueSurface; + kernel.tokens.inlineVmeSamplerInfo = &inlineVmeSamplerInfo; + kernel.tokens.gtpinFreeGrfInfo = >pinFreeGrfInfo; + kernel.tokens.stateSip = &stateSip; + kernel.tokens.allocateSystemThreadSurface = &allocateSystemThreadSurface; + kernel.tokens.gtpinInfo = >pinInfo; + kernel.tokens.programSymbolTable = &programSymbolTable; + kernel.tokens.programRelocationTable = &programRelocationTable; + kernel.unhandledTokens.push_back(&unknownToken0); + kernel.unhandledTokens.push_back(&unknownToken1); + + std::string generated = NEO::PatchTokenBinary::asString(kernel); + std::stringstream expected; + expected << + R"===(Kernel of size : 52 decoded successfully +struct SKernelBinaryHeader { + uint32_t CheckSum;// = 3223116527 + uint64_t ShaderHashCode;// = 0 + uint32_t KernelNameSize;// = 12 + uint32_t PatchListSize;// = 0 +}; +Kernel-scope tokens section size : 0 + WARNING : Unhandled kernel-scope tokens detected [2] : + + [0]: + | struct SPatchItemHeader { + | uint32_t Token;// = )===" + << NUM_PATCH_TOKENS << R"===( + | uint32_t Size;// = 8 + | }; + + [1]: + | struct SPatchItemHeader { + | uint32_t Token;// = )===" + << NUM_PATCH_TOKENS << R"===( + | uint32_t 
Size;// = 8 + | }; + struct SPatchExecutionEnvironment : + SPatchItemHeader (Token=23(PATCH_TOKEN_EXECUTION_ENVIRONMENT), Size=)===" + << sizeof(SPatchExecutionEnvironment) << R"===() + { + uint32_t RequiredWorkGroupSizeX;// = 0 + uint32_t RequiredWorkGroupSizeY;// = 0 + uint32_t RequiredWorkGroupSizeZ;// = 0 + uint32_t LargestCompiledSIMDSize;// = 0 + uint32_t CompiledSubGroupsNumber;// = 0 + uint32_t HasBarriers;// = 0 + uint32_t DisableMidThreadPreemption;// = 0 + uint32_t CompiledSIMD8;// = 0 + uint32_t CompiledSIMD16;// = 0 + uint32_t CompiledSIMD32;// = 0 + uint32_t HasDeviceEnqueue;// = 0 + uint32_t MayAccessUndeclaredResource;// = 0 + uint32_t UsesFencesForReadWriteImages;// = 0 + uint32_t UsesStatelessSpillFill;// = 0 + uint32_t UsesMultiScratchSpaces;// = 0 + uint32_t IsCoherent;// = 0 + uint32_t IsInitializer;// = 0 + uint32_t IsFinalizer;// = 0 + uint32_t SubgroupIndependentForwardProgressRequired;// = 0 + uint32_t CompiledForGreaterThan4GBBuffers;// = 0 + uint32_t NumGRFRequired;// = 0 + uint32_t WorkgroupWalkOrderDims;// = 0 + uint32_t HasGlobalAtomics;// = 0 + } + struct SPatchThreadPayload : + SPatchItemHeader (Token=22(PATCH_TOKEN_THREAD_PAYLOAD), Size=)===" + << sizeof(SPatchThreadPayload) << R"===() + { + uint32_t HeaderPresent;// = 0 + uint32_t LocalIDXPresent;// = 0 + uint32_t LocalIDYPresent;// = 0 + uint32_t LocalIDZPresent;// = 0 + uint32_t LocalIDFlattenedPresent;// = 0 + uint32_t IndirectPayloadStorage;// = 0 + uint32_t UnusedPerThreadConstantPresent;// = 0 + uint32_t GetLocalIDPresent;// = 0 + uint32_t GetGroupIDPresent;// = 0 + uint32_t GetGlobalOffsetPresent;// = 0 + uint32_t StageInGridOriginPresent;// = 0 + uint32_t StageInGridSizePresent;// = 0 + uint32_t OffsetToSkipPerThreadDataLoad;// = 0 + uint32_t OffsetToSkipSetFFIDGP;// = 0 + uint32_t PassInlineData;// = 0 + } + struct SPatchSamplerStateArray : + SPatchItemHeader (Token=5(PATCH_TOKEN_SAMPLER_STATE_ARRAY), Size=)===" + << sizeof(SPatchSamplerStateArray) << R"===() + { + 
uint32_t Offset;// = 0 + uint32_t Count;// = 0 + uint32_t BorderColorOffset;// = 0 + } + struct SPatchBindingTableState : + SPatchItemHeader (Token=8(PATCH_TOKEN_BINDING_TABLE_STATE), Size=)===" + << sizeof(SPatchBindingTableState) << R"===() + { + uint32_t Offset;// = 0 + uint32_t Count;// = 0 + uint32_t SurfaceStateOffset;// = 0 + } + struct SPatchAllocateLocalSurface : + SPatchItemHeader (Token=15(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE), Size=)===" + << sizeof(SPatchAllocateLocalSurface) << R"===() + { + uint32_t Offset;// = 0 + uint32_t TotalInlineLocalMemorySize;// = 0 + } + mediaVfeState [2] : + + [0]: + | struct SPatchMediaVFEState : + | SPatchItemHeader (Token=18(PATCH_TOKEN_MEDIA_VFE_STATE), Size=)===" + << sizeof(SPatchMediaVFEState) << R"===() + | { + | uint32_t ScratchSpaceOffset;// = 0 + | uint32_t PerThreadScratchSpace;// = 0 + | } + + [1]: + | struct SPatchMediaVFEState : + | SPatchItemHeader (Token=55(PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1), Size=)===" + << sizeof(SPatchMediaVFEState) << R"===() + | { + | uint32_t ScratchSpaceOffset;// = 0 + | uint32_t PerThreadScratchSpace;// = 0 + | } + struct SPatchMediaInterfaceDescriptorLoad : + SPatchItemHeader (Token=19(PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD), Size=)===" + << sizeof(SPatchMediaInterfaceDescriptorLoad) << R"===() + { + uint32_t InterfaceDescriptorDataOffset;// = 0 + } + struct SPatchInterfaceDescriptorData : + SPatchItemHeader (Token=21(PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA), Size=)===" + << sizeof(SPatchInterfaceDescriptorData) << R"===() + { + uint32_t Offset;// = 0 + uint32_t SamplerStateOffset;// = 0 + uint32_t KernelOffset;// = 0 + uint32_t BindingTableOffset;// = 0 + } + struct SPatchKernelAttributesInfo : + SPatchItemHeader (Token=27(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO), Size=)===" + << sizeof(SPatchKernelAttributesInfo) << R"===() + { + uint32_t AttributesSize;// = 0 + } + struct SPatchAllocateStatelessPrivateSurface : + SPatchItemHeader (Token=24(PATCH_TOKEN_ALLOCATE_PRIVATE_MEMORY), 
Size=)===" + << sizeof(SPatchAllocateStatelessPrivateSurface) << R"===() + { + uint32_t SurfaceStateHeapOffset;// = 0 + uint32_t DataParamOffset;// = 0 + uint32_t DataParamSize;// = 0 + uint32_t PerThreadPrivateMemorySize;// = 0 + } + struct SPatchAllocateStatelessConstantMemorySurfaceWithInitialization : + SPatchItemHeader (Token=39(PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION), Size=)===" + << sizeof(SPatchAllocateStatelessConstantMemorySurfaceWithInitialization) << R"===() + { + uint32_t ConstantBufferIndex;// = 0 + uint32_t SurfaceStateHeapOffset;// = 0 + uint32_t DataParamOffset;// = 0 + uint32_t DataParamSize;// = 0 + } + struct SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization : + SPatchItemHeader (Token=40(PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION), Size=)===" + << sizeof(SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization) << R"===() + { + uint32_t GlobalBufferIndex;// = 0 + uint32_t SurfaceStateHeapOffset;// = 0 + uint32_t DataParamOffset;// = 0 + uint32_t DataParamSize;// = 0 + } + struct SPatchAllocateStatelessPrintfSurface : + SPatchItemHeader (Token=29(PATCH_TOKEN_ALLOCATE_PRINTF_SURFACE), Size=)===" + << sizeof(SPatchAllocateStatelessPrintfSurface) << R"===() + { + uint32_t PrintfSurfaceIndex;// = 0 + uint32_t SurfaceStateHeapOffset;// = 0 + uint32_t DataParamOffset;// = 0 + uint32_t DataParamSize;// = 0 + } + struct SPatchAllocateStatelessEventPoolSurface : + SPatchItemHeader (Token=36(PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE), Size=)===" + << sizeof(SPatchAllocateStatelessEventPoolSurface) << R"===() + { + uint32_t EventPoolSurfaceIndex;// = 0 + uint32_t SurfaceStateHeapOffset;// = 0 + uint32_t DataParamOffset;// = 0 + uint32_t DataParamSize;// = 0 + } + struct SPatchAllocateStatelessDefaultDeviceQueueSurface : + SPatchItemHeader (Token=46(PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT), Size=)===" + << sizeof(SPatchAllocateStatelessDefaultDeviceQueueSurface) << R"===() + { + 
uint32_t SurfaceStateHeapOffset;// = 0 + uint32_t DataParamOffset;// = 0 + uint32_t DataParamSize;// = 0 + } + struct SPatchItemHeader { + uint32_t Token;// = 50(PATCH_TOKEN_INLINE_VME_SAMPLER_INFO) + uint32_t Size;// = 8 + }; + struct SPatchGtpinFreeGRFInfo : + SPatchItemHeader (Token=51(PATCH_TOKEN_GTPIN_FREE_GRF_INFO), Size=)===" + << sizeof(SPatchGtpinFreeGRFInfo) << R"===() + { + uint32_t BufferSize;// = 0 + } + struct SPatchStateSIP : + SPatchItemHeader (Token=2(PATCH_TOKEN_STATE_SIP), Size=)===" + << sizeof(SPatchStateSIP) << R"===() + { + uint32_t SystemKernelOffset;// = 0 + } + struct SPatchAllocateSystemThreadSurface : + SPatchItemHeader (Token=10(PATCH_TOKEN_ALLOCATE_SIP_SURFACE), Size=)===" + << sizeof(SPatchAllocateSystemThreadSurface) << R"===() + { + uint32_t Offset;// = 0 + uint32_t PerThreadSystemThreadSurfaceSize;// = 0 + uint32_t BTI;// = 0 + } + struct SPatchItemHeader { + uint32_t Token;// = 52(PATCH_TOKEN_GTPIN_INFO) + uint32_t Size;// = 8 + }; + struct SPatchFunctionTableInfo : + SPatchItemHeader (Token=53(PATCH_TOKEN_PROGRAM_SYMBOL_TABLE), Size=)===" + << sizeof(SPatchFunctionTableInfo) << R"===() + { + uint32_t NumEntries;// = 0 + } + struct SPatchFunctionTableInfo : + SPatchItemHeader (Token=54(PATCH_TOKEN_PROGRAM_RELOCATION_TABLE), Size=)===" + << sizeof(SPatchFunctionTableInfo) << R"===() + { + uint32_t NumEntries;// = 0 + } + struct SPatchDataParameterStream : + SPatchItemHeader (Token=25(PATCH_TOKEN_DATA_PARAMETER_STREAM), Size=)===" + << sizeof(SPatchDataParameterStream) << R"===() + { + uint32_t DataParameterStreamSize;// = 0 + } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} + +TEST(KernelDumper, GivenKernelWithStringPatchTokensThenProperlyCreatesDump) { + std::vector kernelStream; + auto kernel = PatchTokensTestData::ValidEmptyKernel::create(kernelStream); + + std::vector strTokStream; + + std::string str0{"some_string0"}; + std::string str1{"another_string"}; + std::string str2{"yet_another_string"}; + auto 
string0Off = PatchTokensTestData::pushBackStringToken(str0, 0, strTokStream); + auto string1Off = PatchTokensTestData::pushBackStringToken(str1, 1, strTokStream); + auto string2Off = PatchTokensTestData::pushBackStringToken(str2, 2, strTokStream); + + kernel.tokens.strings.push_back(reinterpret_cast(strTokStream.data() + string0Off)); + kernel.tokens.strings.push_back(reinterpret_cast(strTokStream.data() + string1Off)); + kernel.tokens.strings.push_back(reinterpret_cast(strTokStream.data() + string2Off)); + std::string generated = NEO::PatchTokenBinary::asString(kernel); + std::stringstream expected; + expected << R"===(Kernel of size : 52 decoded successfully +struct SKernelBinaryHeader { + uint32_t CheckSum;// = 3223116527 + uint64_t ShaderHashCode;// = 0 + uint32_t KernelNameSize;// = 12 + uint32_t PatchListSize;// = 0 +}; +Kernel-scope tokens section size : 0 + String literals [3] : + + [0]: + | struct SPatchString : + | SPatchItemHeader (Token=28(PATCH_TOKEN_STRING), Size=)===" + << (sizeof(iOpenCL::SPatchString) + str0.length()) << R"===() + | { + | uint32_t Index;// = 0 + | uint32_t StringSize;// = 12 : [some_string0] + | } + + [1]: + | struct SPatchString : + | SPatchItemHeader (Token=28(PATCH_TOKEN_STRING), Size=)===" + << (sizeof(iOpenCL::SPatchString) + str1.length()) << R"===() + | { + | uint32_t Index;// = 1 + | uint32_t StringSize;// = 14 : [another_string] + | } + + [2]: + | struct SPatchString : + | SPatchItemHeader (Token=28(PATCH_TOKEN_STRING), Size=)===" + << (sizeof(iOpenCL::SPatchString) + str2.length()) << R"===() + | { + | uint32_t Index;// = 2 + | uint32_t StringSize;// = 18 : [yet_another_string] + | } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} + +TEST(KernelDumper, GivenKernelWithNonArgCrossThreadDataPatchtokensThenProperlyCreatesDump) { + using namespace iOpenCL; + using namespace PatchTokensTestData; + std::vector stream; + auto kernel = ValidEmptyKernel::create(stream); + + SPatchDataParameterBuffer 
localWorkSize[3] = {initDataParameterBufferToken(DATA_PARAMETER_LOCAL_WORK_SIZE), + initDataParameterBufferToken(DATA_PARAMETER_LOCAL_WORK_SIZE, 1U), + initDataParameterBufferToken(DATA_PARAMETER_LOCAL_WORK_SIZE, 2U)}; + SPatchDataParameterBuffer localWorkSize2[3] = {initDataParameterBufferToken(DATA_PARAMETER_LOCAL_WORK_SIZE), + initDataParameterBufferToken(DATA_PARAMETER_LOCAL_WORK_SIZE, 1U), + initDataParameterBufferToken(DATA_PARAMETER_LOCAL_WORK_SIZE, 2U)}; + SPatchDataParameterBuffer enqueuedLocalWorkSize[3] = {initDataParameterBufferToken(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE), + initDataParameterBufferToken(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, 1U), + initDataParameterBufferToken(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, 2U)}; + SPatchDataParameterBuffer numWorkgroups[3] = {initDataParameterBufferToken(DATA_PARAMETER_NUM_WORK_GROUPS), + initDataParameterBufferToken(DATA_PARAMETER_NUM_WORK_GROUPS, 1U), + initDataParameterBufferToken(DATA_PARAMETER_NUM_WORK_GROUPS, 2U)}; + SPatchDataParameterBuffer globalWorkOffset[3] = {initDataParameterBufferToken(DATA_PARAMETER_GLOBAL_WORK_OFFSET), + initDataParameterBufferToken(DATA_PARAMETER_GLOBAL_WORK_OFFSET, 1U), + initDataParameterBufferToken(DATA_PARAMETER_GLOBAL_WORK_OFFSET, 2U)}; + SPatchDataParameterBuffer globalWorkSize[3] = {initDataParameterBufferToken(DATA_PARAMETER_GLOBAL_WORK_SIZE), + initDataParameterBufferToken(DATA_PARAMETER_GLOBAL_WORK_SIZE, 1U), + initDataParameterBufferToken(DATA_PARAMETER_GLOBAL_WORK_SIZE, 2U)}; + SPatchDataParameterBuffer maxWorkGroupSize = initDataParameterBufferToken(DATA_PARAMETER_MAX_WORKGROUP_SIZE); + auto workDimensions = initDataParameterBufferToken(DATA_PARAMETER_WORK_DIMENSIONS); + auto simdSize = initDataParameterBufferToken(DATA_PARAMETER_SIMD_SIZE); + auto parentEvent = initDataParameterBufferToken(DATA_PARAMETER_PARENT_EVENT); + auto privateMemoryStatelessSize = initDataParameterBufferToken(DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE); + auto 
localMemoryStatelessWindowSize = initDataParameterBufferToken(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE); + auto localMemoryStatelessWindowStartAddress = initDataParameterBufferToken(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS); + auto preferredWorkgroupMultiple = initDataParameterBufferToken(DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE); + SPatchDataParameterBuffer childBlockSimdSize[2] = {initDataParameterBufferToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE), + initDataParameterBufferToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, 2U)}; + auto unknownToken0 = initDataParameterBufferToken(NUM_DATA_PARAMETER_TOKENS); + auto unknownToken1 = initDataParameterBufferToken(NUM_DATA_PARAMETER_TOKENS); + + kernel.tokens.crossThreadPayloadArgs.localWorkSize[0] = &localWorkSize[0]; + kernel.tokens.crossThreadPayloadArgs.localWorkSize[1] = &localWorkSize[1]; + kernel.tokens.crossThreadPayloadArgs.localWorkSize[2] = &localWorkSize[2]; + kernel.tokens.crossThreadPayloadArgs.localWorkSize2[0] = &localWorkSize2[0]; + kernel.tokens.crossThreadPayloadArgs.localWorkSize2[1] = nullptr; + kernel.tokens.crossThreadPayloadArgs.localWorkSize2[2] = &localWorkSize2[2]; + kernel.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[0] = &enqueuedLocalWorkSize[0]; + kernel.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[1] = nullptr; + kernel.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[2] = nullptr; + kernel.tokens.crossThreadPayloadArgs.numWorkGroups[0] = &numWorkgroups[0]; + kernel.tokens.crossThreadPayloadArgs.numWorkGroups[1] = &numWorkgroups[1]; + kernel.tokens.crossThreadPayloadArgs.numWorkGroups[2] = &numWorkgroups[2]; + kernel.tokens.crossThreadPayloadArgs.globalWorkOffset[0] = &globalWorkOffset[0]; + kernel.tokens.crossThreadPayloadArgs.globalWorkOffset[1] = &globalWorkOffset[1]; + kernel.tokens.crossThreadPayloadArgs.globalWorkOffset[2] = &globalWorkOffset[2]; + kernel.tokens.crossThreadPayloadArgs.globalWorkSize[0] = &globalWorkSize[0]; + 
kernel.tokens.crossThreadPayloadArgs.globalWorkSize[1] = &globalWorkSize[1]; + kernel.tokens.crossThreadPayloadArgs.globalWorkSize[2] = &globalWorkSize[2]; + kernel.tokens.crossThreadPayloadArgs.maxWorkGroupSize = &maxWorkGroupSize; + kernel.tokens.crossThreadPayloadArgs.workDimensions = &workDimensions; + kernel.tokens.crossThreadPayloadArgs.simdSize = &simdSize; + kernel.tokens.crossThreadPayloadArgs.parentEvent = &parentEvent; + kernel.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize = &privateMemoryStatelessSize; + kernel.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize = &localMemoryStatelessWindowSize; + kernel.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress = &localMemoryStatelessWindowStartAddress; + kernel.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple = &preferredWorkgroupMultiple; + kernel.tokens.crossThreadPayloadArgs.childBlockSimdSize.push_back(&childBlockSimdSize[0]); + kernel.tokens.crossThreadPayloadArgs.childBlockSimdSize.push_back(&childBlockSimdSize[1]); + kernel.unhandledTokens.push_back(&unknownToken0); + kernel.unhandledTokens.push_back(&unknownToken1); + + std::string generated = NEO::PatchTokenBinary::asString(kernel); + static constexpr auto tokenSize = sizeof(SPatchDataParameterBuffer); + std::stringstream expected; + expected << R"===(Kernel of size : 52 decoded successfully +struct SKernelBinaryHeader { + uint32_t CheckSum;// = 3223116527 + uint64_t ShaderHashCode;// = 0 + uint32_t KernelNameSize;// = 12 + uint32_t PatchListSize;// = 0 +}; +Kernel-scope tokens section size : 0 + WARNING : Unhandled kernel-scope tokens detected [2] : + + [0]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = )===" + << NUM_DATA_PARAMETER_TOKENS << R"===( + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | 
uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [1]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = )===" + << NUM_DATA_PARAMETER_TOKENS << R"===( + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + localWorkSize [3] : + + [0]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 2(DATA_PARAMETER_LOCAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [1]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 2(DATA_PARAMETER_LOCAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 4 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [2]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 2(DATA_PARAMETER_LOCAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 8 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + localWorkSize2 [3] : + + [0]: + | 
struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 2(DATA_PARAMETER_LOCAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [2]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 2(DATA_PARAMETER_LOCAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 8 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + enqueuedLocalWorkSize [3] : + + [0]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 28(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + numWorkGroups [3] : + + [0]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 4(DATA_PARAMETER_NUM_WORK_GROUPS) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [1]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << 
tokenSize << R"===() + | { + | uint32_t Type;// = 4(DATA_PARAMETER_NUM_WORK_GROUPS) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 4 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [2]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 4(DATA_PARAMETER_NUM_WORK_GROUPS) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 8 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + globalWorkOffset [3] : + + [0]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 16(DATA_PARAMETER_GLOBAL_WORK_OFFSET) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [1]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 16(DATA_PARAMETER_GLOBAL_WORK_OFFSET) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 4 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [2]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 16(DATA_PARAMETER_GLOBAL_WORK_OFFSET) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 
+ | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 8 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + globalWorkSize [3] : + + [0]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 3(DATA_PARAMETER_GLOBAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [1]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 3(DATA_PARAMETER_GLOBAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 4 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [2]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 3(DATA_PARAMETER_GLOBAL_WORK_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 8 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + struct SPatchDataParameterBuffer : + SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + { + uint32_t Type;// = 29(DATA_PARAMETER_MAX_WORKGROUP_SIZE) + uint32_t ArgumentNumber;// = 0 + uint32_t Offset;// = 0 + uint32_t DataSize;// = 0 + uint32_t SourceOffset;// = 0 + uint32_t LocationIndex;// = 0 + uint32_t LocationIndex2;// = 0 + uint32_t IsEmulationArgument;// = 0 + } + struct 
SPatchDataParameterBuffer : + SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + { + uint32_t Type;// = 5(DATA_PARAMETER_WORK_DIMENSIONS) + uint32_t ArgumentNumber;// = 0 + uint32_t Offset;// = 0 + uint32_t DataSize;// = 0 + uint32_t SourceOffset;// = 0 + uint32_t LocationIndex;// = 0 + uint32_t LocationIndex2;// = 0 + uint32_t IsEmulationArgument;// = 0 + } + struct SPatchDataParameterBuffer : + SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + { + uint32_t Type;// = 34(DATA_PARAMETER_SIMD_SIZE) + uint32_t ArgumentNumber;// = 0 + uint32_t Offset;// = 0 + uint32_t DataSize;// = 0 + uint32_t SourceOffset;// = 0 + uint32_t LocationIndex;// = 0 + uint32_t LocationIndex2;// = 0 + uint32_t IsEmulationArgument;// = 0 + } + struct SPatchDataParameterBuffer : + SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + { + uint32_t Type;// = 22(DATA_PARAMETER_PARENT_EVENT) + uint32_t ArgumentNumber;// = 0 + uint32_t Offset;// = 0 + uint32_t DataSize;// = 0 + uint32_t SourceOffset;// = 0 + uint32_t LocationIndex;// = 0 + uint32_t LocationIndex2;// = 0 + uint32_t IsEmulationArgument;// = 0 + } + struct SPatchDataParameterBuffer : + SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + { + uint32_t Type;// = 33(DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE) + uint32_t ArgumentNumber;// = 0 + uint32_t Offset;// = 0 + uint32_t DataSize;// = 0 + uint32_t SourceOffset;// = 0 + uint32_t LocationIndex;// = 0 + uint32_t LocationIndex2;// = 0 + uint32_t IsEmulationArgument;// = 0 + } + struct SPatchDataParameterBuffer : + SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + { + uint32_t Type;// = 32(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE) + uint32_t ArgumentNumber;// = 0 + uint32_t Offset;// = 0 + uint32_t DataSize;// = 0 + uint32_t 
SourceOffset;// = 0 + uint32_t LocationIndex;// = 0 + uint32_t LocationIndex2;// = 0 + uint32_t IsEmulationArgument;// = 0 + } + struct SPatchDataParameterBuffer : + SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + { + uint32_t Type;// = 31(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS) + uint32_t ArgumentNumber;// = 0 + uint32_t Offset;// = 0 + uint32_t DataSize;// = 0 + uint32_t SourceOffset;// = 0 + uint32_t LocationIndex;// = 0 + uint32_t LocationIndex2;// = 0 + uint32_t IsEmulationArgument;// = 0 + } + struct SPatchDataParameterBuffer : + SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + { + uint32_t Type;// = 30(DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE) + uint32_t ArgumentNumber;// = 0 + uint32_t Offset;// = 0 + uint32_t DataSize;// = 0 + uint32_t SourceOffset;// = 0 + uint32_t LocationIndex;// = 0 + uint32_t LocationIndex2;// = 0 + uint32_t IsEmulationArgument;// = 0 + } + Child block simd size(s) [2] : + + [0]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 38(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + [1]: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << tokenSize << R"===() + | { + | uint32_t Type;// = 38(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 8 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } +)==="; + 
EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps a whole kernel that carries two kernel arguments. The second argument is pushed as a copy of the first, so both entries point at the same DATA_PARAMETER_OBJECT_ID token and the expected text repeats the same token dump under kernelArg[0] and kernelArg[1], after the kernel header and the Kernel arguments [2] line. NOTE(review): std::vector below has no element type - the template argument list looks stripped from this copy of the patch; confirm against the upstream file. */ +TEST(KernelDumper, GivenKernelWithArgThenProperlyCreatesDump) { + std::vector stream; + auto kernel = PatchTokensTestData::ValidEmptyKernel::create(stream); + kernel.tokens.kernelArgs.push_back(NEO::PatchTokenBinary::KernelArgFromPatchtokens{}); + auto kernelArgObjId = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_OBJECT_ID); + kernel.tokens.kernelArgs[0].objectId = &kernelArgObjId; + kernel.tokens.kernelArgs.push_back(kernel.tokens.kernelArgs[0]); + auto generated = NEO::PatchTokenBinary::asString(kernel); + std::stringstream expected; + expected << R"===(Kernel of size : 52 decoded successfully +struct SKernelBinaryHeader { + uint32_t CheckSum;// = 3223116527 + uint64_t ShaderHashCode;// = 0 + uint32_t KernelNameSize;// = 12 + uint32_t PatchListSize;// = 0 +}; +Kernel-scope tokens section size : 0 +Kernel arguments [2] : + + kernelArg[0]: + | Kernel argument of type unspecified + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 35(DATA_PARAMETER_OBJECT_ID) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + + kernelArg[1]: + | Kernel argument of type unspecified + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 35(DATA_PARAMETER_OBJECT_ID) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } +)==="; +
EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps one kernel argument that has both an objectId token and an argInfo token. The expected text prints the SPatchKernelArgumentInfo block first, with Size read from the token itself and every qualifier string shown in brackets after its length, and the DATA_PARAMETER_OBJECT_ID token after it; each expected line starts with the prefix given as second argument to asString. NOTE(review): std::vector and reinterpret_cast below carry no template arguments - they look stripped from this copy of the patch; confirm against the upstream file. */ +TEST(KernelArgDumper, GivenKernelArgWithObjectIdAndArgInfoThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + auto kernelArgObjId = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_OBJECT_ID); + kernelArg.objectId = &kernelArgObjId; + + std::vector argInfoStorage; + PatchTokensTestData::pushBackArgInfoToken(argInfoStorage); + kernelArg.argInfo = reinterpret_cast(argInfoStorage.data()); + + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type unspecified + | struct SPatchKernelArgumentInfo : + | SPatchItemHeader (Token=26(PATCH_TOKEN_KERNEL_ARGUMENT_INFO), Size=)===" + << kernelArg.argInfo->Size << R"===() + | { + | uint32_t ArgumentNumber;// = 0 + | uint32_t AddressQualifierSize;// = 8 : [__global] + | uint32_t AccessQualifierSize;// = 10 : [read_write] + | uint32_t ArgumentNameSize;// = 10 : [custom_arg] + | uint32_t TypeNameSize;// = 5 : [int*;] + | uint32_t TypeQualifierSize;// = 5 : [const] + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 35(DATA_PARAMETER_OBJECT_ID) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps a kernel argument whose objectArg is a PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT token and whose objectType is Sampler. Expected text: the SAMPLER type line, the SPatchSamplerKernelArgument fields (all zero, Size equal to sizeof(objectArg)) and a Sampler Metadata header with nothing listed under it. NOTE(review): initToken appears without a template argument list - possibly stripped from this copy of the patch; confirm upstream. */ +TEST(KernelArgDumper, GivenSamplerObjectKernelArgThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + auto objectArg = PatchTokensTestData::initToken(iOpenCL::PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT); + kernelArg.objectArg = &objectArg; + kernelArg.objectType =
NEO::PatchTokenBinary::ArgObjectType::Sampler; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type SAMPLER + | struct SPatchSamplerKernelArgument : + | SPatchItemHeader (Token=16(PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT), Size=)===" + << sizeof(objectArg) << R"===() + | { + | uint32_t ArgumentNumber;// = 0 + | uint32_t Type;// = 0 + | uint32_t Offset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t needBindlessHandle;// = 0 + | uint32_t TextureMask;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | uint32_t btiOffset;// = 0 + | } + | Sampler Metadata: +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps a kernel argument whose objectArg is a PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT token and whose objectType is Image. Expected text: the IMAGE type line, the SPatchImageMemoryObjectKernelArgument fields (all zero, Size equal to sizeof(objectArg)) and an Image Metadata header with nothing listed under it. NOTE(review): initToken appears without a template argument list - possibly stripped from this copy of the patch; confirm upstream. */ +TEST(KernelArgDumper, GivenImageObjectKernelArgThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + auto objectArg = PatchTokensTestData::initToken(iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT); + kernelArg.objectArg = &objectArg; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Image; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type IMAGE + | struct SPatchImageMemoryObjectKernelArgument : + | SPatchItemHeader (Token=12(PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT), Size=)===" + << sizeof(objectArg) << R"===() + | { + | uint32_t ArgumentNumber;// = 0 + | uint32_t Type;// = 0 + | uint32_t Offset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t Writeable;// = 0 + | uint32_t Transformable;// = 0 + | uint32_t needBindlessHandle;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | uint32_t btiOffset;// = 0 + | } + | Image Metadata: +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps a kernel argument whose objectArg is a PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT token and whose objectType is Buffer. Expected text: the BUFFER type line, the SPatchGlobalMemoryObjectKernelArgument fields (all zero, Size equal to sizeof(objectArg)) and a Buffer Metadata header with nothing listed under it. NOTE(review): initToken appears without a template argument list - possibly stripped from this copy of the patch; confirm upstream. */ +TEST(KernelArgDumper, GivenGlobalMemoryObjectKernelArgThenProperlyCreatesDump) { +
NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + auto objectArg = PatchTokensTestData::initToken(iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT); + kernelArg.objectArg = &objectArg; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Buffer; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type BUFFER + | struct SPatchGlobalMemoryObjectKernelArgument : + | SPatchItemHeader (Token=11(PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT), Size=)===" + << sizeof(objectArg) << R"===() + | { + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | Buffer Metadata: +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps a kernel argument whose objectArg is a PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT token and whose objectType is Buffer. Expected text: the BUFFER type line, the SPatchStatelessGlobalMemoryObjectKernelArgument fields (all zero, Size equal to sizeof(objectArg)) and a Buffer Metadata header with nothing listed under it. NOTE(review): initToken appears without a template argument list - possibly stripped from this copy of the patch; confirm upstream. */ +TEST(KernelArgDumper, GivenStatelessGlobalMemoryObjectKernelArgThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + auto objectArg = PatchTokensTestData::initToken(iOpenCL::PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT); + kernelArg.objectArg = &objectArg; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Buffer; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type BUFFER + | struct SPatchStatelessGlobalMemoryObjectKernelArgument : + | SPatchItemHeader (Token=30(PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT), Size=)===" + << sizeof(objectArg) << R"===() + | { + | uint32_t ArgumentNumber;// = 0 + | uint32_t SurfaceStateHeapOffset;// = 0 + | uint32_t DataParamOffset;// = 0 + | uint32_t DataParamSize;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | Buffer Metadata: +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +
+TEST(KernelArgDumper, GivenStatelessConstantMemoryObjectKernelArgThenProperlyCreatesDump) { +/* Dumps a kernel argument whose objectArg is a PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT token and whose objectType is Buffer. Expected text: the BUFFER type line, the SPatchStatelessConstantMemoryObjectKernelArgument fields (all zero, Size equal to sizeof(objectArg)) and a Buffer Metadata header with nothing listed under it. NOTE(review): initToken appears without a template argument list - possibly stripped from this copy of the patch; confirm upstream. */ NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + auto objectArg = PatchTokensTestData::initToken(iOpenCL::PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT); + kernelArg.objectArg = &objectArg; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Buffer; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type BUFFER + | struct SPatchStatelessConstantMemoryObjectKernelArgument : + | SPatchItemHeader (Token=31(PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT), Size=)===" + << sizeof(objectArg) << R"===() + | { + | uint32_t ArgumentNumber;// = 0 + | uint32_t SurfaceStateHeapOffset;// = 0 + | uint32_t DataParamOffset;// = 0 + | uint32_t DataParamSize;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | Buffer Metadata: +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps a kernel argument whose objectArg is a PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT token; the test tags it with objectType Buffer. Expected text: the BUFFER type line, the SPatchStatelessDeviceQueueKernelArgument fields (all zero, Size equal to sizeof(objectArg)) and a Buffer Metadata header with nothing listed under it. NOTE(review): initToken appears without a template argument list - possibly stripped from this copy of the patch; confirm upstream. */ +TEST(KernelArgDumper, GivenStatelessDeviceQueueObjectKernelArgThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + auto objectArg = PatchTokensTestData::initToken(iOpenCL::PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT); + kernelArg.objectArg = &objectArg; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Buffer; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type BUFFER + | struct SPatchStatelessDeviceQueueKernelArgument : + | SPatchItemHeader (Token=46(PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT), Size=)===" + << sizeof(objectArg) << R"===() + | { + | uint32_t ArgumentNumber;// = 0 + | uint32_t SurfaceStateHeapOffset;// = 0 + | uint32_t DataParamOffset;// = 0 + | uint32_t DataParamSize;// = 0 + | uint32_t
LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | Buffer Metadata: +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps a Buffer kernel argument that has no objectArg but has both buffer metadata tokens set (bufferOffset and pureStateful). Expected text: the BUFFER type line, the Buffer Metadata header, then the DATA_PARAMETER_BUFFER_OFFSET token followed by the DATA_PARAMETER_BUFFER_STATEFUL token. */ +TEST(KernelArgDumper, GivenBufferKernelArgWithMetadataTokensThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Buffer; + auto dataBufferOffset = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_BUFFER_OFFSET); + auto pureStateful = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_BUFFER_STATEFUL); + kernelArg.metadata.buffer.bufferOffset = &dataBufferOffset; + kernelArg.metadata.buffer.pureStateful = &pureStateful; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type BUFFER + | Buffer Metadata: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 42(DATA_PARAMETER_BUFFER_OFFSET) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 43(DATA_PARAMETER_BUFFER_STATEFUL) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps an Image kernel argument that has no objectArg but has all eight image metadata tokens set. Expected text: the IMAGE type line, the Image Metadata header, then one token dump each for width, height, depth, channel data type, channel order, array size, number of samples and number of mip levels, in that order. */ +TEST(KernelArgDumper,
GivenImageKernelArgWithMetadataTokensThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Image; + auto width = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_IMAGE_WIDTH); + auto height = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_IMAGE_HEIGHT); + auto depth = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_IMAGE_DEPTH); + auto channelDataType = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE); + auto channelOrder = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_ORDER); + auto arraySize = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_IMAGE_ARRAY_SIZE); + auto numSamples = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_IMAGE_NUM_SAMPLES); + auto numMipLevels = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS); + + kernelArg.metadata.image.width = &width; + kernelArg.metadata.image.height = &height; + kernelArg.metadata.image.depth = &depth; + kernelArg.metadata.image.channelDataType = &channelDataType; + kernelArg.metadata.image.channelOrder = &channelOrder; + kernelArg.metadata.image.arraySize = &arraySize; + kernelArg.metadata.image.numSamples = &numSamples; + kernelArg.metadata.image.numMipLevels = &numMipLevels; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type IMAGE + | Image Metadata: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 9(DATA_PARAMETER_IMAGE_WIDTH) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t
DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 10(DATA_PARAMETER_IMAGE_HEIGHT) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 11(DATA_PARAMETER_IMAGE_DEPTH) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 12(DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 13(DATA_PARAMETER_IMAGE_CHANNEL_ORDER) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;//
= 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 18(DATA_PARAMETER_IMAGE_ARRAY_SIZE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 20(DATA_PARAMETER_IMAGE_NUM_SAMPLES) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 27(DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps a Sampler kernel argument that has no objectArg but has all three sampler metadata tokens set. Expected text: the SAMPLER type line, the Sampler Metadata header, then the address mode token, the coordinate snap WA required token and the normalized coords token. This expected order differs from the order in which the test creates and assigns the tokens. */ +TEST(KernelArgDumper, GivenSamplerKernelArgWithMetadataTokensThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Sampler; + auto coordinateSnapWaRequired = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED); + auto addressMode =
PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE); + auto normalizedCoords = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS); + kernelArg.metadata.sampler.coordinateSnapWaRequired = &coordinateSnapWaRequired; + kernelArg.metadata.sampler.addressMode = &addressMode; + kernelArg.metadata.sampler.normalizedCoords = &normalizedCoords; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type SAMPLER + | Sampler Metadata: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 14(DATA_PARAMETER_SAMPLER_ADDRESS_MODE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 21(DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 15(DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0
+ | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} +/* Dumps an Slm kernel argument that has no objectArg but has its single slm metadata token set. Expected text: the SLM type line, the Slm Metadata header and the DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES token. */ +TEST(KernelArgDumper, GivenSlmKernelArgWithMetadataTokensThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Slm; + auto slm = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES); + kernelArg.metadata.slm.token = &slm; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type SLM + | Slm Metadata: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 8(DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} + +TEST(KernelArgDumper, GivenVmeKernelArgWithMetadataTokensThenProperlyCreatesDump) { + NEO::PatchTokenBinary::KernelArgFromPatchtokens kernelArg = {}; + kernelArg.objectType = NEO::PatchTokenBinary::ArgObjectType::Image; + kernelArg.objectTypeSpecialized = NEO::PatchTokenBinary::ArgObjectTypeSpecialized::Vme; + auto mbBlockType = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_VME_MB_BLOCK_TYPE); + auto subpixelMode = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_VME_SUBPIXEL_MODE); + auto sadAdjustMode = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_VME_SAD_ADJUST_MODE); + auto searchPathType =
PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_VME_SEARCH_PATH_TYPE); + + kernelArg.metadataSpecialized.vme.mbBlockType = &mbBlockType; + kernelArg.metadataSpecialized.vme.subpixelMode = &subpixelMode; + kernelArg.metadataSpecialized.vme.sadAdjustMode = &sadAdjustMode; + kernelArg.metadataSpecialized.vme.searchPathType = &searchPathType; + auto generated = NEO::PatchTokenBinary::asString(kernelArg, " | "); + std::stringstream expected; + expected << R"===( | Kernel argument of type IMAGE [ VME ] + | Image Metadata: + | Vme Metadata: + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 23(DATA_PARAMETER_VME_MB_BLOCK_TYPE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 25(DATA_PARAMETER_VME_SAD_ADJUST_MODE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 26(DATA_PARAMETER_VME_SEARCH_PATH_TYPE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t 
IsEmulationArgument;// = 0 + | } + | struct SPatchDataParameterBuffer : + | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" + << sizeof(iOpenCL::SPatchDataParameterBuffer) << R"===() + | { + | uint32_t Type;// = 24(DATA_PARAMETER_VME_SUBPIXEL_MODE) + | uint32_t ArgumentNumber;// = 0 + | uint32_t Offset;// = 0 + | uint32_t DataSize;// = 0 + | uint32_t SourceOffset;// = 0 + | uint32_t LocationIndex;// = 0 + | uint32_t LocationIndex2;// = 0 + | uint32_t IsEmulationArgument;// = 0 + | } +)==="; + EXPECT_STREQ(expected.str().c_str(), generated.c_str()); +} diff --git a/unit_tests/compiler_interface/patchtokens_tests.h b/unit_tests/compiler_interface/patchtokens_tests.h new file mode 100644 index 0000000000..bdb459528e --- /dev/null +++ b/unit_tests/compiler_interface/patchtokens_tests.h @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "runtime/compiler_interface/patchtokens_decoder.h" +#include "test.h" + +#include + +namespace PatchTokensTestData { +struct ValidEmptyProgram : NEO::PatchTokenBinary::ProgramFromPatchtokens { + ValidEmptyProgram() { + iOpenCL::SProgramBinaryHeader headerTok = {}; + headerTok.Magic = iOpenCL::MAGIC_CL; + headerTok.Version = iOpenCL::CURRENT_ICBE_VERSION; + headerTok.Device = renderCoreFamily; + this->decodeStatus = NEO::PatchTokenBinary::DecoderError::Success; + + storage.insert(storage.end(), reinterpret_cast(&headerTok), reinterpret_cast((&headerTok) + 1)); + recalcTokPtr(); + } + void recalcTokPtr() { + this->blobs.programInfo = storage; + this->headerMutable = reinterpret_cast(storage.data()); + this->header = this->headerMutable; + } + + std::vector storage; + iOpenCL::SProgramBinaryHeader *headerMutable = nullptr; +}; + +struct ValidProgramWithConstantSurface : ValidEmptyProgram { + ValidProgramWithConstantSurface() { + iOpenCL::SPatchAllocateConstantMemorySurfaceProgramBinaryInfo constSurfTok = {}; + constSurfTok.Token = 
iOpenCL::PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO; + constSurfTok.Size = sizeof(constSurfTok); + constSurfTok.InlineDataSize = 128; + this->programScopeTokens.allocateConstantMemorySurface.push_back(nullptr); + storage.insert(storage.end(), reinterpret_cast(&constSurfTok), reinterpret_cast((&constSurfTok) + 1)); + storage.resize(storage.size() + constSurfTok.InlineDataSize); + recalcTokPtr(); + } + + void recalcTokPtr() { + ValidEmptyProgram::recalcTokPtr(); + this->constSurfMutable = reinterpret_cast(storage.data() + sizeof(*this->headerMutable)); + this->programScopeTokens.allocateConstantMemorySurface[0] = this->constSurfMutable; + this->blobs.patchList = ArrayRef(storage.data() + sizeof(*this->headerMutable), storage.size() - sizeof(*this->headerMutable)); + this->headerMutable->PatchListSize = static_cast(this->blobs.patchList.size()); + } + + iOpenCL::SPatchAllocateConstantMemorySurfaceProgramBinaryInfo *constSurfMutable = nullptr; +}; + +struct ValidProgramWithGlobalSurface : ValidEmptyProgram { + ValidProgramWithGlobalSurface() { + iOpenCL::SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo globalSurfTok = {}; + globalSurfTok.Token = iOpenCL::PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO; + globalSurfTok.Size = sizeof(globalSurfTok); + globalSurfTok.InlineDataSize = 256; + this->programScopeTokens.allocateGlobalMemorySurface.push_back(nullptr); + storage.insert(storage.end(), reinterpret_cast(&globalSurfTok), reinterpret_cast((&globalSurfTok) + 1)); + storage.resize(storage.size() + globalSurfTok.InlineDataSize); + recalcTokPtr(); + } + + void recalcTokPtr() { + ValidEmptyProgram::recalcTokPtr(); + this->globalSurfMutable = reinterpret_cast(storage.data() + sizeof(*this->headerMutable)); + this->programScopeTokens.allocateGlobalMemorySurface[0] = this->globalSurfMutable; + this->blobs.patchList = ArrayRef(storage.data() + sizeof(*this->headerMutable), storage.size() - sizeof(*this->headerMutable)); + 
this->headerMutable->PatchListSize = static_cast(this->blobs.patchList.size()); + } + + iOpenCL::SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo *globalSurfMutable = nullptr; +}; + +struct ValidProgramWithConstantSurfaceAndPointer : ValidProgramWithConstantSurface { + ValidProgramWithConstantSurfaceAndPointer() { + iOpenCL::SPatchConstantPointerProgramBinaryInfo constantPointerTok = {}; + constantPointerTok.Token = iOpenCL::PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO; + constantPointerTok.Size = sizeof(constantPointerTok); + constantPointerTok.ConstantBufferIndex = 0; + constantPointerTok.BufferIndex = 0; + constantPointerTok.BufferType = iOpenCL::PROGRAM_SCOPE_CONSTANT_BUFFER; + constantPointerTok.ConstantPointerOffset = 96; + this->programScopeTokens.constantPointer.push_back(nullptr); + storage.insert(storage.end(), reinterpret_cast(&constantPointerTok), reinterpret_cast((&constantPointerTok) + 1)); + recalcTokPtr(); + } + + void recalcTokPtr() { + ValidProgramWithConstantSurface::recalcTokPtr(); + this->constantPointerMutable = reinterpret_cast(storage.data() + sizeof(*this->headerMutable) + sizeof(*this->constSurfMutable) + this->constSurfMutable->InlineDataSize); + this->programScopeTokens.constantPointer[0] = this->constantPointerMutable; + } + + iOpenCL::SPatchConstantPointerProgramBinaryInfo *constantPointerMutable = nullptr; +}; + +struct ValidProgramWithGlobalSurfaceAndPointer : ValidProgramWithGlobalSurface { + ValidProgramWithGlobalSurfaceAndPointer() { + iOpenCL::SPatchGlobalPointerProgramBinaryInfo globalPointerTok = {}; + globalPointerTok.Token = iOpenCL::PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO; + globalPointerTok.Size = sizeof(globalPointerTok); + globalPointerTok.GlobalBufferIndex = 0; + globalPointerTok.BufferIndex = 0; + globalPointerTok.BufferType = iOpenCL::PROGRAM_SCOPE_GLOBAL_BUFFER; + globalPointerTok.GlobalPointerOffset = 48; + this->programScopeTokens.globalPointer.push_back(nullptr); + storage.insert(storage.end(), 
reinterpret_cast(&globalPointerTok), reinterpret_cast((&globalPointerTok) + 1)); + recalcTokPtr(); + } + + void recalcTokPtr() { + ValidProgramWithGlobalSurface::recalcTokPtr(); + this->globalPointerMutable = reinterpret_cast(storage.data() + sizeof(*this->headerMutable) + sizeof(*this->globalSurfMutable) + this->globalSurfMutable->InlineDataSize); + this->programScopeTokens.globalPointer[0] = this->globalPointerMutable; + } + + iOpenCL::SPatchGlobalPointerProgramBinaryInfo *globalPointerMutable = nullptr; +}; + +struct ValidEmptyKernel { + static NEO::PatchTokenBinary::KernelFromPatchtokens create(std::vector &storage) { + NEO::PatchTokenBinary::KernelFromPatchtokens ret; + iOpenCL::SKernelBinaryHeaderCommon headerTokInl = {}; + ret.decodeStatus = NEO::PatchTokenBinary::DecoderError::Success; + ret.name = "test_kernel"; + headerTokInl.KernelNameSize = static_cast(ret.name.size()); + + auto kernOffset = storage.size(); + storage.reserve(storage.size() + 512); + storage.insert(storage.end(), reinterpret_cast(&headerTokInl), reinterpret_cast((&headerTokInl) + 1)); + auto headerTok = reinterpret_cast(&*(storage.begin() + kernOffset)); + ret.NEO::PatchTokenBinary::KernelFromPatchtokens::header = headerTok; + storage.insert(storage.end(), reinterpret_cast(ret.name.begin()), reinterpret_cast(ret.name.end())); + ret.blobs.kernelInfo = ArrayRef(storage.data() + kernOffset, storage.data() + storage.size()); + headerTok->CheckSum = NEO::PatchTokenBinary::calcKernelChecksum(ret.blobs.kernelInfo); + return ret; + } +}; + +struct ValidProgramWithKernel : ValidEmptyProgram { + ValidProgramWithKernel() { + this->headerMutable->NumberOfKernels = 1; + kernOffset = storage.size(); + this->kernels.push_back(ValidEmptyKernel::create(storage)); + this->kernels[0].decodeStatus = NEO::PatchTokenBinary::DecoderError::Success; + recalcTokPtr(); + } + + void recalcTokPtr() { + ValidEmptyProgram::recalcTokPtr(); + this->kernels[0].blobs.kernelInfo = ArrayRef(storage.data() + kernOffset, 
storage.data() + storage.size()); + kernelHeaderMutable = reinterpret_cast(&*(storage.begin() + kernOffset)); + this->kernels[0].header = kernelHeaderMutable; + kernelHeaderMutable->CheckSum = NEO::PatchTokenBinary::calcKernelChecksum(this->kernels[0].blobs.kernelInfo); + } + + size_t kernOffset = 0U; + iOpenCL::SKernelBinaryHeaderCommon *kernelHeaderMutable = nullptr; +}; + +struct ValidProgramWithKernelUsingSlm : ValidProgramWithKernel { + ValidProgramWithKernelUsingSlm() { + patchlistOffset = storage.size(); + iOpenCL::SPatchAllocateLocalSurface slmTok = {}; + slmTok.Token = iOpenCL::PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE; + slmTok.Size = sizeof(slmTok); + slmTok.TotalInlineLocalMemorySize = 16; + storage.insert(storage.end(), reinterpret_cast(&slmTok), reinterpret_cast((&slmTok) + 1)); + recalcTokPtr(); + } + + void recalcTokPtr() { + ValidProgramWithKernel::recalcTokPtr(); + this->kernels[0].blobs.patchList = ArrayRef(storage.data() + patchlistOffset, storage.data() + storage.size()); + slmMutable = reinterpret_cast(storage.data() + patchlistOffset); + this->kernels[0].tokens.allocateLocalSurface = slmMutable; + this->kernelHeaderMutable->PatchListSize = static_cast(this->kernels[0].blobs.patchList.size()); + } + + iOpenCL::SPatchAllocateLocalSurface *slmMutable = nullptr; + size_t patchlistOffset = 0U; +}; + +template +inline TokenT initToken(iOpenCL::PATCH_TOKEN tok) { + TokenT ret = {}; + ret.Size = sizeof(TokenT); + ret.Token = tok; + return ret; +} + +inline iOpenCL::SPatchDataParameterBuffer initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_TOKEN type, uint32_t sourceIndex = 0, uint32_t argNum = 0) { + iOpenCL::SPatchDataParameterBuffer tok = {}; + tok.Size = static_cast(sizeof(iOpenCL::SPatchDataParameterBuffer)); + tok.Token = iOpenCL::PATCH_TOKEN_DATA_PARAMETER_BUFFER; + tok.Type = type; + tok.SourceOffset = sourceIndex * sizeof(uint32_t); + tok.ArgumentNumber = argNum; + return tok; +} + +inline uint32_t pushBackString(const std::string &str, 
std::vector &storage) { + auto offset = storage.size(); + storage.insert(storage.end(), reinterpret_cast(str.c_str()), reinterpret_cast(str.c_str()) + str.size()); + return static_cast(offset); +} + +inline uint32_t pushBackStringToken(const std::string &str, uint32_t stringIndex, std::vector &outStream) { + auto off = outStream.size(); + outStream.reserve(outStream.size() + sizeof(iOpenCL::SPatchString) + str.length()); + outStream.resize(outStream.size() + sizeof(iOpenCL::SPatchString)); + iOpenCL::SPatchString *tok = reinterpret_cast(outStream.data() + off); + *tok = initToken(iOpenCL::PATCH_TOKEN::PATCH_TOKEN_STRING); + tok->StringSize = static_cast(str.length()); + tok->Size += tok->StringSize; + tok->Index = stringIndex; + pushBackString(str, outStream); + return static_cast(off); +}; + +inline uint32_t pushBackArgInfoToken(std::vector &outStream, + uint32_t argNum = 0, + const std::string &addressQualifier = "__global", const std::string &accessQualifier = "read_write", + const std::string &argName = "custom_arg", const std::string &typeName = "int*;", std::string typeQualifier = "const") { + auto off = outStream.size(); + iOpenCL::SPatchKernelArgumentInfo tok = {}; + tok.Token = iOpenCL::PATCH_TOKEN_KERNEL_ARGUMENT_INFO; + tok.AddressQualifierSize = static_cast(addressQualifier.size()); + tok.AccessQualifierSize = static_cast(accessQualifier.size()); + tok.ArgumentNameSize = static_cast(argName.size()); + tok.TypeNameSize = static_cast(typeName.size()); + tok.TypeQualifierSize = static_cast(typeQualifier.size()); + tok.Size = sizeof(iOpenCL::SPatchKernelArgumentInfo) + tok.AddressQualifierSize + tok.AccessQualifierSize + tok.ArgumentNameSize + tok.TypeNameSize + tok.TypeQualifierSize; + + outStream.insert(outStream.end(), reinterpret_cast(&tok), reinterpret_cast(&tok) + sizeof(tok)); + pushBackString(addressQualifier, outStream); + pushBackString(accessQualifier, outStream); + pushBackString(argName, outStream); + pushBackString(typeName, outStream); + 
pushBackString(typeQualifier, outStream); + return static_cast(off); +} +} // namespace PatchTokensTestData diff --git a/unit_tests/compiler_interface/patchtokens_validator_tests.cpp b/unit_tests/compiler_interface/patchtokens_validator_tests.cpp new file mode 100644 index 0000000000..b890657d76 --- /dev/null +++ b/unit_tests/compiler_interface/patchtokens_validator_tests.cpp @@ -0,0 +1,337 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "runtime/compiler_interface/patchtokens_decoder.h" +#include "runtime/compiler_interface/patchtokens_validator.inl" +#include "test.h" + +#include "patchtokens_tests.h" + +struct UknownTokenValidator { + UknownTokenValidator(bool isSafeToSkip = true) : isSafeToSkip(isSafeToSkip) { + } + bool isSafeToSkipUnhandledToken(uint32_t token) const { + return isSafeToSkip; + } + bool isSafeToSkip = true; +}; + +TEST(PatchtokensValidator, GivenValidProgramThenValidationSucceeds) { + PatchTokensTestData::ValidEmptyProgram prog; + std::string error, warning; + + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_EQ(0U, error.size()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidOrUnknownStatusThenValidationFails) { + PatchTokensTestData::ValidEmptyProgram prog; + std::string error, warning; + + prog.decodeStatus = NEO::PatchTokenBinary::DecoderError::Undefined; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("ProgramFromPatchtokens wasn't successfully decoded", error.c_str()); + EXPECT_EQ(0U, warning.size()); + + error.clear(); + warning.clear(); + + prog.decodeStatus = NEO::PatchTokenBinary::DecoderError::InvalidBinary; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, 
UknownTokenValidator(), error, warning)); + EXPECT_STREQ("ProgramFromPatchtokens wasn't successfully decoded", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithASingleConstantSurfaceThenValidationSucceeds) { + PatchTokensTestData::ValidProgramWithConstantSurface prog; + std::string error, warning; + + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_EQ(0U, error.size()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithMultipleConstantSurfacesThenValidationFails) { + PatchTokensTestData::ValidProgramWithConstantSurface prog; + std::string error, warning; + + iOpenCL::SPatchAllocateConstantMemorySurfaceProgramBinaryInfo constSurface2 = *prog.programScopeTokens.allocateConstantMemorySurface[0]; + prog.programScopeTokens.allocateConstantMemorySurface.push_back(&constSurface2); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled number of global constants surfaces > 1", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithASingleGlobalSurfaceThenValidationSucceeds) { + PatchTokensTestData::ValidProgramWithGlobalSurface prog; + std::string error, warning; + + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_EQ(0U, error.size()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithMultipleGlobalSurfacesThenValidationFails) { + PatchTokensTestData::ValidProgramWithGlobalSurface prog; + std::string error, warning; + + iOpenCL::SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo globSurface2 = *prog.programScopeTokens.allocateGlobalMemorySurface[0]; + 
prog.programScopeTokens.allocateGlobalMemorySurface.push_back(&globSurface2); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled number of global variables surfaces > 1", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithValidConstantPointerThenValidationSucceeds) { + PatchTokensTestData::ValidProgramWithConstantSurfaceAndPointer prog; + std::string error, warning; + + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_EQ(0U, error.size()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidConstantPointerBufferIndexThenValidationFails) { + PatchTokensTestData::ValidProgramWithConstantSurfaceAndPointer prog; + std::string error, warning; + + prog.constantPointerMutable->BufferIndex = 1; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchConstantPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidConstantPointerConstantBufferIndexThenValidationFails) { + PatchTokensTestData::ValidProgramWithConstantSurfaceAndPointer prog; + std::string error, warning; + + prog.constantPointerMutable->ConstantBufferIndex = 1; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchConstantPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidConstantPointerBufferTypeThenValidationFails) { + PatchTokensTestData::ValidProgramWithConstantSurfaceAndPointer prog; + 
std::string error, warning; + + prog.constantPointerMutable->BufferType = iOpenCL::PROGRAM_SCOPE_GLOBAL_BUFFER; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchConstantPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidConstantPointerOffsetThenValidationFails) { + PatchTokensTestData::ValidProgramWithConstantSurfaceAndPointer prog; + std::string error, warning; + + prog.constantPointerMutable->ConstantPointerOffset = prog.constSurfMutable->InlineDataSize; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchConstantPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithoutConstantSurfaceButWithConstantPointerThenValidationFails) { + PatchTokensTestData::ValidProgramWithConstantSurfaceAndPointer prog; + std::string error, warning; + + prog.programScopeTokens.allocateConstantMemorySurface.clear(); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchConstantPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithValidGlobalPointerThenValidationSucceeds) { + PatchTokensTestData::ValidProgramWithGlobalSurfaceAndPointer prog; + std::string error, warning; + + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_EQ(0U, error.size()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidGlobalPointerBufferIndexThenValidationFails) { + 
PatchTokensTestData::ValidProgramWithGlobalSurfaceAndPointer prog; + std::string error, warning; + + prog.globalPointerMutable->BufferIndex = 1; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchGlobalPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidGlobalPointerGlobalBufferIndexThenValidationFails) { + PatchTokensTestData::ValidProgramWithGlobalSurfaceAndPointer prog; + std::string error, warning; + + prog.globalPointerMutable->GlobalBufferIndex = 1; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchGlobalPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidGlobalPointerBufferTypeThenValidationFails) { + PatchTokensTestData::ValidProgramWithGlobalSurfaceAndPointer prog; + std::string error, warning; + + prog.globalPointerMutable->BufferType = iOpenCL::PROGRAM_SCOPE_CONSTANT_BUFFER; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchGlobalPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithInvalidGlobalPointerOffsetThenValidationFails) { + PatchTokensTestData::ValidProgramWithGlobalSurfaceAndPointer prog; + std::string error, warning; + + prog.globalPointerMutable->GlobalPointerOffset = prog.globalSurfMutable->InlineDataSize; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchGlobalPointerProgramBinaryInfo", 
error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithoutGlobalSurfaceButWithGlobalPointerThenValidationFails) { + PatchTokensTestData::ValidProgramWithGlobalSurfaceAndPointer prog; + std::string error, warning; + + prog.programScopeTokens.allocateGlobalMemorySurface.clear(); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("Unhandled SPatchGlobalPointerProgramBinaryInfo", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithUnknownPatchTokenWhenUknownTokenCantBeSkippedThenValidationFails) { + PatchTokensTestData::ValidEmptyProgram prog; + std::string error, warning; + + iOpenCL::SPatchItemHeader unknownToken = {}; + unknownToken.Token = iOpenCL::NUM_PATCH_TOKENS + 1; + prog.unhandledTokens.push_back(&unknownToken); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(false), error, warning)); + auto expectedError = "Unhandled required program-scope Patch Token : " + std::to_string(unknownToken.Token); + EXPECT_STREQ(expectedError.c_str(), error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithUnknownPatchTokenWhenUknownTokenCanBeSkippedThenValidationSucceedsAndEmitsWarning) { + PatchTokensTestData::ValidEmptyProgram prog; + std::string error, warning; + + iOpenCL::SPatchItemHeader unknownToken = {}; + unknownToken.Token = iOpenCL::NUM_PATCH_TOKENS + 1; + prog.unhandledTokens.push_back(&unknownToken); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(true), error, warning)); + auto expectedWarning = "Unknown program-scope Patch Token : " + std::to_string(unknownToken.Token); + EXPECT_EQ(0U, error.size()); + EXPECT_STREQ(expectedWarning.c_str(), warning.c_str()); +} 
+ +TEST(PatchtokensValidator, GivenProgramWithUnsupportedPatchTokenVersionThenValidationFails) { + PatchTokensTestData::ValidEmptyProgram prog; + std::string error, warning; + + prog.headerMutable->Version = std::numeric_limits::max(); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(true), error, warning)); + auto expectedError = "Unhandled Version of Patchtokens: expected: " + std::to_string(iOpenCL::CURRENT_ICBE_VERSION) + ", got: " + std::to_string(prog.header->Version); + EXPECT_STREQ(expectedError.c_str(), error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithUnsupportedPlatformThenValidationFails) { + PatchTokensTestData::ValidEmptyProgram prog; + std::string error, warning; + + prog.headerMutable->Device = IGFX_MAX_CORE; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(true), error, warning)); + auto expectedError = "Unsupported device binary, device GFXCORE_FAMILY : " + std::to_string(prog.header->Device); + EXPECT_STREQ(expectedError.c_str(), error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithKernelThenValidationSucceeds) { + PatchTokensTestData::ValidProgramWithKernel prog; + std::string error, warning; + + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_EQ(0U, error.size()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithKernelWhenKernelHasInvalidOrUnknownStatusThenValidationFails) { + PatchTokensTestData::ValidProgramWithKernel prog; + std::string error, warning; + + prog.kernels[0].decodeStatus = NEO::PatchTokenBinary::DecoderError::Undefined; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, 
UknownTokenValidator(), error, warning)); + EXPECT_STREQ("KernelFromPatchtokens wasn't successfully decoded", error.c_str()); + EXPECT_EQ(0U, warning.size()); + + error.clear(); + warning.clear(); + + prog.kernels[0].decodeStatus = NEO::PatchTokenBinary::DecoderError::InvalidBinary; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("KernelFromPatchtokens wasn't successfully decoded", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithKernelWhenKernelHasInvalidChecksumThenValidationFails) { + PatchTokensTestData::ValidProgramWithKernel prog; + std::string error, warning; + + prog.kernelHeaderMutable->CheckSum += 1; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(), error, warning)); + EXPECT_STREQ("KernelFromPatchtokens has invalid checksum", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithKernelUsingSlmThenValidationSucceeds) { + PatchTokensTestData::ValidProgramWithKernelUsingSlm prog; + std::string error, warning; + + size_t slmSizeAvailable = 1 + prog.kernels[0].tokens.allocateLocalSurface->TotalInlineLocalMemorySize; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, slmSizeAvailable, UknownTokenValidator(), error, warning)); + EXPECT_EQ(0U, error.size()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenProgramWithKernelUsingSlmWhenKernelRequiresTooMuchSlmThenValidationFails) { + PatchTokensTestData::ValidProgramWithKernelUsingSlm prog; + std::string error, warning; + + size_t slmSizeAvailable = -1 + prog.kernels[0].tokens.allocateLocalSurface->TotalInlineLocalMemorySize; + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::NotEnoughSlm, NEO::PatchTokenBinary::validate(prog, slmSizeAvailable, 
UknownTokenValidator(), error, warning)); + EXPECT_STREQ("KernelFromPatchtokens requires too much SLM", error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithKernelContainingUnknownPatchTokenWhenUknownTokenCantBeSkippedThenValidationFails) { + PatchTokensTestData::ValidProgramWithKernel prog; + std::string error, warning; + + iOpenCL::SPatchItemHeader unknownToken = {}; + unknownToken.Token = iOpenCL::NUM_PATCH_TOKENS + 1; + prog.kernels[0].unhandledTokens.push_back(&unknownToken); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::InvalidBinary, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(false), error, warning)); + auto expectedError = "Unhandled required kernel-scope Patch Token : " + std::to_string(unknownToken.Token); + EXPECT_STREQ(expectedError.c_str(), error.c_str()); + EXPECT_EQ(0U, warning.size()); +} + +TEST(PatchtokensValidator, GivenValidProgramWithKernelContainingUnknownPatchTokenWhenUknownTokenCanBeSkippedThenValidationSucceedsAndEmitsWarning) { + PatchTokensTestData::ValidProgramWithKernel prog; + std::string error, warning; + + iOpenCL::SPatchItemHeader unknownToken = {}; + unknownToken.Token = iOpenCL::NUM_PATCH_TOKENS + 1; + prog.kernels[0].unhandledTokens.push_back(&unknownToken); + EXPECT_EQ(NEO::PatchTokenBinary::ValidatorError::Success, NEO::PatchTokenBinary::validate(prog, 0U, UknownTokenValidator(true), error, warning)); + auto expectedWarning = "Unknown kernel-scope Patch Token : " + std::to_string(unknownToken.Token); + EXPECT_EQ(0U, error.size()); + EXPECT_STREQ(expectedWarning.c_str(), warning.c_str()); +} diff --git a/unit_tests/device_queue/device_queue_hw_tests.cpp b/unit_tests/device_queue/device_queue_hw_tests.cpp index bc894407f6..59718d7cf0 100644 --- a/unit_tests/device_queue/device_queue_hw_tests.cpp +++ b/unit_tests/device_queue/device_queue_hw_tests.cpp @@ -796,7 +796,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, getProfilingEndCmdsSi 
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, givenDeviceQueueWhenRunningOnCCsThenFfidSkipOffsetIsAddedToBlockKernelStartPointer) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); std::unique_ptr mockParentKernel(MockParentKernel::create(*pContext)); - KernelInfo *blockInfo = const_cast(mockParentKernel->mockProgram->getBlockKernelInfo(0)); + KernelInfo *blockInfo = const_cast(mockParentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0)); blockInfo->createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager()); ASSERT_NE(nullptr, blockInfo->getGraphicsAllocation()); const_cast(blockInfo->patchInfo.threadPayload)->OffsetToSkipSetFFIDGP = 0x1234; diff --git a/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp b/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp index 79c31b9ea4..b66df9ca95 100644 --- a/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -310,12 +310,12 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu auto dstBindingTable = reinterpret_cast(dstBlockBti); auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset); - auto srcBindingTable = reinterpret_cast(srcBlockBti); + auto srcBindingTable = reinterpret_cast(srcBlockBti); for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) { uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer(); uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer(); auto *dstSurfaceState = reinterpret_cast(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer)); - auto *srcSurfaceState = reinterpret_cast(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer)); + auto *srcSurfaceState = reinterpret_cast(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer)); EXPECT_EQ(0, 
memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE))); } diff --git a/unit_tests/fixtures/kernel_data_fixture.cpp b/unit_tests/fixtures/kernel_data_fixture.cpp index dc7ea7e76f..9391c7a647 100644 --- a/unit_tests/fixtures/kernel_data_fixture.cpp +++ b/unit_tests/fixtures/kernel_data_fixture.cpp @@ -10,6 +10,7 @@ #include "core/helpers/aligned_memory.h" #include "core/helpers/string.h" #include "core/memory_manager/graphics_allocation.h" +#include "runtime/compiler_interface/patchtokens_decoder.h" void KernelDataTest::buildAndDecode() { cl_int error = CL_SUCCESS; @@ -78,16 +79,22 @@ void KernelDataTest::buildAndDecode() { pCurPtr += sizeof(SPatchDataParameterStream); // now build a program with this kernel data - error = program->build(pKernelData, kernelDataSize); + iOpenCL::SProgramBinaryHeader header = {}; + NEO::PatchTokenBinary::ProgramFromPatchtokens programFromPatchtokens; + programFromPatchtokens.decodeStatus = PatchTokenBinary::DecoderError::Success; + programFromPatchtokens.header = &header; + programFromPatchtokens.kernels.resize(1); + auto &kernelFromPatchtokens = *programFromPatchtokens.kernels.rbegin(); + auto kernelBlob = ArrayRef(reinterpret_cast(pKernelData), kernelDataSize); + bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelBlob, kernelFromPatchtokens); + EXPECT_TRUE(decodeSuccess); + + program->populateKernelInfo(programFromPatchtokens, 0, error); EXPECT_EQ(CL_SUCCESS, error); // extract the kernel info pKernelInfo = program->Program::getKernelInfo(kernelName.c_str()); - // validate kernel info - // vaidate entire set of data - EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pBlob, pKernelData, kernelDataSize)); - // validate header EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pKernelHeader, &kernelBinaryHeader, sizeof(SKernelBinaryHeaderCommon))); @@ -107,9 +114,6 @@ void KernelDataTest::buildAndDecode() { if (pSsh != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pSsh, pSsh, sshSize)); } - if 
(pPatchList != nullptr) { - EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pPatchList, pPatchList, patchListSize)); - } if (kernelHeapSize) { auto kernelAllocation = pKernelInfo->getGraphicsAllocation(); UNRECOVERABLE_IF(kernelAllocation == nullptr); diff --git a/unit_tests/gtpin/gtpin_tests.cpp b/unit_tests/gtpin/gtpin_tests.cpp index 14e12321a9..ac65135752 100644 --- a/unit_tests/gtpin/gtpin_tests.cpp +++ b/unit_tests/gtpin/gtpin_tests.cpp @@ -8,6 +8,7 @@ #include "core/helpers/basic_math.h" #include "core/helpers/file_io.h" #include "core/helpers/hash.h" +#include "runtime/compiler_interface/patchtokens_decoder.h" #include "runtime/context/context.h" #include "runtime/device/device.h" #include "runtime/gtpin/gtpin_defs.h" @@ -2329,8 +2330,12 @@ TEST_F(ProgramTests, givenGenBinaryWithGtpinInfoWhenProcessGenBinaryCalledThenGt pPatch->Token = iOpenCL::PATCH_TOKEN_GTPIN_INFO; pPatch->Size = sizeof(iOpenCL::SPatchItemHeader); binSize += sizeof(iOpenCL::SPatchItemHeader); + pBin += sizeof(iOpenCL::SPatchItemHeader); + + pKHdr->CheckSum = PatchTokenBinary::calcKernelChecksum(ArrayRef(reinterpret_cast(pKHdr), reinterpret_cast(pBin))); // Decode prepared program binary pProgram->genBinary = makeCopy(&genBin[0], binSize); + pProgram->genBinarySize = binSize; retVal = pProgram->processGenBinary(); auto kernelInfo = pProgram->getKernelInfo("TstCopy"); EXPECT_NE(kernelInfo->igcInfoForGtpin, nullptr); diff --git a/unit_tests/kernel/kernel_reflection_surface_tests.cpp b/unit_tests/kernel/kernel_reflection_surface_tests.cpp index 44587dc6be..2099438adf 100644 --- a/unit_tests/kernel/kernel_reflection_surface_tests.cpp +++ b/unit_tests/kernel/kernel_reflection_surface_tests.cpp @@ -336,7 +336,7 @@ TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBufferAndDataParameterBuf dataParameterBuffer.SourceOffset = 0; dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT; - info.patchInfo.dataParameterBuffers.push_back(&dataParameterBuffer); + 
info.patchInfo.dataParameterBuffersKernelArgs.push_back(&dataParameterBuffer); info.storeKernelArgument(&dataParameterBuffer); std::vector curbeParams; @@ -669,7 +669,7 @@ TEST(KernelReflectionSurfaceTestSingle, ObtainKernelReflectionSurfaceWithoutKern EXPECT_TRUE(kernel.isParentKernel); - program.addBlockKernel(blockInfo); + program.blockKernelManager->addBlockKernelInfo(blockInfo); kernel.createReflectionSurface(); auto reflectionSurface = kernel.getKernelReflectionSurface(); @@ -734,7 +734,7 @@ TEST(KernelReflectionSurfaceTestSingle, ObtainKernelReflectionSurfaceWithDeviceQ EXPECT_TRUE(kernel.isParentKernel); - program.addBlockKernel(blockInfo); + program.blockKernelManager->addBlockKernelInfo(blockInfo); kernel.createReflectionSurface(); auto reflectionSurface = kernel.getKernelReflectionSurface(); @@ -1978,7 +1978,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryWh parentKernel->patchBlocksCurbeWithConstantValues(); - auto *blockInfo = parentKernel->mockProgram->getBlockKernelInfo(0); + auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; @@ -2012,7 +2012,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryAn parentKernel->patchBlocksCurbeWithConstantValues(); - auto *blockInfo = parentKernel->mockProgram->getBlockKernelInfo(0); + auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); @@ -2045,7 +2045,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemory parentKernel->patchBlocksCurbeWithConstantValues(); - auto *blockInfo = 
parentKernel->mockProgram->getBlockKernelInfo(0); + auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; @@ -2088,7 +2088,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemory parentKernel->patchBlocksCurbeWithConstantValues(); - auto *blockInfo = parentKernel->mockProgram->getBlockKernelInfo(0); + auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; diff --git a/unit_tests/kernel/kernel_tests.cpp b/unit_tests/kernel/kernel_tests.cpp index c19d085e61..63af8605ea 100644 --- a/unit_tests/kernel/kernel_tests.cpp +++ b/unit_tests/kernel/kernel_tests.cpp @@ -2459,7 +2459,7 @@ TEST_F(KernelCrossThreadTests, patchBlocksSimdSize) { kernel->executionEnvironmentBlock.CompiledSIMD16 = 1; kernel->executionEnvironmentBlock.CompiledSIMD32 = 0; infoBlock->patchInfo.executionEnvironment = &kernel->executionEnvironmentBlock; - kernel->mockProgram->addBlockKernel(infoBlock); + kernel->mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock); // patch block's simd size kernel->mockKernel->patchBlocksSimdSize(); @@ -2469,7 +2469,7 @@ TEST_F(KernelCrossThreadTests, patchBlocksSimdSize) { uint32_t *simdSize = reinterpret_cast(blockSimdSize); // check of block's simd size has been patched correctly - EXPECT_EQ(kernel->mockProgram->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); + EXPECT_EQ(kernel->mockProgram->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); delete kernel; } diff --git a/unit_tests/kernel/parent_kernel_tests.cpp b/unit_tests/kernel/parent_kernel_tests.cpp index 3cd4888471..d0523fdf0c 100644 --- a/unit_tests/kernel/parent_kernel_tests.cpp +++ b/unit_tests/kernel/parent_kernel_tests.cpp @@ 
-75,7 +75,7 @@ TEST(ParentKernelTest, patchBlocksSimdSize) { void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); - EXPECT_EQ(program->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); + EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); } TEST(ParentKernelTest, hasDeviceEnqueue) { @@ -104,7 +104,7 @@ TEST(ParentKernelTest, initializeOnParentKernelPatchesBlocksSimdSize) { void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); - EXPECT_EQ(program->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); + EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); } TEST(ParentKernelTest, initializeOnParentKernelAllocatesPrivateMemoryForBlocks) { @@ -194,7 +194,7 @@ TEST(ParentKernelTest, initializeOnParentKernelAllocatesPrivateMemoryForBlocks) infoBlock->heapInfo.pDsh = (void *)new uint64_t[64]; infoBlock->crossThreadData = new char[crossThreadOffsetBlock]; - program->addBlockKernel(infoBlock); + program->blockKernelManager->addBlockKernelInfo(infoBlock); parentKernel->initialize(); diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h index 5a0eca41ba..0823cd967a 100644 --- a/unit_tests/mocks/mock_kernel.h +++ b/unit_tests/mocks/mock_kernel.h @@ -11,6 +11,7 @@ #include "runtime/device/device.h" #include "runtime/kernel/grf_config.h" #include "runtime/kernel/kernel.h" +#include "runtime/program/block_kernel_manager.h" #include "runtime/scheduler/scheduler_kernel.h" #include "unit_tests/mocks/mock_context.h" #include "unit_tests/mocks/mock_program.h" @@ -532,7 +533,7 @@ class MockParentKernel : public Kernel { infoBlock->heapInfo.pDsh = (void *)new uint64_t[64]; infoBlock->crossThreadData = new 
char[crossThreadOffsetBlock > crossThreadSize ? crossThreadOffsetBlock : crossThreadSize]; - mockProgram->addBlockKernel(infoBlock); + mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock); parent->mockProgram = mockProgram; return parent; diff --git a/unit_tests/mocks/mock_program.cpp b/unit_tests/mocks/mock_program.cpp index 70ccc53dbe..f9a290a31f 100644 --- a/unit_tests/mocks/mock_program.cpp +++ b/unit_tests/mocks/mock_program.cpp @@ -11,6 +11,7 @@ #include "core/helpers/hash.h" #include "runtime/context/context.h" #include "runtime/program/create.inl" +#include "runtime/program/kernel_info.h" #include "unit_tests/mocks/mock_compilers.h" #include "unit_tests/mocks/mock_graphics_allocation.h" diff --git a/unit_tests/mocks/mock_program.h b/unit_tests/mocks/mock_program.h index 8e350b01a6..4657894814 100644 --- a/unit_tests/mocks/mock_program.h +++ b/unit_tests/mocks/mock_program.h @@ -9,6 +9,7 @@ #include "core/helpers/hash.h" #include "core/helpers/string.h" #include "runtime/helpers/options.h" +#include "runtime/program/kernel_info.h" #include "runtime/program/program.h" #include "gmock/gmock.h" @@ -26,15 +27,17 @@ class MockProgram : public Program { public: using Program::createProgramFromBinary; using Program::getKernelNamesString; - using Program::getProgramCompilerVersion; using Program::isKernelDebugEnabled; using Program::linkBinary; + using Program::populateKernelInfo; using Program::prepareLinkerInputStorage; using Program::rebuildProgramFromIr; using Program::resolveProgramBinary; + using Program::separateBlockKernels; using Program::updateNonUniformFlag; using Program::areSpecializationConstantsInitialized; + using Program::blockKernelManager; using Program::constantSurface; using Program::context; using Program::debugData; @@ -81,18 +84,6 @@ class MockProgram : public Program { void setDevice(Device *device) { this->pDevice = device; }; - const KernelInfo *getBlockKernelInfo(size_t ordinal) { - return 
blockKernelManager->getBlockKernelInfo(ordinal); - } - size_t getNumberOfBlocks() { - return blockKernelManager->getCount(); - } - void addBlockKernel(KernelInfo *blockInfo) { - blockKernelManager->addBlockKernelInfo(blockInfo); - } - void separateBlockKernels() { - Program::separateBlockKernels(); - } std::vector &getKernelInfoArray() { return kernelInfoArray; } @@ -138,7 +129,15 @@ class MockProgram : public Program { extractInternalOptions(buildOptions); } + cl_int isHandled(const PatchTokenBinary::ProgramFromPatchtokens &decodedProgram) const override { + if (skipValidationOfBinary) { + return CL_SUCCESS; + } + return Program::isHandled(decodedProgram); + } + bool contextSet = false; + bool skipValidationOfBinary = false; }; class GlobalMockSipProgram : public Program { diff --git a/unit_tests/program/CMakeLists.txt b/unit_tests/program/CMakeLists.txt index 08a9e2a557..608f67a8b5 100644 --- a/unit_tests/program/CMakeLists.txt +++ b/unit_tests/program/CMakeLists.txt @@ -11,6 +11,7 @@ set(IGDRCL_SRCS_tests_program ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_OCL2_0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_from_patchtokens_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_debug_data_tests.cpp diff --git a/unit_tests/program/evaluate_unhandled_token_tests.cpp b/unit_tests/program/evaluate_unhandled_token_tests.cpp index 47c44c946b..793c90da6c 100644 --- a/unit_tests/program/evaluate_unhandled_token_tests.cpp +++ b/unit_tests/program/evaluate_unhandled_token_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "runtime/compiler_interface/patchtokens_decoder.h" #include "runtime/execution_environment/execution_environment.h" #include "runtime/program/create.inl" #include "runtime/program/program.h" @@ -89,7 +90,7 @@ inline std::vector CreateBinary(bool addUnhandledProgramScopePatchToken, 
b kernelName.push_back('\0'); } iOpenCL::SKernelBinaryHeaderCommon kernBinHeader = {}; - kernBinHeader.CheckSum = 0; + kernBinHeader.CheckSum = 0U; kernBinHeader.ShaderHashCode = 0; kernBinHeader.KernelNameSize = static_cast(kernelName.size()); kernBinHeader.PatchListSize = 0; @@ -99,19 +100,21 @@ inline std::vector CreateBinary(bool addUnhandledProgramScopePatchToken, b kernBinHeader.SurfaceStateHeapSize = 0; kernBinHeader.KernelUnpaddedSize = 0; - if (false == addUnhandledKernelScopePatchToken) { - PushBackToken(ret, kernBinHeader); - ret.insert(ret.end(), kernelName.begin(), kernelName.end()); - } else { - kernBinHeader.PatchListSize = static_cast(sizeof(iOpenCL::SPatchItemHeader)); - PushBackToken(ret, kernBinHeader); - ret.insert(ret.end(), kernelName.begin(), kernelName.end()); - + auto headerOffset = ret.size(); + PushBackToken(ret, kernBinHeader); + ret.insert(ret.end(), kernelName.begin(), kernelName.end()); + uint32_t patchListSize = 0; + if (addUnhandledKernelScopePatchToken) { iOpenCL::SPatchItemHeader unhandledToken = {}; unhandledToken.Size = static_cast(sizeof(iOpenCL::SPatchItemHeader)); unhandledToken.Token = static_cast(unhandledTokenId); PushBackToken(ret, unhandledToken); + patchListSize = static_cast(sizeof(iOpenCL::SPatchItemHeader)); } + iOpenCL::SKernelBinaryHeaderCommon *kernHeader = reinterpret_cast(ret.data() + headerOffset); + kernHeader->PatchListSize = patchListSize; + auto kernelData = reinterpret_cast(kernHeader); + kernHeader->CheckSum = NEO::PatchTokenBinary::calcKernelChecksum(ArrayRef(kernelData, reinterpret_cast(&*ret.rbegin()) + 1)); } return ret; @@ -156,6 +159,6 @@ TEST(EvaluateUnhandledToken, WhenDecodingKernelBinaryIfUnhandledTokenIsFoundAndI TEST(EvaluateUnhandledToken, WhenDecodingKernelBinaryIfUnhandledTokenIsFoundAndIsUnsafeToSkipThenDecodingFails) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(false, true, unhandledTokenId), false, -7, lastUnhandledTokenFound); - 
EXPECT_EQ(CL_INVALID_KERNEL, retVal); + EXPECT_EQ(CL_INVALID_BINARY, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } diff --git a/unit_tests/program/kernel_data.cpp b/unit_tests/program/kernel_data.cpp index 816681e9a9..3c2fefad23 100644 --- a/unit_tests/program/kernel_data.cpp +++ b/unit_tests/program/kernel_data.cpp @@ -525,9 +525,9 @@ TEST_P(DataParameterTest, DataParameterTests) { buildAndDecode(); - if (pKernelInfo->patchInfo.dataParameterBuffers.size() > 0) { - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(GetParam(), pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + if (pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size() > 0) { + EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Token); + EXPECT_EQ_VAL(GetParam(), pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Type); if (pKernelInfo->kernelArgInfo.size() == dataParameterToken.ArgumentNumber + 1) { if (GetParam() == DATA_PARAMETER_BUFFER_STATEFUL) { EXPECT_TRUE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess); @@ -553,7 +553,7 @@ TEST_F(KernelDataParameterTest, DataParameterTestsDataPatameterBufferOffset) { dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; - dataParameterToken.Offset = 0; + dataParameterToken.Offset = 128; dataParameterToken.SourceOffset = 8; pPatchList = &dataParameterToken; @@ -561,9 +561,9 @@ TEST_F(KernelDataParameterTest, DataParameterTestsDataPatameterBufferOffset) { buildAndDecode(); - ASSERT_EQ(1u, pKernelInfo->patchInfo.dataParameterBuffers.size()); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_BUFFER_OFFSET, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, 
pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); + EXPECT_EQ_VAL(pKernelInfo->kernelArgInfo[1].offsetBufferOffset, dataParameterToken.Offset); } TEST_F(KernelDataParameterTest, givenDataParameterBufferStatefulWhenDecodingThenSetArgAsPureStateful) { @@ -578,10 +578,9 @@ TEST_F(KernelDataParameterTest, givenDataParameterBufferStatefulWhenDecodingThen buildAndDecode(); - ASSERT_EQ(1u, pKernelInfo->patchInfo.dataParameterBuffers.size()); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_BUFFER_STATEFUL, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); - EXPECT_TRUE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); + EXPECT_TRUE(pKernelInfo->kernelArgInfo[1].pureStatefulBufferAccess); } TEST_F(KernelDataParameterTest, givenUnknownDataParameterWhenDecodedThenParameterIsIgnored) { @@ -601,7 +600,7 @@ TEST_F(KernelDataParameterTest, givenUnknownDataParameterWhenDecodedThenParamete buildAndDecode(); - EXPECT_EQ_VAL(0u, pKernelInfo->patchInfo.dataParameterBuffers.size()); + EXPECT_EQ_VAL(0u, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); } TEST_F(KernelDataTest, DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES) { @@ -625,10 +624,10 @@ TEST_F(KernelDataTest, DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES) buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); - + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(alignment, 
pKernelInfo->kernelArgInfo[argumentNumber].slmAlignment); + ASSERT_EQ(1U, pKernelInfo->kernelArgInfo[argumentNumber].kernelArgPatchInfoVector.size()); EXPECT_EQ(offsetCrossThread, pKernelInfo->kernelArgInfo[argumentNumber].kernelArgPatchInfoVector[0].crossthreadOffset); } @@ -653,9 +652,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_WIDTH) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_IMAGE_WIDTH, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); - + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgWidth, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgWidth); } @@ -680,8 +678,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_HEIGHT) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_IMAGE_HEIGHT, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgHeight, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgHeight); } @@ -707,8 +705,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_DEPTH) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_IMAGE_DEPTH, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgDepth, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgDepth); } @@ -734,8 +732,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_NUM_SAMPLES) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, 
pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_IMAGE_NUM_SAMPLES, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumSamples, pKernelInfo->kernelArgInfo[argumentNumber].offsetNumSamples); } @@ -761,8 +759,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumMipLevels, pKernelInfo->kernelArgInfo[argumentNumber].offsetNumMipLevels); } @@ -788,8 +786,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_DATA_TYPE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetChannelDataType, pKernelInfo->kernelArgInfo[argumentNumber].offsetChannelDataType); } @@ -815,8 +813,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_CHANNEL_ORDER) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_IMAGE_CHANNEL_ORDER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetChannelOrder, 
pKernelInfo->kernelArgInfo[argumentNumber].offsetChannelOrder); } @@ -842,8 +840,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_ARRAY_SIZE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_IMAGE_ARRAY_SIZE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImageArraySize, pKernelInfo->kernelArgInfo[argumentNumber].offsetArraySize); } @@ -869,8 +867,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_WORK_DIMENSIONS) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_WORK_DIMENSIONS, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetWorkDim, pKernelInfo->workloadInfo.workDimOffset); } @@ -896,10 +894,9 @@ TEST_F(KernelDataTest, DATA_PARAMETER_SIMD_SIZE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_SIMD_SIZE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); - + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); + EXPECT_EQ(offsetSimdSize, pKernelInfo->workloadInfo.simdSizeOffset); } @@ -924,8 +921,7 @@ TEST_F(KernelDataTest, DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, 
pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } @@ -950,8 +946,7 @@ TEST_F(KernelDataTest, DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } @@ -976,8 +971,7 @@ TEST_F(KernelDataTest, DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRES buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } @@ -1002,8 +996,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_NUM_WORK_GROUPS) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_NUM_WORK_GROUPS, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumWorkGroups[argumentNumber], pKernelInfo->workloadInfo.numWorkGroupsOffset[argumentNumber]); } @@ -1029,8 +1023,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_MAX_WORKGROUP_SIZE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_MAX_WORKGROUP_SIZE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, 
pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetMaxWorkGroupSize, pKernelInfo->workloadInfo.maxWorkGroupSizeOffset); } @@ -1057,10 +1051,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_SAMPLER_ADDRESS_MODE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_SAMPLER_ADDRESS_MODE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); - - ASSERT_EQ(1u, pKernelInfo->kernelArgInfo.size()); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(1U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[0].offsetSamplerAddressingMode); } @@ -1087,10 +1079,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); - - ASSERT_EQ(2u, pKernelInfo->kernelArgInfo.size()); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[1].offsetSamplerSnapWa); } @@ -1117,10 +1107,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); - - ASSERT_EQ(2u, pKernelInfo->kernelArgInfo.size()); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, 
pKernelInfo->kernelArgInfo[1].offsetSamplerNormalizedCoords); } @@ -1157,8 +1145,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_KERNEL_ARGUMENT) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_KERNEL_ARGUMENT, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Token); + EXPECT_EQ_VAL(DATA_PARAMETER_KERNEL_ARGUMENT, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Type); ASSERT_EQ(1u, pKernelInfo->kernelArgInfo.size()); ASSERT_EQ(2u, pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.size()); @@ -1273,8 +1261,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_VME_MB_BLOCK_TYPE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_VME_MB_BLOCK_TYPE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeMbBlockType, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeMbBlockType); } @@ -1300,8 +1288,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_VME_SUBPIXEL_MODE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_VME_SUBPIXEL_MODE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSubpixelMode, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSubpixelMode); } @@ -1327,8 +1315,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_VME_SAD_ADJUST_MODE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, 
pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_VME_SAD_ADJUST_MODE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSadAdjustMode, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSadAdjustMode); } @@ -1354,8 +1342,8 @@ TEST_F(KernelDataTest, DATA_PARAMETER_VME_SEARCH_PATH_TYPE) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffers[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_VME_SEARCH_PATH_TYPE, pKernelInfo->patchInfo.dataParameterBuffers[0]->Type); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSearchPathType, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSearchPathType); } @@ -1372,6 +1360,8 @@ TEST_F(KernelDataTest, PATCH_TOKEN_STATE_SIP) { buildAndDecode(); + EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ_VAL(token.SystemKernelOffset, pKernelInfo->systemKernelOffset); } diff --git a/unit_tests/program/kernel_info_from_patchtokens_tests.cpp b/unit_tests/program/kernel_info_from_patchtokens_tests.cpp new file mode 100644 index 0000000000..93de9737f7 --- /dev/null +++ b/unit_tests/program/kernel_info_from_patchtokens_tests.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "runtime/compiler_interface/patchtokens_decoder.h" +#include "runtime/program/kernel_info.h" +#include "runtime/program/kernel_info_from_patchtokens.h" +#include "unit_tests/compiler_interface/patchtokens_tests.h" + +TEST(GetInlineData, GivenValidEmptyKernelFromPatchtokensThenReturnEmptyKernelInfo) { + std::vector storage; + auto src = 
PatchTokensTestData::ValidEmptyKernel::create(storage); + NEO::KernelInfo dst = {}; + NEO::populateKernelInfo(dst, src); + + NEO::KernelInfo expectedKernelInfo = {}; + expectedKernelInfo.name = std::string(src.name.begin()).c_str(); + expectedKernelInfo.heapInfo.pKernelHeader = src.header; + expectedKernelInfo.isValid = true; + + EXPECT_STREQ(expectedKernelInfo.name.c_str(), dst.name.c_str()); + EXPECT_EQ(expectedKernelInfo.heapInfo.pKernelHeader, dst.heapInfo.pKernelHeader); + EXPECT_EQ(expectedKernelInfo.isValid, dst.isValid); +} diff --git a/unit_tests/program/process_elf_binary_tests.cpp b/unit_tests/program/process_elf_binary_tests.cpp index a9747a1025..1f5fecf4cc 100644 --- a/unit_tests/program/process_elf_binary_tests.cpp +++ b/unit_tests/program/process_elf_binary_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "core/elf/reader.h" #include "core/helpers/file_io.h" #include "core/helpers/string.h" #include "runtime/device/device.h" diff --git a/unit_tests/program/program_data_tests.cpp b/unit_tests/program/program_data_tests.cpp index d32a4cf3f8..4d44853e9d 100644 --- a/unit_tests/program/program_data_tests.cpp +++ b/unit_tests/program/program_data_tests.cpp @@ -68,12 +68,12 @@ class ProgramDataTestBase : public testing::Test, SPatchAllocateConstantMemorySurfaceProgramBinaryInfo allocateConstMemorySurface; allocateConstMemorySurface.Token = PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO; - allocateConstMemorySurface.Size = static_cast(sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo) + constSize); + allocateConstMemorySurface.Size = static_cast(sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo)); allocateConstMemorySurface.ConstantBufferIndex = 0; allocateConstMemorySurface.InlineDataSize = static_cast(constSize); - pAllocateConstMemorySurface.reset(new cl_char[allocateConstMemorySurface.Size]); + pAllocateConstMemorySurface.reset(new cl_char[allocateConstMemorySurface.Size + constSize]); 
memcpy_s(pAllocateConstMemorySurface.get(), sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo), @@ -83,7 +83,7 @@ class ProgramDataTestBase : public testing::Test, memcpy_s((cl_char *)pAllocateConstMemorySurface.get() + sizeof(allocateConstMemorySurface), constSize, constValue, constSize); pProgramPatchList = (void *)pAllocateConstMemorySurface.get(); - programPatchListSize = allocateConstMemorySurface.Size; + programPatchListSize = static_cast(allocateConstMemorySurface.Size + constSize); return constSize; } @@ -94,12 +94,12 @@ class ProgramDataTestBase : public testing::Test, SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo allocateGlobalMemorySurface; allocateGlobalMemorySurface.Token = PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO; - allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo) + globalSize); + allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); allocateGlobalMemorySurface.GlobalBufferIndex = 0; allocateGlobalMemorySurface.InlineDataSize = static_cast(globalSize); - pAllocateGlobalMemorySurface.reset(new cl_char[allocateGlobalMemorySurface.Size]); + pAllocateGlobalMemorySurface.reset(new cl_char[allocateGlobalMemorySurface.Size + globalSize]); memcpy_s(pAllocateGlobalMemorySurface.get(), sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo), @@ -109,7 +109,7 @@ class ProgramDataTestBase : public testing::Test, memcpy_s((cl_char *)pAllocateGlobalMemorySurface.get() + sizeof(allocateGlobalMemorySurface), globalSize, globalValue, globalSize); pProgramPatchList = pAllocateGlobalMemorySurface.get(); - programPatchListSize = allocateGlobalMemorySurface.Size; + programPatchListSize = static_cast(allocateGlobalMemorySurface.Size + globalSize); return globalSize; } std::unique_ptr pAllocateConstMemorySurface; @@ -376,11 +376,11 @@ TEST_F(ProgramDataTest, GlobalPointerProgramBinaryInfo) { // regular case - global surface 
exists SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo allocateGlobalMemorySurface; allocateGlobalMemorySurface.Token = PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO; - allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo) + globalPointerSize); + allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); allocateGlobalMemorySurface.GlobalBufferIndex = 0; allocateGlobalMemorySurface.InlineDataSize = static_cast(globalPointerSize); - cl_char *pAllocateGlobalMemorySurface = new cl_char[allocateGlobalMemorySurface.Size]; + cl_char *pAllocateGlobalMemorySurface = new cl_char[allocateGlobalMemorySurface.Size + globalPointerSize]; memcpy_s(pAllocateGlobalMemorySurface, sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo), @@ -389,7 +389,7 @@ TEST_F(ProgramDataTest, GlobalPointerProgramBinaryInfo) { memcpy_s((cl_char *)pAllocateGlobalMemorySurface + sizeof(allocateGlobalMemorySurface), globalPointerSize, &pGlobalPointerValue, globalPointerSize); pProgramPatchList = pAllocateGlobalMemorySurface; - programPatchListSize = allocateGlobalMemorySurface.Size; + programPatchListSize = static_cast(allocateGlobalMemorySurface.Size + globalPointerSize); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getGlobalSurface()); @@ -418,7 +418,7 @@ TEST_F(ProgramDataTest, GlobalPointerProgramBinaryInfo) { sizeof(SPatchGlobalPointerProgramBinaryInfo)); pProgramPatchList = (void *)pGlobalPointer; programPatchListSize = globalPointer.Size; - + this->allowDecodeFailure = true; buildAndDecodeProgramPatchList(); EXPECT_EQ(0, memcmp(&pGlobalPointerValue, globalSurface->getUnderlyingBuffer(), globalPointerSize)); @@ -441,7 +441,7 @@ TEST_F(ProgramDataTest, GlobalPointerProgramBinaryInfo) { sizeof(SPatchGlobalPointerProgramBinaryInfo)); pProgramPatchList = (void *)pGlobalPointer; programPatchListSize = globalPointer.Size; - + this->allowDecodeFailure = true; 
buildAndDecodeProgramPatchList(); EXPECT_EQ(0, memcmp(&pGlobalPointerValue, globalSurface->getUnderlyingBuffer(), globalPointerSize)); @@ -472,6 +472,8 @@ TEST_F(ProgramDataTest, GlobalPointerProgramBinaryInfo) { delete[] pGlobalPointer; // regular case - global pointer to global surface - all parameters valid + this->pProgram->skipValidationOfBinary = true; + this->allowDecodeFailure = false; globalPointer.Token = PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO; globalPointer.Size = sizeof(SPatchGlobalPointerProgramBinaryInfo); @@ -506,12 +508,12 @@ TEST_F(ProgramDataTest, Given32BitDeviceWhenGlobalMemorySurfaceIsPresentThenItHa SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo allocateGlobalMemorySurface; allocateGlobalMemorySurface.Token = PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO; - allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo) + globalSize); + allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); allocateGlobalMemorySurface.GlobalBufferIndex = 0; allocateGlobalMemorySurface.InlineDataSize = static_cast(globalSize); - cl_char *pAllocateGlobalMemorySurface = new cl_char[allocateGlobalMemorySurface.Size]; + cl_char *pAllocateGlobalMemorySurface = new cl_char[allocateGlobalMemorySurface.Size + globalSize]; memcpy_s(pAllocateGlobalMemorySurface, sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo), @@ -521,7 +523,7 @@ TEST_F(ProgramDataTest, Given32BitDeviceWhenGlobalMemorySurfaceIsPresentThenItHa memcpy_s((cl_char *)pAllocateGlobalMemorySurface + sizeof(allocateGlobalMemorySurface), globalSize, globalValue, globalSize); pProgramPatchList = (void *)pAllocateGlobalMemorySurface; - programPatchListSize = allocateGlobalMemorySurface.Size; + programPatchListSize = static_cast(allocateGlobalMemorySurface.Size + globalSize); buildAndDecodeProgramPatchList(); @@ -569,12 +571,12 @@ TEST_F(ProgramDataTest, 
ConstantPointerProgramBinaryInfo) { SPatchAllocateConstantMemorySurfaceProgramBinaryInfo allocateConstMemorySurface; allocateConstMemorySurface.Token = PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO; // note : + sizeof(uint64_t) is to accomodate for constant buffer offset - allocateConstMemorySurface.Size = static_cast(sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo) + constantDataLen + sizeof(uint64_t)); + allocateConstMemorySurface.Size = static_cast(sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo)); allocateConstMemorySurface.ConstantBufferIndex = 0; allocateConstMemorySurface.InlineDataSize = static_cast(constantDataLen + sizeof(uint64_t)); - auto pAllocateConstMemorySurface = std::unique_ptr(new char[allocateConstMemorySurface.Size]); + auto pAllocateConstMemorySurface = std::unique_ptr(new char[allocateConstMemorySurface.Size + allocateConstMemorySurface.InlineDataSize]); // copy the token header memcpy_s(pAllocateConstMemorySurface.get(), @@ -590,7 +592,7 @@ TEST_F(ProgramDataTest, ConstantPointerProgramBinaryInfo) { *(uint64_t *)((char *)pAllocateConstMemorySurface.get() + sizeof(allocateConstMemorySurface) + constantBufferOffsetPatchOffset) = 0U; pProgramPatchList = (void *)pAllocateConstMemorySurface.get(); - programPatchListSize = allocateConstMemorySurface.Size; + programPatchListSize = allocateConstMemorySurface.Size + allocateConstMemorySurface.InlineDataSize; buildAndDecodeProgramPatchList(); @@ -630,6 +632,7 @@ TEST_F(ProgramDataTest, ConstantPointerProgramBinaryInfo) { pProgramPatchList = (void *)pConstantPointer; programPatchListSize = constantPointer.Size; + this->allowDecodeFailure = true; buildAndDecodeProgramPatchList(); EXPECT_EQ(0, memcmp(pConstantData, constantSurface->getUnderlyingBuffer(), constantDataLen)); // check that constant pointer offset was not patched @@ -704,7 +707,8 @@ TEST_F(ProgramDataTest, ConstantPointerProgramBinaryInfo) { sizeof(SPatchConstantPointerProgramBinaryInfo)); 
pProgramPatchList = (void *)pConstantPointer; programPatchListSize = constantPointer.Size; - + this->pProgram->skipValidationOfBinary = true; + this->allowDecodeFailure = false; buildAndDecodeProgramPatchList(); EXPECT_EQ(0, memcmp(pConstantData, constantSurface->getUnderlyingBuffer(), constantDataLen)); @@ -748,6 +752,7 @@ TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeConstantB uint32_t sentinel = 0x17192329U; constantSurfaceStorage[0] = 0U; constantSurfaceStorage[1] = sentinel; + this->pProgram->skipValidationOfBinary = true; buildAndDecodeProgramPatchList(); uint32_t expectedAddr = static_cast(constantSurface.getGraphicsAllocation()->getGpuAddressToPatch()); EXPECT_EQ(expectedAddr, constantSurfaceStorage[0]); @@ -790,6 +795,7 @@ TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeGlobalPoi uint32_t sentinel = 0x17192329U; globalSurfaceStorage[0] = 0U; globalSurfaceStorage[1] = sentinel; + this->pProgram->skipValidationOfBinary = true; buildAndDecodeProgramPatchList(); uint32_t expectedAddr = static_cast(globalSurface.getGraphicsAllocation()->getGpuAddressToPatch()); EXPECT_EQ(expectedAddr, globalSurfaceStorage[0]); diff --git a/unit_tests/program/program_tests.cpp b/unit_tests/program/program_tests.cpp index b0a3389ea9..5b4a575b4c 100644 --- a/unit_tests/program/program_tests.cpp +++ b/unit_tests/program/program_tests.cpp @@ -18,6 +18,7 @@ #include "core/unit_tests/utilities/base_object_utils.h" #include "runtime/command_stream/command_stream_receiver_hw.h" #include "runtime/compiler_interface/compiler_options.h" +#include "runtime/compiler_interface/patchtokens_decoder.h" #include "runtime/gmm_helper/gmm_helper.h" #include "runtime/helpers/hardware_commands_helper.h" #include "runtime/indirect_heap/indirect_heap.h" @@ -1564,7 +1565,6 @@ TEST_F(ProgramPatchTokenTests, DISABLED_ConstantMemorySurface) { false); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(0u, pProgram->getProgramScopePatchListSize()); } 
//////////////////////////////////////////////////////////////////////////////// @@ -1882,12 +1882,6 @@ TEST_F(ProgramTests, givenStatelessToStatefullOptimizationOffWHenProgramIsCreate EXPECT_THAT(pProgram->getInternalOptions(), Not(testing::HasSubstr(std::string("-cl-intel-has-buffer-offset-arg ")))); } -TEST_F(ProgramTests, ProgramCtorSetsProperProgramScopePatchListSize) { - - MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false); - EXPECT_EQ((size_t)0, program.getProgramScopePatchListSize()); -} - TEST_F(ProgramTests, GivenContextWhenCreateProgramThenIncrementContextRefCount) { auto initialApiRefCount = pContext->getReference(); auto initialInternalRefCount = pContext->getRefInternalCount(); @@ -2062,7 +2056,7 @@ TEST_F(ProgramTests, ProgramFromGenBinaryWithPATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KE pKHdr->CheckSum = 0; pKHdr->ShaderHashCode = 0; pKHdr->KernelNameSize = 8; - pKHdr->PatchListSize = 24; + pKHdr->PatchListSize = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); pKHdr->KernelHeapSize = 0; pKHdr->GeneralStateHeapSize = 0; pKHdr->DynamicStateHeapSize = 0; @@ -2082,7 +2076,11 @@ TEST_F(ProgramTests, ProgramFromGenBinaryWithPATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KE pPatch->Offset = 0x40; pPatch->LocationIndex = iOpenCL::INVALID_INDEX; pPatch->LocationIndex2 = iOpenCL::INVALID_INDEX; - binSize += sizeof(SPatchGlobalMemoryObjectKernelArgument); + binSize += pPatch->Size; + pBin += pPatch->Size; + + ArrayRef kernelBlob(reinterpret_cast(pKHdr), reinterpret_cast(pBin)); + pKHdr->CheckSum = PatchTokenBinary::calcKernelChecksum(kernelBlob); // Decode prepared program binary pProgram->genBinary = makeCopy(&genBin[0], binSize); @@ -2147,6 +2145,8 @@ TEST_F(ProgramTests, givenProgramFromGenBinaryWhenSLMSizeIsBiggerThenDeviceLimit pPatch->TotalInlineLocalMemorySize = static_cast(pDevice->getDeviceInfo().localMemSize * 2); binSize += sizeof(SPatchAllocateLocalSurface); + pBin += sizeof(SPatchAllocateLocalSurface); + pKHdr->CheckSum = 
PatchTokenBinary::calcKernelChecksum(ArrayRef(reinterpret_cast(pKHdr), reinterpret_cast(pBin))); // Decode prepared program binary program->genBinary = makeCopy(&genBin[0], binSize); @@ -2186,11 +2186,12 @@ TEST_F(ProgramTests, ProgramFromGenBinaryWithPATCH_TOKEN_GTPIN_FREE_GRF_INFO) { pBin += sizeof(SProgramBinaryHeader); binSize += sizeof(SProgramBinaryHeader); + uint32_t patchTokenSize = sizeof(iOpenCL::SPatchGtpinFreeGRFInfo) + GRF_INFO_SIZE; SKernelBinaryHeaderCommon *pKHdr = (SKernelBinaryHeaderCommon *)pBin; pKHdr->CheckSum = 0; pKHdr->ShaderHashCode = 0; pKHdr->KernelNameSize = 8; - pKHdr->PatchListSize = 24; + pKHdr->PatchListSize = patchTokenSize; pKHdr->KernelHeapSize = 0; pKHdr->GeneralStateHeapSize = 0; pKHdr->DynamicStateHeapSize = 0; @@ -2205,9 +2206,12 @@ TEST_F(ProgramTests, ProgramFromGenBinaryWithPATCH_TOKEN_GTPIN_FREE_GRF_INFO) { SPatchGtpinFreeGRFInfo *pPatch = (SPatchGtpinFreeGRFInfo *)pBin; pPatch->Token = iOpenCL::PATCH_TOKEN_GTPIN_FREE_GRF_INFO; - pPatch->Size = sizeof(iOpenCL::SPatchGtpinFreeGRFInfo) + GRF_INFO_SIZE; + pPatch->Size = patchTokenSize; pPatch->BufferSize = GRF_INFO_SIZE; + binSize += pPatch->Size; + pBin += pPatch->Size; + pKHdr->CheckSum = PatchTokenBinary::calcKernelChecksum(ArrayRef(reinterpret_cast(pKHdr), reinterpret_cast(pBin))); // Decode prepared program binary pProgram->genBinary = makeCopy(&genBin[0], binSize); @@ -2423,32 +2427,6 @@ TEST_F(ProgramTests, RebuildBinaryWithProcessGenBinaryError) { EXPECT_EQ(CL_INVALID_BINARY, retVal); } -TEST_F(ProgramTests, GetProgramCompilerVersion) { - auto program = std::make_unique(*pDevice->getExecutionEnvironment()); - - // Create example header of OpenCL Program Binary - cl_device_id deviceId = pContext->getDevice(0); - Device *pDevice = castToObject(deviceId); - struct SProgramBinaryHeader prgHdr; - prgHdr.Magic = iOpenCL::MAGIC_CL; - prgHdr.Version = 12; - prgHdr.Device = pDevice->getHardwareInfo().platform.eRenderCoreFamily; - prgHdr.GPUPointerSizeInBytes = 8; - 
prgHdr.NumberOfKernels = 1; - prgHdr.SteppingId = 0; - prgHdr.PatchListSize = 0; - - // Check whether Program Binary version is returned correctly - uint32_t binaryVersion = 0; - program->getProgramCompilerVersion(&prgHdr, binaryVersion); - EXPECT_EQ(binaryVersion, 12u); - - // Check whether Program Binary version is left intact - binaryVersion = 1; - program->getProgramCompilerVersion(nullptr, binaryVersion); - EXPECT_EQ(binaryVersion, 1u); -} - TEST_F(ProgramTests, GivenZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfacesCalledThenNoSurfaceIsCreated) { MockProgram *program = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false); @@ -2465,7 +2443,7 @@ TEST_F(ProgramTests, GivenZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfaces privateSurfaceBlock->PerThreadPrivateMemorySize = 0; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; - program->addBlockKernel(infoBlock); + program->blockKernelManager->addBlockKernelInfo(infoBlock); program->allocateBlockPrivateSurfaces(pDevice->getRootDeviceIndex()); @@ -2491,7 +2469,7 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; - program->addBlockKernel(infoBlock); + program->blockKernelManager->addBlockKernelInfo(infoBlock); program->allocateBlockPrivateSurfaces(pDevice->getRootDeviceIndex()); @@ -2517,7 +2495,7 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; - program->addBlockKernel(infoBlock); + program->blockKernelManager->addBlockKernelInfo(infoBlock); program->allocateBlockPrivateSurfaces(pDevice->getRootDeviceIndex()); @@ -2551,7 +2529,7 @@ TEST_F(ProgramTests, givenProgramWithBlockKernelsWhenfreeBlockResourcesisCalledT 
privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; - program->addBlockKernel(infoBlock); + program->blockKernelManager->addBlockKernelInfo(infoBlock); GraphicsAllocation *privateSurface = program->getDevice(0).getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); EXPECT_NE(nullptr, privateSurface); diff --git a/unit_tests/program/program_with_block_kernels_tests.cpp b/unit_tests/program/program_with_block_kernels_tests.cpp index 9d58d898de..66ada7906e 100644 --- a/unit_tests/program/program_with_block_kernels_tests.cpp +++ b/unit_tests/program/program_with_block_kernels_tests.cpp @@ -7,6 +7,7 @@ #include "core/compiler_interface/compiler_interface.h" #include "runtime/device/device.h" +#include "runtime/program/block_kernel_manager.h" #include "unit_tests/fixtures/context_fixture.h" #include "unit_tests/fixtures/platform_fixture.h" #include "unit_tests/fixtures/program_fixture.h" @@ -67,16 +68,16 @@ TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsBuil auto blockKernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel_dispatch_0"); EXPECT_EQ(nullptr, blockKernelInfo); - std::vector blockKernelInfos(mockProgram->getNumberOfBlocks()); + std::vector blockKernelInfos(mockProgram->blockKernelManager->getCount()); - for (size_t i = 0; i < mockProgram->getNumberOfBlocks(); i++) { - const KernelInfo *blockKernelInfo = mockProgram->getBlockKernelInfo(i); + for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) { + const KernelInfo *blockKernelInfo = mockProgram->blockKernelManager->getBlockKernelInfo(i); EXPECT_NE(nullptr, blockKernelInfo); blockKernelInfos[i] = blockKernelInfo; } bool blockKernelFound = false; - for (size_t i = 0; i < mockProgram->getNumberOfBlocks(); i++) { + for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) { if 
(blockKernelInfos[i]->name.find("simple_block_kernel_dispatch") != std::string::npos) { blockKernelFound = true; break;