diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index e84b3a1dd3..34288d2418 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -27,6 +27,8 @@ namespace NEO { +bool useKernelDescriptor = false; + struct KernelArgumentType { const char *argTypeQualifier; uint64_t argTypeQualifierValue; diff --git a/opencl/source/program/kernel_info.h b/opencl/source/program/kernel_info.h index cf8edf6b6d..7398335440 100644 --- a/opencl/source/program/kernel_info.h +++ b/opencl/source/program/kernel_info.h @@ -39,6 +39,8 @@ struct KernelArgumentType; class GraphicsAllocation; class MemoryManager; +extern bool useKernelDescriptor; + extern std::map typeSizeMap; struct WorkloadInfo { @@ -212,6 +214,8 @@ struct KernelInfo { DebugData debugData; bool computeMode = false; const gtpin::igc_info_t *igcInfoForGtpin = nullptr; + + KernelDescriptor kernelDescriptor; }; std::string concatenateKernelNames(ArrayRef kernelInfos); diff --git a/opencl/source/program/kernel_info_from_patchtokens.cpp b/opencl/source/program/kernel_info_from_patchtokens.cpp index 0164cdb5ff..9bc1108f5f 100644 --- a/opencl/source/program/kernel_info_from_patchtokens.cpp +++ b/opencl/source/program/kernel_info_from_patchtokens.cpp @@ -8,6 +8,7 @@ #include "opencl/source/program/kernel_info_from_patchtokens.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" +#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h" #include "opencl/source/program/kernel_info.h" @@ -221,6 +222,10 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch dst.crossThreadData = new char[crossThreadDataSize]; memset(dst.crossThreadData, 0x00, crossThreadDataSize); } + + if (useKernelDescriptor) { + populateKernelDescriptor(dst.kernelDescriptor, src, gpuPointerSizeInBytes); + } } } // namespace NEO diff --git a/opencl/test/unit_test/program/kernel_info_from_patchtokens_tests.cpp 
b/opencl/test/unit_test/program/kernel_info_from_patchtokens_tests.cpp index 7f857da0c5..4e01113361 100644 --- a/opencl/test/unit_test/program/kernel_info_from_patchtokens_tests.cpp +++ b/opencl/test/unit_test/program/kernel_info_from_patchtokens_tests.cpp @@ -222,3 +222,39 @@ TEST(KernelInfoFromPatchTokens, GivenKernelWithGlobalObjectArgThenKernelInfoIsPr EXPECT_EQ(0U, kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size); EXPECT_EQ(globalMemArg.Offset, kernelInfo.kernelArgInfo[1].offsetHeap); } + +TEST(KernelInfoFromPatchTokens, GivenDefaultModeThenKernelDescriptorIsNotBeingPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemArg = {}; + globalMemArg.Token = iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; + globalMemArg.Size = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); + globalMemArg.ArgumentNumber = 1; + globalMemArg.Offset = 0x40; + + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg; + NEO::KernelInfo kernelInfo = {}; + NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); + EXPECT_TRUE(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.empty()); +} + +TEST(KernelInfoFromPatchTokens, WhenUseKernelDescriptorIsEnabledThenKernelDescriptorIsBeingPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemArg = {}; + globalMemArg.Token = iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; + globalMemArg.Size = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); + globalMemArg.ArgumentNumber = 1; + globalMemArg.Offset = 0x40; + + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg; + 
NEO::KernelInfo kernelInfo = {}; + NEO::useKernelDescriptor = true; + NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); + NEO::useKernelDescriptor = false; + EXPECT_FALSE(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.empty()); +} diff --git a/shared/source/kernel/CMakeLists.txt b/shared/source/kernel/CMakeLists.txt index b2e0a39693..96e24b9d3d 100644 --- a/shared/source/kernel/CMakeLists.txt +++ b/shared/source/kernel/CMakeLists.txt @@ -10,9 +10,13 @@ set(NEO_CORE_KERNEL ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_kernel_encoder_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/grf_config.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor.h + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_device_side_enqueue.h + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_vme.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h ) set_property(GLOBAL PROPERTY NEO_CORE_KERNEL ${NEO_CORE_KERNEL}) diff --git a/shared/source/kernel/kernel_arg_descriptor.h b/shared/source/kernel/kernel_arg_descriptor.h index 23a0d783f7..a1a4a255b4 100644 --- a/shared/source/kernel/kernel_arg_descriptor.h +++ b/shared/source/kernel/kernel_arg_descriptor.h @@ -126,6 +126,9 @@ struct ArgDescriptor final { } ArgDescriptor &operator=(const ArgDescriptor &rhs); + ArgDescriptor(const ArgDescriptor &rhs) { + *this = rhs; + } template const T &as() const; diff --git a/shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h b/shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h new file mode 100644 index 0000000000..a555c7a680 --- /dev/null +++ b/shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * 
SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/kernel/kernel_arg_descriptor.h" + +namespace NEO { + +struct ArgDescriptorDeviceSideEnqueue : ArgDescriptorExtended { + CrossThreadDataOffset objectId = undefined; +}; + +} // namespace NEO diff --git a/shared/source/kernel/kernel_arg_descriptor_extended_vme.h b/shared/source/kernel/kernel_arg_descriptor_extended_vme.h new file mode 100644 index 0000000000..6e0ad6cf73 --- /dev/null +++ b/shared/source/kernel/kernel_arg_descriptor_extended_vme.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/kernel/kernel_arg_descriptor.h" + +namespace NEO { + +struct ArgDescVme : ArgDescriptorExtended { + CrossThreadDataOffset mbBlockType = undefined; + CrossThreadDataOffset subpixelMode = undefined; + CrossThreadDataOffset sadAdjustMode = undefined; + CrossThreadDataOffset searchPathType = undefined; +}; + +} // namespace NEO diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp new file mode 100644 index 0000000000..19c7a8a5d8 --- /dev/null +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp @@ -0,0 +1,498 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h" + +#include "shared/source/device_binary_format/patchtokens_decoder.h" +#include "shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h" +#include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h" +#include "shared/source/kernel/kernel_descriptor.h" + +#include +#include + +namespace NEO { + +using namespace iOpenCL; + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv) { + if (execEnv.RequiredWorkGroupSizeX != 0) { + dst.kernelAttributes.requiredWorkgroupSize[0] = 
execEnv.RequiredWorkGroupSizeX; + dst.kernelAttributes.requiredWorkgroupSize[1] = execEnv.RequiredWorkGroupSizeY; + dst.kernelAttributes.requiredWorkgroupSize[2] = execEnv.RequiredWorkGroupSizeZ; + DEBUG_BREAK_IF(!(execEnv.RequiredWorkGroupSizeY > 0)); + DEBUG_BREAK_IF(!(execEnv.RequiredWorkGroupSizeZ > 0)); + } + if (execEnv.WorkgroupWalkOrderDims) { + constexpr auto dimensionMask = 0b11; + constexpr auto dimensionSize = 2; + dst.kernelAttributes.workgroupWalkOrder[0] = execEnv.WorkgroupWalkOrderDims & dimensionMask; + dst.kernelAttributes.workgroupWalkOrder[1] = (execEnv.WorkgroupWalkOrderDims >> dimensionSize) & dimensionMask; + dst.kernelAttributes.workgroupWalkOrder[2] = (execEnv.WorkgroupWalkOrderDims >> dimensionSize * 2) & dimensionMask; + dst.kernelAttributes.flags.requiresWorkgroupWalkOrder = true; + } + + for (uint32_t i = 0; i < 3; ++i) { + // inverts the walk order mapping (from ORDER_ID->DIM_ID to DIM_ID->ORDER_ID) + dst.kernelAttributes.workgroupDimensionsOrder[dst.kernelAttributes.workgroupWalkOrder[i]] = i; + } + + if (execEnv.CompiledForGreaterThan4GBBuffers) { + dst.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; + } else { + dst.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindfulAndStateless; + } + dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize; + dst.kernelAttributes.flags.usesDeviceSideEnqueue = (0 != execEnv.HasDeviceEnqueue); + dst.kernelAttributes.flags.usesBarriers = (0 != execEnv.HasBarriers); + dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption); + dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber; + dst.kernelAttributes.flags.usesFencesForReadWriteImages = (0 != execEnv.UsesFencesForReadWriteImages); + dst.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress = (0 != execEnv.SubgroupIndependentForwardProgressRequired); + dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired; + 
dst.kernelAttributes.flags.useGlobalAtomics = execEnv.HasGlobalAtomics; + dst.kernelAttributes.flags.usesStatelessWrites = 0U; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) { + dst.payloadMappings.samplerTable.borderColor = token.BorderColorOffset; + dst.payloadMappings.samplerTable.numSamplers = token.Count; + dst.payloadMappings.samplerTable.tableOffset = token.Offset; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchBindingTableState &token) { + dst.payloadMappings.bindingTable.numEntries = token.Count; + dst.payloadMappings.bindingTable.tableOffset = token.Offset; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateLocalSurface &token) { + dst.kernelAttributes.slmInlineSize = token.TotalInlineLocalMemorySize; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchMediaVFEState &token, uint32_t slot) { + UNRECOVERABLE_IF(slot >= 2U); + dst.kernelAttributes.perThreadScratchSize[slot] = token.PerThreadScratchSpace; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchInterfaceDescriptorData &token) { + dst.kernelMetadata.deviceSideEnqueueBlockInterfaceDescriptorOffset = token.Offset; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchThreadPayload &token) { + dst.kernelAttributes.flags.perThreadDataHeaderIsPresent = (0U != token.HeaderPresent); + dst.kernelAttributes.numLocalIdChannels = token.LocalIDXPresent + token.LocalIDYPresent + token.LocalIDZPresent; + ; + dst.kernelAttributes.flags.usesFlattenedLocalIds = (0U != token.LocalIDFlattenedPresent); + dst.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = (0U != token.UnusedPerThreadConstantPresent); + dst.kernelAttributes.flags.passInlineData = (0 != token.PassInlineData); + dst.entryPoints.skipPerThreadDataLoad = token.OffsetToSkipPerThreadDataLoad; + dst.entryPoints.skipSetFFIDGP = token.OffsetToSkipSetFFIDGP; +} + +void 
populateKernelDescriptor(KernelDescriptor &dst, const SPatchDataParameterStream &token) { + dst.kernelAttributes.crossThreadDataSize = token.DataParameterStreamSize; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchKernelAttributesInfo &token) { + constexpr ConstStringRef attributeReqdSubGroupSizeBeg = "intel_reqd_sub_group_size("; + std::string attributes = std::string(reinterpret_cast(&token + 1), token.AttributesSize).c_str(); + dst.kernelMetadata.kernelLanguageAttributes = attributes; + auto it = attributes.find(attributeReqdSubGroupSizeBeg.begin()); + if (it != std::string::npos) { + it += attributeReqdSubGroupSizeBeg.size(); + dst.kernelMetadata.requiredSubGroupSize = 0U; + while ((attributes[it] >= '0') & (attributes[it] <= '9')) { + dst.kernelMetadata.requiredSubGroupSize *= 10; + dst.kernelMetadata.requiredSubGroupSize += attributes[it] - '0'; + ++it; + } + } +} + +void populatePointerKernelArg(ArgDescPointer &dst, + CrossThreadDataOffset stateless, uint8_t pointerSize, SurfaceStateHeapOffset bindful, CrossThreadDataOffset bindless, + KernelDescriptor::AddressingMode addressingMode) { + switch (addressingMode) { + default: + UNRECOVERABLE_IF(KernelDescriptor::Stateless != addressingMode); + dst.bindful = undefined; + dst.stateless = stateless; + dst.bindless = undefined; + dst.pointerSize = pointerSize; + break; + case KernelDescriptor::BindfulAndStateless: + dst.bindful = bindful; + dst.stateless = stateless; + dst.bindless = undefined; + dst.pointerSize = pointerSize; + break; + } +} + +template +void populatePointerKernelArg(ArgDescPointer &dst, const TokenT &src, KernelDescriptor::AddressingMode addressingMode) { + populatePointerKernelArg(dst, src.DataParamOffset, src.DataParamSize, src.SurfaceStateHeapOffset, undefined, addressingMode); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrivateSurface &token) { + dst.kernelAttributes.flags.usesPrivateMemory = true; + 
dst.kernelAttributes.perThreadPrivateMemorySize = token.PerThreadPrivateMemorySize; + populatePointerKernelArg(dst.payloadMappings.implicitArgs.privateMemoryAddress, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization &token) { + populatePointerKernelArg(dst.payloadMappings.implicitArgs.globalConstantsSurfaceAddress, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization &token) { + populatePointerKernelArg(dst.payloadMappings.implicitArgs.globalVariablesSurfaceAddress, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrintfSurface &token) { + dst.kernelAttributes.flags.usesPrintf = true; + populatePointerKernelArg(dst.payloadMappings.implicitArgs.printfSurfaceAddress, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessEventPoolSurface &token) { + populatePointerKernelArg(dst.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessDefaultDeviceQueueSurface &token) { + populatePointerKernelArg(dst.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateSystemThreadSurface &token) { + dst.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = token.Offset; + dst.kernelAttributes.perThreadSystemThreadSurfaceSize = token.PerThreadSystemThreadSurfaceSize; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateSyncBuffer 
&token) { + dst.kernelAttributes.flags.usesSyncBuffer = true; + populatePointerKernelArg(dst.payloadMappings.implicitArgs.syncBufferAddress, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchString &token) { + uint32_t stringIndex = token.Index; + const char *stringData = reinterpret_cast(&token + 1); + dst.kernelMetadata.printfStringsMap[stringIndex].assign(stringData, stringData + token.StringSize); +} + +template +inline void populateKernelDescriptorIfNotNull(KernelDescriptor &dst, const TokenT *token, ArgsT &&... args) { + if (token != nullptr) { + populateKernelDescriptor(dst, *token, std::forward(args)...); + } +} + +void markArgAsPatchable(KernelDescriptor &parent, size_t dstArgNum) { + auto &argExtendedTypeInfo = parent.payloadMappings.explicitArgs[dstArgNum].getExtendedTypeInfo(); + if (false == argExtendedTypeInfo.needsPatch) { + argExtendedTypeInfo.needsPatch = true; + ++parent.kernelAttributes.numArgsToPatch; + } +} + +void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchImageMemoryObjectKernelArgument &token) { + markArgAsPatchable(dst, argNum); + + auto &argImage = dst.payloadMappings.explicitArgs[argNum].as(true); + UNRECOVERABLE_IF(KernelDescriptor::Bindful != dst.kernelAttributes.imageAddressingMode); + argImage.bindful = token.Offset; + + if (token.Type == iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA) { + dst.payloadMappings.explicitArgs[argNum].getExtendedTypeInfo().isMediaImage = true; + } + + if (token.Type == iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA_BLOCK) { + dst.payloadMappings.explicitArgs[argNum].getExtendedTypeInfo().isMediaBlockImage = true; + } + + dst.payloadMappings.explicitArgs[argNum].getExtendedTypeInfo().isTransformable = token.Transformable != 0; + if (NEO::KernelArgMetadata::AccessUnknown == dst.payloadMappings.explicitArgs[argNum].getTraits().accessQualifier) { + auto accessQual = token.Writeable ? 
NEO::KernelArgMetadata::AccessReadWrite + : NEO::KernelArgMetadata::AccessReadOnly; + dst.payloadMappings.explicitArgs[argNum].getTraits().accessQualifier = accessQual; + } +} + +void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchSamplerKernelArgument &token) { + markArgAsPatchable(dst, argNum); + + auto &argSampler = dst.payloadMappings.explicitArgs[argNum].as(true); + + argSampler.bindful = token.Offset; + argSampler.samplerType = token.Type; + + if (token.Type != iOpenCL::SAMPLER_OBJECT_TEXTURE) { + DEBUG_BREAK_IF(token.Type != iOpenCL::SAMPLER_OBJECT_VME && + token.Type != iOpenCL::SAMPLER_OBJECT_VE && + token.Type != iOpenCL::SAMPLER_OBJECT_VD); + dst.payloadMappings.explicitArgs[argNum].getExtendedTypeInfo().isAccelerator = true; + dst.kernelAttributes.flags.usesVme |= (token.Type == iOpenCL::SAMPLER_OBJECT_VME); + } +} + +void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchGlobalMemoryObjectKernelArgument &token) { + markArgAsPatchable(dst, argNum); + + auto &argPointer = dst.payloadMappings.explicitArgs[argNum].as(true); + dst.payloadMappings.explicitArgs[argNum].getTraits().addressQualifier = KernelArgMetadata::AddrGlobal; + + argPointer.bindful = token.Offset; + argPointer.stateless = undefined; + argPointer.bindless = undefined; + argPointer.pointerSize = dst.kernelAttributes.gpuPointerSize; +} + +void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchStatelessGlobalMemoryObjectKernelArgument &token) { + markArgAsPatchable(dst, argNum); + + auto &argPointer = dst.payloadMappings.explicitArgs[argNum].as(true); + dst.payloadMappings.explicitArgs[argNum].getTraits().addressQualifier = KernelArgMetadata::AddrGlobal; + + populatePointerKernelArg(argPointer, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchStatelessConstantMemoryObjectKernelArgument &token) { + 
markArgAsPatchable(dst, argNum); + + auto &argPointer = dst.payloadMappings.explicitArgs[argNum].as(true); + dst.payloadMappings.explicitArgs[argNum].getTraits().addressQualifier = KernelArgMetadata::AddrConstant; + + populatePointerKernelArg(argPointer, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchStatelessDeviceQueueKernelArgument &token) { + markArgAsPatchable(dst, argNum); + + auto &argPointer = dst.payloadMappings.explicitArgs[argNum].as(true); + dst.payloadMappings.explicitArgs[argNum].getTraits().addressQualifier = KernelArgMetadata::AddrGlobal; + + dst.payloadMappings.explicitArgs[argNum].getExtendedTypeInfo().isDeviceQueue = true; + + populatePointerKernelArg(argPointer, token, dst.kernelAttributes.bufferAddressingMode); +} + +void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchDataParameterBuffer &token) { + markArgAsPatchable(dst, argNum); + + ArgDescValue::Element newElement = {}; + newElement.size = token.DataSize; + newElement.offset = token.Offset; + newElement.sourceOffset = token.SourceOffset; + + dst.payloadMappings.explicitArgs[argNum].as(true).elements.push_back(newElement); + + if (token.Type == DATA_PARAMETER_KERNEL_ARGUMENT) { + dst.kernelMetadata.allByValueKernelArguments.push_back({newElement, static_cast(argNum)}); + } +} + +inline CrossThreadDataOffset getOffset(const SPatchDataParameterBuffer *token) { + if (token != nullptr) { + return static_cast(token->Offset); + } + return undefined; +} + +void populateArgMetadata(KernelDescriptor &dst, size_t argNum, const SPatchKernelArgumentInfo *src) { + if (nullptr == src) { + return; + } + + auto inlineData = PatchTokenBinary::getInlineData(src); + + auto metadataExtended = std::make_unique(); + metadataExtended->addressQualifier = parseLimitedString(inlineData.addressQualifier.begin(), inlineData.addressQualifier.size()); + metadataExtended->accessQualifier = 
parseLimitedString(inlineData.accessQualifier.begin(), inlineData.accessQualifier.size()); + metadataExtended->argName = parseLimitedString(inlineData.argName.begin(), inlineData.argName.size()); + + auto argTypeFull = parseLimitedString(inlineData.typeName.begin(), inlineData.typeName.size()); + const char *argTypeDelim = strchr(argTypeFull.data(), ';'); + if (nullptr == argTypeDelim) { + argTypeDelim = argTypeFull.data() + argTypeFull.size(); + } + metadataExtended->type = std::string(argTypeFull.data(), argTypeDelim).c_str(); + metadataExtended->typeQualifiers = parseLimitedString(inlineData.typeQualifiers.begin(), inlineData.typeQualifiers.size()); + + ArgTypeTraits metadata = {}; + metadata.accessQualifier = KernelArgMetadata::parseAccessQualifier(metadataExtended->accessQualifier); + metadata.addressQualifier = KernelArgMetadata::parseAddressSpace(metadataExtended->addressQualifier); + metadata.typeQualifiers = KernelArgMetadata::parseTypeQualifiers(metadataExtended->typeQualifiers); + + markArgAsPatchable(dst, argNum); + + dst.payloadMappings.explicitArgs[argNum].getTraits() = metadata; + dst.explicitArgsExtendedMetadata[argNum] = std::move(*metadataExtended); +} + +void populateArgDescriptor(KernelDescriptor &dst, size_t argNum, const PatchTokenBinary::KernelArgFromPatchtokens &src) { + if (src.objectArg != nullptr) { + switch (src.objectArg->Token) { + default: + UNRECOVERABLE_IF(PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT != src.objectArg->Token); + populateKernelArgDescriptor(dst, argNum, *reinterpret_cast(src.objectArg)); + dst.kernelAttributes.flags.usesImages = true; + break; + case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT: + populateKernelArgDescriptor(dst, argNum, *reinterpret_cast(src.objectArg)); + dst.kernelAttributes.flags.usesSamplers = true; + break; + case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: + populateKernelArgDescriptor(dst, argNum, *reinterpret_cast(src.objectArg)); + break; + case 
PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: + populateKernelArgDescriptor(dst, argNum, *reinterpret_cast(src.objectArg)); + break; + case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT: + populateKernelArgDescriptor(dst, argNum, *reinterpret_cast(src.objectArg)); + break; + case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT: + populateKernelArgDescriptor(dst, argNum, *reinterpret_cast(src.objectArg)); + break; + } + } + + switch (src.objectType) { + default: + UNRECOVERABLE_IF(PatchTokenBinary::ArgObjectType::None != src.objectType); + break; + case PatchTokenBinary::ArgObjectType::Buffer: { + auto &asBufferArg = dst.payloadMappings.explicitArgs[argNum].as(true); + asBufferArg.bufferOffset = getOffset(src.metadata.buffer.bufferOffset); + if (src.metadata.buffer.pureStateful != nullptr) { + asBufferArg.accessedUsingStatelessAddressingMode = false; + } + } break; + case PatchTokenBinary::ArgObjectType::Image: { + auto &asImageArg = dst.payloadMappings.explicitArgs[argNum].as(true); + asImageArg.metadataPayload.imgWidth = getOffset(src.metadata.image.width); + asImageArg.metadataPayload.imgHeight = getOffset(src.metadata.image.height); + asImageArg.metadataPayload.imgDepth = getOffset(src.metadata.image.depth); + asImageArg.metadataPayload.channelDataType = getOffset(src.metadata.image.channelDataType); + asImageArg.metadataPayload.channelOrder = getOffset(src.metadata.image.channelOrder); + asImageArg.metadataPayload.arraySize = getOffset(src.metadata.image.arraySize); + asImageArg.metadataPayload.numSamples = getOffset(src.metadata.image.numSamples); + asImageArg.metadataPayload.numMipLevels = getOffset(src.metadata.image.numMipLevels); + asImageArg.metadataPayload.flatBaseOffset = getOffset(src.metadata.image.flatBaseOffset); + asImageArg.metadataPayload.flatWidth = getOffset(src.metadata.image.flatWidth); + asImageArg.metadataPayload.flatHeight = getOffset(src.metadata.image.flatHeight); + asImageArg.metadataPayload.flatPitch = 
getOffset(src.metadata.image.flatPitch); + dst.kernelAttributes.flags.usesImages = true; + } break; + case PatchTokenBinary::ArgObjectType::Sampler: { + auto &asSamplerArg = dst.payloadMappings.explicitArgs[argNum].as(true); + asSamplerArg.metadataPayload.samplerSnapWa = getOffset(src.metadata.sampler.coordinateSnapWaRequired); + asSamplerArg.metadataPayload.samplerAddressingMode = getOffset(src.metadata.sampler.addressMode); + asSamplerArg.metadataPayload.samplerNormalizedCoords = getOffset(src.metadata.sampler.normalizedCoords); + dst.kernelAttributes.flags.usesSamplers = true; + } break; + case PatchTokenBinary::ArgObjectType::Slm: { + auto &asBufferArg = dst.payloadMappings.explicitArgs[argNum].as(true); + asBufferArg.requiredSlmAlignment = src.metadata.slm.token->SourceOffset; + asBufferArg.slmOffset = src.metadata.slm.token->Offset; + } break; + } + + switch (src.objectTypeSpecialized) { + default: + UNRECOVERABLE_IF(PatchTokenBinary::ArgObjectTypeSpecialized::None != src.objectTypeSpecialized); + break; + case PatchTokenBinary::ArgObjectTypeSpecialized::Vme: { + dst.payloadMappings.explicitArgs[argNum].getExtendedTypeInfo().hasVmeExtendedDescriptor = true; + dst.payloadMappings.explicitArgsExtendedDescriptors.resize(dst.payloadMappings.explicitArgs.size()); + + auto vmeDescriptor = std::make_unique(); + vmeDescriptor->mbBlockType = getOffset(src.metadataSpecialized.vme.mbBlockType); + vmeDescriptor->subpixelMode = getOffset(src.metadataSpecialized.vme.subpixelMode); + vmeDescriptor->sadAdjustMode = getOffset(src.metadataSpecialized.vme.sadAdjustMode); + vmeDescriptor->searchPathType = getOffset(src.metadataSpecialized.vme.searchPathType); + dst.payloadMappings.explicitArgsExtendedDescriptors[argNum] = std::move(vmeDescriptor); + } break; + } + + for (auto &byValArg : src.byValMap) { + populateKernelArgDescriptor(dst, argNum, *byValArg); + } + + if (src.objectId) { + 
dst.payloadMappings.explicitArgs[argNum].getExtendedTypeInfo().hasDeviceSideEnqueueExtendedDescriptor = true; + dst.payloadMappings.explicitArgsExtendedDescriptors.resize(dst.payloadMappings.explicitArgs.size()); + + auto deviceSideEnqueueDescriptor = std::make_unique(); + deviceSideEnqueueDescriptor->objectId = getOffset(src.objectId); + dst.payloadMappings.explicitArgsExtendedDescriptors[argNum] = std::move(deviceSideEnqueueDescriptor); + } + populateArgMetadata(dst, argNum, src.argInfo); +} + +void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes) { + UNRECOVERABLE_IF(nullptr == src.header); + + populateKernelDescriptorIfNotNull(dst, src.tokens.executionEnvironment); + populateKernelDescriptorIfNotNull(dst, src.tokens.samplerStateArray); + populateKernelDescriptorIfNotNull(dst, src.tokens.bindingTableState); + populateKernelDescriptorIfNotNull(dst, src.tokens.allocateLocalSurface); + populateKernelDescriptorIfNotNull(dst, src.tokens.mediaVfeState[0], 0); + populateKernelDescriptorIfNotNull(dst, src.tokens.mediaVfeState[1], 1); + populateKernelDescriptorIfNotNull(dst, src.tokens.interfaceDescriptorData); + populateKernelDescriptorIfNotNull(dst, src.tokens.threadPayload); + populateKernelDescriptorIfNotNull(dst, src.tokens.dataParameterStream); + populateKernelDescriptorIfNotNull(dst, src.tokens.kernelAttributesInfo); + populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessPrivateSurface); + populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization); + populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization); + populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessPrintfSurface); + populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface); + populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface); 
+ populateKernelDescriptorIfNotNull(dst, src.tokens.allocateSyncBuffer); + + dst.payloadMappings.explicitArgs.resize(src.tokens.kernelArgs.size()); + dst.explicitArgsExtendedMetadata.resize(src.tokens.kernelArgs.size()); + + for (size_t i = 0U; i < src.tokens.kernelArgs.size(); ++i) { + auto &decodedKernelArg = src.tokens.kernelArgs[i]; + populateArgDescriptor(dst, i, decodedKernelArg); + } + + for (auto &str : src.tokens.strings) { + populateKernelDescriptorIfNotNull(dst, str); + } + + dst.kernelAttributes.flags.usesVme |= (src.tokens.inlineVmeSamplerInfo != nullptr); + dst.entryPoints.systemKernel = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U; + populateKernelDescriptorIfNotNull(dst, src.tokens.allocateSystemThreadSurface); + + for (uint32_t i = 0; i < 3U; ++i) { + dst.payloadMappings.dispatchTraits.localWorkSize[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize[i]); + dst.payloadMappings.dispatchTraits.localWorkSize2[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize2[i]); + dst.payloadMappings.dispatchTraits.globalWorkOffset[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkOffset[i]); + dst.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[i] = getOffset(src.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[i]); + dst.payloadMappings.dispatchTraits.globalWorkSize[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkSize[i]); + dst.payloadMappings.dispatchTraits.numWorkGroups[i] = getOffset(src.tokens.crossThreadPayloadArgs.numWorkGroups[i]); + } + dst.payloadMappings.dispatchTraits.workDim = getOffset(src.tokens.crossThreadPayloadArgs.workDimensions); + + dst.payloadMappings.implicitArgs.maxWorkGroupSize = getOffset(src.tokens.crossThreadPayloadArgs.maxWorkGroupSize); + dst.payloadMappings.implicitArgs.simdSize = getOffset(src.tokens.crossThreadPayloadArgs.simdSize); + dst.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent = getOffset(src.tokens.crossThreadPayloadArgs.parentEvent); + 
dst.payloadMappings.implicitArgs.preferredWkgMultiple = getOffset(src.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple); + dst.payloadMappings.implicitArgs.privateMemorySize = getOffset(src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize); + dst.payloadMappings.implicitArgs.localMemoryStatelessWindowSize = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize); + dst.payloadMappings.implicitArgs.localMemoryStatelessWindowStartAddres = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress); + for (auto &childSimdSize : src.tokens.crossThreadPayloadArgs.childBlockSimdSize) { + dst.kernelMetadata.deviceSideEnqueueChildrenKernelsIdOffset.push_back({childSimdSize->ArgumentNumber, childSimdSize->Offset}); + } + + if (src.tokens.gtpinInfo) { + dst.external.igcInfoForGtpin = (src.tokens.gtpinInfo + 1); + } + + dst.kernelAttributes.gpuPointerSize = gpuPointerSizeInBytes; +} + +} // namespace NEO diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.h b/shared/source/kernel/kernel_descriptor_from_patchtokens.h new file mode 100644 index 0000000000..727de5cb9c --- /dev/null +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include + +namespace NEO { +struct KernelDescriptor; + +namespace PatchTokenBinary { +struct KernelFromPatchtokens; +} + +void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes); + +} // namespace NEO diff --git a/shared/test/unit_test/kernel/CMakeLists.txt b/shared/test/unit_test/kernel/CMakeLists.txt index 82c6159d11..be5b0d514a 100644 --- a/shared/test/unit_test/kernel/CMakeLists.txt +++ b/shared/test/unit_test/kernel/CMakeLists.txt @@ -8,6 +8,7 @@ set(NEO_SHARED_KERNEL_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_tests.cpp ) diff --git a/shared/test/unit_test/kernel/kernel_arg_descriptor_tests.cpp b/shared/test/unit_test/kernel/kernel_arg_descriptor_tests.cpp index 926a2ab2d6..5a858f7788 100644 --- a/shared/test/unit_test/kernel/kernel_arg_descriptor_tests.cpp +++ b/shared/test/unit_test/kernel/kernel_arg_descriptor_tests.cpp @@ -298,8 +298,9 @@ TEST(ArgDescriptorCopyAssign, WhenCopyAssignedThenCopiesExtendedTypeInfo) { arg0.getExtendedTypeInfo().isAccelerator = true; arg0.getExtendedTypeInfo().hasDeviceSideEnqueueExtendedDescriptor = true; + NEO::ArgDescriptor arg1{arg0}; NEO::ArgDescriptor arg2; - arg2 = arg0; + arg2 = arg1; EXPECT_EQ(arg0.getExtendedTypeInfo().packed, arg2.getExtendedTypeInfo().packed); } diff --git a/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp b/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp new file mode 100644 index 0000000000..30c9867772 --- /dev/null +++ b/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp @@ -0,0 +1,1293 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/device_binary_format/patchtokens_decoder.h" +#include "shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h" +#include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h" +#include "shared/source/kernel/kernel_descriptor.h" +#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h" +#include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" + +#include "test.h" + +TEST(KernelDescriptorFromPatchtokens, GivenEmptyInputKernelFromPatchtokensThenOnlySetsUpPointerSize) { + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; + 
iOpenCL::SKernelBinaryHeaderCommon kernelHeader; + kernelTokens.header = &kernelHeader; + NEO::KernelDescriptor kernelDescriptor; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(kernelDescriptor.kernelAttributes.gpuPointerSize, 4); + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 8); + EXPECT_EQ(kernelDescriptor.kernelAttributes.gpuPointerSize, 8); +} + +TEST(KernelDescriptorFromPatchtokens, GivenExecutionEnvironmentThenSetsProperPartsOfDescriptor) { + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; + iOpenCL::SKernelBinaryHeaderCommon kernelHeader; + kernelTokens.header = &kernelHeader; + NEO::KernelDescriptor kernelDescriptor; + + iOpenCL::SPatchExecutionEnvironment execEnv = {}; + kernelTokens.tokens.executionEnvironment = &execEnv; + + execEnv.CompiledForGreaterThan4GBBuffers = false; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(NEO::KernelDescriptor::BindfulAndStateless, kernelDescriptor.kernelAttributes.bufferAddressingMode); + EXPECT_FALSE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + + execEnv.CompiledForGreaterThan4GBBuffers = true; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(NEO::KernelDescriptor::Stateless, kernelDescriptor.kernelAttributes.bufferAddressingMode); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); + + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); + execEnv.RequiredWorkGroupSizeX = 2; + execEnv.RequiredWorkGroupSizeY = 3; + execEnv.RequiredWorkGroupSizeZ = 5; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); + EXPECT_EQ(3U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); + 
EXPECT_EQ(5U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder); + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[0]); + EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[1]); + EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]); + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0]); + EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1]); + EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]); + constexpr auto dimensionSize = 2; + execEnv.WorkgroupWalkOrderDims = (0 << (dimensionSize * 2)) | (2 << (dimensionSize * 1)) | (1 << (dimensionSize * 0)); + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder); + EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[0]); + EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[1]); + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]); + EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0]); + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1]); + EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]); + + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.simdSize); + execEnv.LargestCompiledSIMDSize = 32; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(32U, kernelDescriptor.kernelAttributes.simdSize); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue); + execEnv.HasDeviceEnqueue = 1U; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesBarriers); + execEnv.HasBarriers = 1U; + 
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesBarriers); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption); + execEnv.DisableMidThreadPreemption = 1U; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption); + + EXPECT_EQ(0U, kernelDescriptor.kernelMetadata.compiledSubGroupsNumber); + execEnv.CompiledSubGroupsNumber = 8U; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(8U, kernelDescriptor.kernelMetadata.compiledSubGroupsNumber); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages); + execEnv.UsesFencesForReadWriteImages = 1U; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress); + execEnv.SubgroupIndependentForwardProgressRequired = 1U; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress); + + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.numGrfRequired); + execEnv.NumGRFRequired = 128U; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(128, kernelDescriptor.kernelAttributes.numGrfRequired); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.useGlobalAtomics); + execEnv.HasGlobalAtomics = 1U; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.useGlobalAtomics); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesStatelessWrites); +} + +TEST(KernelDescriptorFromPatchtokens, GivenThreadPayloadThenSetsProperPartsOfDescriptor) { + 
NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; + iOpenCL::SKernelBinaryHeaderCommon kernelHeader; + kernelTokens.header = &kernelHeader; + NEO::KernelDescriptor kernelDescriptor; + + iOpenCL::SPatchThreadPayload threadPayload = {}; + kernelTokens.tokens.threadPayload = &threadPayload; + + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent); + threadPayload.HeaderPresent = 1; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent); + + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.numLocalIdChannels); + threadPayload.LocalIDXPresent = 1; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.numLocalIdChannels); + threadPayload.LocalIDYPresent = 1; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.numLocalIdChannels); + threadPayload.LocalIDZPresent = 1; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(3U, kernelDescriptor.kernelAttributes.numLocalIdChannels); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesFlattenedLocalIds); + threadPayload.LocalIDFlattenedPresent = 1; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesFlattenedLocalIds); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent); + threadPayload.UnusedPerThreadConstantPresent = 1; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent); + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.passInlineData); + threadPayload.PassInlineData = 1; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + 
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.passInlineData); + + EXPECT_EQ(0U, kernelDescriptor.entryPoints.skipPerThreadDataLoad); + threadPayload.OffsetToSkipPerThreadDataLoad = 16; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(16U, kernelDescriptor.entryPoints.skipPerThreadDataLoad); + + EXPECT_EQ(0U, kernelDescriptor.entryPoints.skipSetFFIDGP); + threadPayload.OffsetToSkipSetFFIDGP = 28; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(28U, kernelDescriptor.entryPoints.skipSetFFIDGP); +} + +TEST(KernelDescriptorFromPatchtokens, GivenImplicitArgsThenSetsProperPartsOfDescriptor) { + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; + iOpenCL::SKernelBinaryHeaderCommon kernelHeader; + kernelTokens.header = &kernelHeader; + NEO::KernelDescriptor kernelDescriptor; + + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.samplerTable.borderColor)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.samplerTable.numSamplers); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.samplerTable.tableOffset)); + iOpenCL::SPatchSamplerStateArray samplerStateArray = {}; + samplerStateArray.BorderColorOffset = 2; + samplerStateArray.Count = 3; + samplerStateArray.Offset = 5; + kernelTokens.tokens.samplerStateArray = &samplerStateArray; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(samplerStateArray.BorderColorOffset, kernelDescriptor.payloadMappings.samplerTable.borderColor); + EXPECT_EQ(samplerStateArray.Count, kernelDescriptor.payloadMappings.samplerTable.numSamplers); + EXPECT_EQ(samplerStateArray.Offset, kernelDescriptor.payloadMappings.samplerTable.tableOffset); + kernelTokens.tokens.samplerStateArray = nullptr; + + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.bindingTable.numEntries); + 
EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.bindingTable.tableOffset)); + iOpenCL::SPatchBindingTableState bindingTableState = {}; + bindingTableState.Count = 2; + bindingTableState.Offset = 3; + kernelTokens.tokens.bindingTableState = &bindingTableState; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(bindingTableState.Count, kernelDescriptor.payloadMappings.bindingTable.numEntries); + EXPECT_EQ(bindingTableState.Offset, kernelDescriptor.payloadMappings.bindingTable.tableOffset); + kernelTokens.tokens.bindingTableState = nullptr; + + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.slmInlineSize); + iOpenCL::SPatchAllocateLocalSurface allocateLocalSurface = {}; + allocateLocalSurface.TotalInlineLocalMemorySize = 64; + kernelTokens.tokens.allocateLocalSurface = &allocateLocalSurface; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(allocateLocalSurface.TotalInlineLocalMemorySize, kernelDescriptor.kernelAttributes.slmInlineSize); + kernelTokens.tokens.allocateLocalSurface = nullptr; + + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); + iOpenCL::SPatchMediaVFEState mediaVfeState0 = {}; + mediaVfeState0.PerThreadScratchSpace = 128; + iOpenCL::SPatchMediaVFEState mediaVfeState1 = {}; + mediaVfeState1.PerThreadScratchSpace = 256; + kernelTokens.tokens.mediaVfeState[0] = &mediaVfeState0; + kernelTokens.tokens.mediaVfeState[1] = &mediaVfeState1; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(mediaVfeState0.PerThreadScratchSpace, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); + EXPECT_EQ(mediaVfeState1.PerThreadScratchSpace, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); + kernelTokens.tokens.mediaVfeState[0] = nullptr; + kernelTokens.tokens.mediaVfeState[1] = nullptr; + + EXPECT_EQ(0U, 
kernelDescriptor.kernelMetadata.deviceSideEnqueueBlockInterfaceDescriptorOffset); + iOpenCL::SPatchInterfaceDescriptorData interfaceDescriptorData = {}; + interfaceDescriptorData.Offset = 4096; + kernelTokens.tokens.interfaceDescriptorData = &interfaceDescriptorData; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(interfaceDescriptorData.Offset, kernelDescriptor.kernelMetadata.deviceSideEnqueueBlockInterfaceDescriptorOffset); + kernelTokens.tokens.interfaceDescriptorData = nullptr; + + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.crossThreadDataSize); + iOpenCL::SPatchDataParameterStream dataParameterStream = {}; + dataParameterStream.DataParameterStreamSize = 4096; + kernelTokens.tokens.dataParameterStream = &dataParameterStream; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(dataParameterStream.DataParameterStreamSize, kernelDescriptor.kernelAttributes.crossThreadDataSize); + kernelTokens.tokens.dataParameterStream = nullptr; + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesPrivateMemory); + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.perThreadPrivateMemorySize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.stateless)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindless)); + iOpenCL::SPatchAllocateStatelessPrivateSurface privateSurface = {}; + privateSurface.DataParamOffset = 2; + privateSurface.DataParamSize = 3; + privateSurface.PerThreadPrivateMemorySize = 5; + privateSurface.SurfaceStateHeapOffset = 7; + kernelTokens.tokens.allocateStatelessPrivateSurface = &privateSurface; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + 
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesPrivateMemory); + EXPECT_EQ(privateSurface.PerThreadPrivateMemorySize, kernelDescriptor.kernelAttributes.perThreadPrivateMemorySize); + EXPECT_EQ(privateSurface.DataParamOffset, kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.stateless); + EXPECT_EQ(privateSurface.DataParamSize, kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.pointerSize); + EXPECT_EQ(privateSurface.SurfaceStateHeapOffset, kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindless)); + kernelTokens.tokens.allocateStatelessPrivateSurface = nullptr; + + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless)); + iOpenCL::SPatchAllocateStatelessConstantMemorySurfaceWithInitialization constantSurface = {}; + constantSurface.DataParamOffset = 2; + constantSurface.DataParamSize = 3; + constantSurface.SurfaceStateHeapOffset = 7; + kernelTokens.tokens.allocateStatelessConstantMemorySurfaceWithInitialization = &constantSurface; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(constantSurface.DataParamOffset, kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless); + EXPECT_EQ(constantSurface.DataParamSize, kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.pointerSize); + EXPECT_EQ(constantSurface.SurfaceStateHeapOffset, 
kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless)); + kernelTokens.tokens.allocateStatelessConstantMemorySurfaceWithInitialization = nullptr; + + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless)); + iOpenCL::SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization globalsSurface = {}; + globalsSurface.DataParamOffset = 2; + globalsSurface.DataParamSize = 3; + globalsSurface.SurfaceStateHeapOffset = 7; + kernelTokens.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization = &globalsSurface; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(globalsSurface.DataParamOffset, kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless); + EXPECT_EQ(globalsSurface.DataParamSize, kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.pointerSize); + EXPECT_EQ(globalsSurface.SurfaceStateHeapOffset, kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless)); + kernelTokens.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesPrintf); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless)); + EXPECT_EQ(0U, 
kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindless)); + iOpenCL::SPatchAllocateStatelessPrintfSurface printfSurface = {}; + printfSurface.DataParamOffset = 2; + printfSurface.DataParamSize = 3; + printfSurface.SurfaceStateHeapOffset = 7; + kernelTokens.tokens.allocateStatelessPrintfSurface = &printfSurface; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesPrintf); + EXPECT_EQ(printfSurface.DataParamOffset, kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless); + EXPECT_EQ(printfSurface.DataParamSize, kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize); + EXPECT_EQ(printfSurface.SurfaceStateHeapOffset, kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindless)); + kernelTokens.tokens.allocateStatelessPrintfSurface = nullptr; + + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.stateless)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindless)); + iOpenCL::SPatchAllocateStatelessEventPoolSurface eventPoolSurface = {}; + eventPoolSurface.DataParamOffset = 2; + eventPoolSurface.DataParamSize = 3; + eventPoolSurface.SurfaceStateHeapOffset = 7; + 
kernelTokens.tokens.allocateStatelessEventPoolSurface = &eventPoolSurface; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(eventPoolSurface.DataParamOffset, kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.stateless); + EXPECT_EQ(eventPoolSurface.DataParamSize, kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.pointerSize); + EXPECT_EQ(eventPoolSurface.SurfaceStateHeapOffset, kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindless)); + kernelTokens.tokens.allocateStatelessEventPoolSurface = nullptr; + + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.stateless)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindless)); + iOpenCL::SPatchAllocateStatelessDefaultDeviceQueueSurface defaultDeviceQueueSurface = {}; + defaultDeviceQueueSurface.DataParamOffset = 2; + defaultDeviceQueueSurface.DataParamSize = 3; + defaultDeviceQueueSurface.SurfaceStateHeapOffset = 7; + kernelTokens.tokens.allocateStatelessDefaultDeviceQueueSurface = &defaultDeviceQueueSurface; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(defaultDeviceQueueSurface.DataParamOffset, kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.stateless); + EXPECT_EQ(defaultDeviceQueueSurface.DataParamSize, 
kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.pointerSize); + EXPECT_EQ(defaultDeviceQueueSurface.SurfaceStateHeapOffset, kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindless)); + kernelTokens.tokens.allocateStatelessDefaultDeviceQueueSurface = nullptr; + + EXPECT_EQ(0U, kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.stateless)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindless)); + iOpenCL::SPatchAllocateSystemThreadSurface systemThreadSurface = {}; + systemThreadSurface.Offset = 2; + systemThreadSurface.PerThreadSystemThreadSurfaceSize = 3; + kernelTokens.tokens.allocateSystemThreadSurface = &systemThreadSurface; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(systemThreadSurface.PerThreadSystemThreadSurfaceSize, kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.stateless)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.pointerSize); + EXPECT_EQ(systemThreadSurface.Offset, kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindless)); + kernelTokens.tokens.allocateSystemThreadSurface 
= nullptr; + + EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesSyncBuffer); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.stateless)); + EXPECT_EQ(0U, kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindless)); + iOpenCL::SPatchAllocateSyncBuffer syncBuffer = {}; + syncBuffer.DataParamOffset = 2; + syncBuffer.DataParamSize = 3; + syncBuffer.SurfaceStateHeapOffset = 7; + kernelTokens.tokens.allocateSyncBuffer = &syncBuffer; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesSyncBuffer); + EXPECT_EQ(syncBuffer.DataParamOffset, kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.stateless); + EXPECT_EQ(syncBuffer.DataParamSize, kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.pointerSize); + EXPECT_EQ(syncBuffer.SurfaceStateHeapOffset, kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindless)); + kernelTokens.tokens.allocateSyncBuffer = nullptr; +} + +TEST(KernelDescriptorFromPatchtokens, GivenPrintfStringThenPopulatesStringsMapInDescriptor) { + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; + iOpenCL::SKernelBinaryHeaderCommon kernelHeader; + kernelTokens.header = &kernelHeader; + NEO::KernelDescriptor kernelDescriptor; + + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelMetadata.printfStringsMap.empty()); + + std::vector strTokStream; + std::string str0{"some_string0"}; + std::string str1{"another_string"}; + std::string 
str2{"yet_another_string"}; + std::string str3; + auto string0Off = PatchTokensTestData::pushBackStringToken(str0, 0, strTokStream); + auto string1Off = PatchTokensTestData::pushBackStringToken(str1, 2, strTokStream); + auto string2Off = PatchTokensTestData::pushBackStringToken(str2, 1, strTokStream); + auto string3Off = PatchTokensTestData::pushBackStringToken(str3, 3, strTokStream); + + kernelTokens.tokens.strings.push_back(reinterpret_cast(strTokStream.data() + string0Off)); + kernelTokens.tokens.strings.push_back(reinterpret_cast(strTokStream.data() + string1Off)); + kernelTokens.tokens.strings.push_back(reinterpret_cast(strTokStream.data() + string2Off)); + kernelTokens.tokens.strings.push_back(reinterpret_cast(strTokStream.data() + string3Off)); + + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + ASSERT_EQ(4U, kernelDescriptor.kernelMetadata.printfStringsMap.size()); + EXPECT_EQ(str0, kernelDescriptor.kernelMetadata.printfStringsMap[0]); + EXPECT_EQ(str1, kernelDescriptor.kernelMetadata.printfStringsMap[2]); + EXPECT_EQ(str2, kernelDescriptor.kernelMetadata.printfStringsMap[1]); + EXPECT_TRUE(kernelDescriptor.kernelMetadata.printfStringsMap[3].empty()); +} + +TEST(KernelDescriptorFromPatchtokens, GivenPureStatelessAddressingModelThenBindfulOffsetIsLeftUndefined) { + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; + iOpenCL::SKernelBinaryHeaderCommon kernelHeader; + kernelTokens.header = &kernelHeader; + NEO::KernelDescriptor kernelDescriptor; + + iOpenCL::SPatchAllocateStatelessPrintfSurface printfSurface = {}; + printfSurface.DataParamOffset = 2; + printfSurface.DataParamSize = 3; + printfSurface.SurfaceStateHeapOffset = 7; + kernelTokens.tokens.allocateStatelessPrintfSurface = &printfSurface; + + kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::Stateless; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(printfSurface.DataParamOffset, 
kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless); + EXPECT_EQ(printfSurface.DataParamSize, kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindless)); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelAttributesInfoThenPopulatesLanguageAttributesInDescriptor) { + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; + iOpenCL::SKernelBinaryHeaderCommon kernelHeader; + kernelTokens.header = &kernelHeader; + NEO::KernelDescriptor kernelDescriptor; + + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelMetadata.kernelLanguageAttributes.empty()); + EXPECT_EQ(0U, kernelDescriptor.kernelMetadata.requiredSubGroupSize); + + iOpenCL::SPatchKernelAttributesInfo kernelAttributesToken = {}; + kernelAttributesToken.AttributesSize = 0U; + kernelTokens.tokens.kernelAttributesInfo = &kernelAttributesToken; + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_TRUE(kernelDescriptor.kernelMetadata.kernelLanguageAttributes.empty()); + EXPECT_EQ(0U, kernelDescriptor.kernelMetadata.requiredSubGroupSize); + + std::string attribute = "intel_reqd_sub_group_size(32)"; + kernelAttributesToken.AttributesSize = static_cast(attribute.size()); + std::vector tokenStorage; + tokenStorage.insert(tokenStorage.end(), reinterpret_cast(&kernelAttributesToken), reinterpret_cast(&kernelAttributesToken + 1)); + tokenStorage.insert(tokenStorage.end(), reinterpret_cast(attribute.c_str()), reinterpret_cast(attribute.c_str() + attribute.length())); + + kernelTokens.tokens.kernelAttributesInfo = reinterpret_cast(tokenStorage.data()); + NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); + EXPECT_EQ(attribute, 
kernelDescriptor.kernelMetadata.kernelLanguageAttributes); + EXPECT_EQ(32U, kernelDescriptor.kernelMetadata.requiredSubGroupSize); +} + +TEST(KernelDescriptorFromPatchtokens, GivenValidKernelWithArgThenMetadataIsProperlyPopulated) { + PatchTokensTestData::ValidProgramWithKernelAndArg src; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, src.kernels[0], 4); + ASSERT_EQ(1U, dst.payloadMappings.explicitArgs.size()); + EXPECT_EQ(NEO::KernelArgMetadata::AccessReadWrite, dst.payloadMappings.explicitArgs[0].getTraits().accessQualifier); + EXPECT_EQ(NEO::KernelArgMetadata::AddrGlobal, dst.payloadMappings.explicitArgs[0].getTraits().addressQualifier); + NEO::KernelArgMetadata::TypeQualifiers typeQualifiers = {}; + typeQualifiers.constQual = true; + EXPECT_EQ(typeQualifiers.packed, dst.payloadMappings.explicitArgs[0].getTraits().typeQualifiers.packed); + EXPECT_EQ(0U, dst.payloadMappings.explicitArgs[0].getTraits().argByValSize); + ASSERT_EQ(1U, dst.explicitArgsExtendedMetadata.size()); + EXPECT_STREQ("__global", dst.explicitArgsExtendedMetadata[0].addressQualifier.c_str()); + EXPECT_STREQ("read_write", dst.explicitArgsExtendedMetadata[0].accessQualifier.c_str()); + EXPECT_STREQ("custom_arg", dst.explicitArgsExtendedMetadata[0].argName.c_str()); + EXPECT_STREQ("int*", dst.explicitArgsExtendedMetadata[0].type.c_str()); + EXPECT_STREQ("const", dst.explicitArgsExtendedMetadata[0].typeQualifiers.c_str()); +} + +TEST(KernelDescriptorFromPatchtokens, GivenValidKernelWithImageArgThenArgAccessQualifierIsPopulatedBasedOnArgInfo) { + PatchTokensTestData::ValidProgramWithKernelAndArg src; + iOpenCL::SPatchImageMemoryObjectKernelArgument imageArg = {}; + imageArg.Token = iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; + imageArg.Writeable = false; + src.kernels[0].tokens.kernelArgs[0].objectArg = &imageArg; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, src.kernels[0], 4); + ASSERT_EQ(1U, dst.payloadMappings.explicitArgs.size()); + 
EXPECT_EQ(NEO::KernelArgMetadata::AccessReadWrite, dst.payloadMappings.explicitArgs[0].getTraits().accessQualifier); +} + +TEST(KernelDescriptorFromPatchtokens, GivenValidKernelWithImageArgWhenArgInfoIsMissingThenArgAccessQualifierIsPopulatedBasedOnImageArgWriteableFlag) { + PatchTokensTestData::ValidProgramWithKernelAndArg src; + iOpenCL::SPatchImageMemoryObjectKernelArgument imageArg = {}; + imageArg.Token = iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; + src.kernels[0].tokens.kernelArgs[0].objectArg = &imageArg; + src.kernels[0].tokens.kernelArgs[0].argInfo = nullptr; + { + imageArg.Writeable = false; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, src.kernels[0], 4); + ASSERT_EQ(1U, dst.payloadMappings.explicitArgs.size()); + EXPECT_EQ(NEO::KernelArgMetadata::AccessReadOnly, dst.payloadMappings.explicitArgs[0].getTraits().accessQualifier); + } + + { + imageArg.Writeable = true; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, src.kernels[0], 4); + ASSERT_EQ(1U, dst.payloadMappings.explicitArgs.size()); + EXPECT_EQ(NEO::KernelArgMetadata::AccessReadWrite, dst.payloadMappings.explicitArgs[0].getTraits().accessQualifier); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenValidKernelWithNonDelimitedArgTypeThenUsesArgTypeAsIs) { + PatchTokensTestData::ValidProgramWithKernelAndArg src; + src.arg0TypeMutable[4] = '*'; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, src.kernels[0], 4); + ASSERT_EQ(1U, dst.explicitArgsExtendedMetadata.size()); + EXPECT_STREQ("int**", dst.explicitArgsExtendedMetadata[0].type.c_str()); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithGtpinInfoTokenThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + iOpenCL::SPatchItemHeader gtpinInfo = {}; + kernelTokens.tokens.gtpinInfo = >pinInfo; + + NEO::KernelDescriptor dst = 
{}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_NE(nullptr, dst.external.igcInfoForGtpin); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithImageMemoryObjectKernelArgumentThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchImageMemoryObjectKernelArgument imageArg = {}; + imageArg.Token = iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; + imageArg.ArgumentNumber = 1; + imageArg.Offset = 0x40; + + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &imageArg; + { + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_EQ(1U, dst.kernelAttributes.numArgsToPatch); + ASSERT_EQ(2U, dst.payloadMappings.explicitArgs.size()); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_EQ(imageArg.Offset, dst.payloadMappings.explicitArgs[1].as().bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[1].as().bindless)); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isMediaImage); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isMediaBlockImage); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isTransformable); + ; + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().needsPatch); + } + + { + imageArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isMediaImage); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isMediaBlockImage); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isTransformable); + } + + { + imageArg.Type = 
iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA_BLOCK; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isMediaImage); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isMediaBlockImage); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isTransformable); + } + + { + imageArg.Transformable = 1; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isMediaImage); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isMediaBlockImage); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isTransformable); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithImageMemoryObjectKernelArgumentWhenAccessQualifierAlreadPopulatedThenDontOverwrite) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchImageMemoryObjectKernelArgument imageArg = {}; + imageArg.Token = iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; + imageArg.ArgumentNumber = 1; + imageArg.Offset = 0x40; + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &imageArg; + + NEO::KernelDescriptor dst = {}; + dst.payloadMappings.explicitArgs.resize(2); + dst.payloadMappings.explicitArgs[1].getTraits().accessQualifier = NEO::KernelArgMetadata::AccessNone; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_EQ(NEO::KernelArgMetadata::AccessNone, dst.payloadMappings.explicitArgs[1].getTraits().accessQualifier); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithSamplerKernelArgumentThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = 
PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchSamplerKernelArgument samplerArg = {}; + samplerArg.Token = iOpenCL::PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT; + samplerArg.ArgumentNumber = 1; + samplerArg.Offset = 0x40; + samplerArg.Type = iOpenCL::SAMPLER_OBJECT_TEXTURE; + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &samplerArg; + + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_EQ(samplerArg.Offset, dst.payloadMappings.explicitArgs[1].as().bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[1].as().bindless)); + EXPECT_EQ(samplerArg.Type, dst.payloadMappings.explicitArgs[1].as().samplerType); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isAccelerator); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().needsPatch); + EXPECT_FALSE(dst.kernelAttributes.flags.usesVme); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithSamplerKernelArgumentWhenSamplerIsVmeThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchSamplerKernelArgument samplerArg = {}; + samplerArg.Token = iOpenCL::PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT; + samplerArg.ArgumentNumber = 1; + samplerArg.Offset = 0x40; + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &samplerArg; + + { + samplerArg.Type = iOpenCL::SAMPLER_OBJECT_VME; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_EQ(samplerArg.Type, dst.payloadMappings.explicitArgs[1].as().samplerType); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isAccelerator); 
+ EXPECT_TRUE(dst.kernelAttributes.flags.usesVme); + } + + { + samplerArg.Type = iOpenCL::SAMPLER_OBJECT_VE; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_EQ(samplerArg.Type, dst.payloadMappings.explicitArgs[1].as().samplerType); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isAccelerator); + EXPECT_FALSE(dst.kernelAttributes.flags.usesVme); + } + + { + samplerArg.Type = iOpenCL::SAMPLER_OBJECT_VD; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_EQ(samplerArg.Type, dst.payloadMappings.explicitArgs[1].as().samplerType); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isAccelerator); + EXPECT_FALSE(dst.kernelAttributes.flags.usesVme); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithGlobalObjectArgThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemArg = {}; + globalMemArg.Token = iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; + globalMemArg.ArgumentNumber = 1; + globalMemArg.Offset = 0x40; + + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_EQ(1U, dst.kernelAttributes.numArgsToPatch); + ASSERT_EQ(2U, dst.payloadMappings.explicitArgs.size()); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[1].as().stateless)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[1].as().bindless)); + EXPECT_EQ(0U, 
dst.payloadMappings.explicitArgs[1].as().pointerSize); + EXPECT_EQ(globalMemArg.Offset, dst.payloadMappings.explicitArgs[1].as().bindful); + EXPECT_EQ(NEO::KernelArgMetadata::AddrGlobal, dst.payloadMappings.explicitArgs[1].getTraits().addressQualifier); + + EXPECT_FALSE(dst.payloadMappings.explicitArgs[0].getExtendedTypeInfo().needsPatch); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().needsPatch); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithStatelessGlobalMemoryObjectArgThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument globalMemArg = {}; + globalMemArg.Token = iOpenCL::PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; + globalMemArg.ArgumentNumber = 1; + globalMemArg.DataParamOffset = 2; + globalMemArg.DataParamSize = 4; + globalMemArg.SurfaceStateHeapOffset = 128; + + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_EQ(1U, dst.kernelAttributes.numArgsToPatch); + ASSERT_EQ(2U, dst.payloadMappings.explicitArgs.size()); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_EQ(globalMemArg.DataParamOffset, dst.payloadMappings.explicitArgs[1].as().stateless); + EXPECT_EQ(globalMemArg.DataParamSize, dst.payloadMappings.explicitArgs[1].as().pointerSize); + EXPECT_EQ(globalMemArg.SurfaceStateHeapOffset, dst.payloadMappings.explicitArgs[1].as().bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[1].as().bindless)); + EXPECT_EQ(NEO::KernelArgMetadata::AddrGlobal, dst.payloadMappings.explicitArgs[1].getTraits().addressQualifier); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().needsPatch); +} + 
+TEST(KernelDescriptorFromPatchtokens, GivenKernelWithStatelessConstantMemoryObjectArgThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchStatelessConstantMemoryObjectKernelArgument constantMemArg = {}; + constantMemArg.Token = iOpenCL::PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT; + constantMemArg.ArgumentNumber = 1; + constantMemArg.DataParamOffset = 2; + constantMemArg.DataParamSize = 4; + constantMemArg.SurfaceStateHeapOffset = 128; + + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &constantMemArg; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_EQ(1U, dst.kernelAttributes.numArgsToPatch); + ASSERT_EQ(2U, dst.payloadMappings.explicitArgs.size()); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_EQ(constantMemArg.DataParamOffset, dst.payloadMappings.explicitArgs[1].as().stateless); + EXPECT_EQ(constantMemArg.DataParamSize, dst.payloadMappings.explicitArgs[1].as().pointerSize); + EXPECT_EQ(constantMemArg.SurfaceStateHeapOffset, dst.payloadMappings.explicitArgs[1].as().bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[1].as().bindless)); + EXPECT_EQ(NEO::KernelArgMetadata::AddrConstant, dst.payloadMappings.explicitArgs[1].getTraits().addressQualifier); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().needsPatch); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithStatelessDeviceQueueKernelArgumentThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchStatelessDeviceQueueKernelArgument deviceQueueArg = {}; + deviceQueueArg.Token = 
iOpenCL::PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT; + deviceQueueArg.ArgumentNumber = 1; + deviceQueueArg.DataParamOffset = 2; + deviceQueueArg.DataParamSize = 4; + deviceQueueArg.SurfaceStateHeapOffset = 128; + + kernelTokens.tokens.kernelArgs.resize(2); + kernelTokens.tokens.kernelArgs[1].objectArg = &deviceQueueArg; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_EQ(1U, dst.kernelAttributes.numArgsToPatch); + ASSERT_EQ(2U, dst.payloadMappings.explicitArgs.size()); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_EQ(deviceQueueArg.DataParamOffset, dst.payloadMappings.explicitArgs[1].as().stateless); + EXPECT_EQ(deviceQueueArg.DataParamSize, dst.payloadMappings.explicitArgs[1].as().pointerSize); + EXPECT_EQ(deviceQueueArg.SurfaceStateHeapOffset, dst.payloadMappings.explicitArgs[1].as().bindful); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[1].as().bindless)); + EXPECT_EQ(NEO::KernelArgMetadata::AddrGlobal, dst.payloadMappings.explicitArgs[1].getTraits().addressQualifier); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().needsPatch); + + EXPECT_FALSE(dst.payloadMappings.explicitArgs[0].getExtendedTypeInfo().isDeviceQueue); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].getExtendedTypeInfo().isDeviceQueue); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithByValueArgumentsThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + + iOpenCL::SPatchDataParameterBuffer paramArg10 = {}; + paramArg10.Token = iOpenCL::PATCH_TOKEN_DATA_PARAMETER_BUFFER; + paramArg10.ArgumentNumber = 1; + paramArg10.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT; + paramArg10.Offset = 2; + paramArg10.DataSize = 3; + paramArg10.SourceOffset = 5; + + iOpenCL::SPatchDataParameterBuffer paramArg11 = {}; + paramArg11.Token = 
iOpenCL::PATCH_TOKEN_DATA_PARAMETER_BUFFER; + paramArg11.ArgumentNumber = 1; + paramArg11.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT; + paramArg11.Offset = 7; + paramArg11.DataSize = 11; + paramArg11.SourceOffset = 13; + + iOpenCL::SPatchDataParameterBuffer paramNonArg = {}; + paramNonArg.Token = iOpenCL::PATCH_TOKEN_DATA_PARAMETER_BUFFER; + paramNonArg.ArgumentNumber = 2; + paramNonArg.Type = iOpenCL::DATA_PARAMETER_IMAGE_WIDTH; + paramNonArg.Offset = 17; + paramNonArg.DataSize = 19; + paramNonArg.SourceOffset = 23; + + kernelTokens.tokens.kernelArgs.resize(3); + kernelTokens.tokens.kernelArgs[1].byValMap.push_back(¶mArg10); + kernelTokens.tokens.kernelArgs[1].byValMap.push_back(¶mArg11); + kernelTokens.tokens.kernelArgs[2].byValMap.push_back(¶mNonArg); + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_EQ(3U, dst.payloadMappings.explicitArgs.size()); + EXPECT_EQ(2U, dst.kernelAttributes.numArgsToPatch); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[1].is()); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[2].is()); + ASSERT_EQ(2U, dst.payloadMappings.explicitArgs[1].as().elements.size()); + EXPECT_EQ(paramArg10.Offset, dst.payloadMappings.explicitArgs[1].as().elements[0].offset); + EXPECT_EQ(paramArg10.DataSize, dst.payloadMappings.explicitArgs[1].as().elements[0].size); + EXPECT_EQ(paramArg10.SourceOffset, dst.payloadMappings.explicitArgs[1].as().elements[0].sourceOffset); + EXPECT_EQ(paramArg11.Offset, dst.payloadMappings.explicitArgs[1].as().elements[1].offset); + EXPECT_EQ(paramArg11.DataSize, dst.payloadMappings.explicitArgs[1].as().elements[1].size); + EXPECT_EQ(paramArg11.SourceOffset, dst.payloadMappings.explicitArgs[1].as().elements[1].sourceOffset); + EXPECT_EQ(paramNonArg.Offset, dst.payloadMappings.explicitArgs[2].as().elements[0].offset); + EXPECT_EQ(paramNonArg.DataSize, dst.payloadMappings.explicitArgs[2].as().elements[0].size); + EXPECT_EQ(paramNonArg.SourceOffset, 
dst.payloadMappings.explicitArgs[2].as().elements[0].sourceOffset); + + ASSERT_EQ(2U, dst.kernelMetadata.allByValueKernelArguments.size()); + EXPECT_EQ(1U, dst.kernelMetadata.allByValueKernelArguments[0].argNum); + EXPECT_EQ(paramArg10.Offset, dst.kernelMetadata.allByValueKernelArguments[0].byValueElement.offset); + EXPECT_EQ(paramArg10.DataSize, dst.kernelMetadata.allByValueKernelArguments[0].byValueElement.size); + EXPECT_EQ(paramArg10.SourceOffset, dst.kernelMetadata.allByValueKernelArguments[0].byValueElement.sourceOffset); + EXPECT_EQ(1U, dst.kernelMetadata.allByValueKernelArguments[0].argNum); + EXPECT_EQ(paramArg11.Offset, dst.kernelMetadata.allByValueKernelArguments[1].byValueElement.offset); + EXPECT_EQ(paramArg11.DataSize, dst.kernelMetadata.allByValueKernelArguments[1].byValueElement.size); + EXPECT_EQ(paramArg11.SourceOffset, dst.kernelMetadata.allByValueKernelArguments[1].byValueElement.sourceOffset); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithPointerArgumentAndMetadataThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + kernelTokens.tokens.kernelArgs.resize(1); + kernelTokens.tokens.kernelArgs[0].objectType = NEO::PatchTokenBinary::ArgObjectType::Buffer; + { + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bufferOffset)); + EXPECT_FALSE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().isPureStateful())); + EXPECT_FALSE(dst.kernelAttributes.flags.usesImages); + EXPECT_FALSE(dst.kernelAttributes.flags.usesSamplers); + } + { + iOpenCL::SPatchDataParameterBuffer bufferOffset = {}; + bufferOffset.Offset = 17; + + kernelTokens.tokens.kernelArgs[0].metadata.buffer.bufferOffset = &bufferOffset; + 
NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + EXPECT_EQ(bufferOffset.Offset, dst.payloadMappings.explicitArgs[0].as().bufferOffset); + EXPECT_FALSE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().isPureStateful())); + kernelTokens.tokens.kernelArgs[0].metadata.buffer.bufferOffset = nullptr; + EXPECT_FALSE(dst.kernelAttributes.flags.usesImages); + EXPECT_FALSE(dst.kernelAttributes.flags.usesSamplers); + } + { + iOpenCL::SPatchDataParameterBuffer pureStateful = {}; + kernelTokens.tokens.kernelArgs[0].metadata.buffer.pureStateful = &pureStateful; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bufferOffset)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().isPureStateful())); + kernelTokens.tokens.kernelArgs[0].metadata.buffer.pureStateful = nullptr; + EXPECT_FALSE(dst.kernelAttributes.flags.usesImages); + EXPECT_FALSE(dst.kernelAttributes.flags.usesSamplers); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithImageArgumentAndMetadataThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + kernelTokens.tokens.kernelArgs.resize(1); + kernelTokens.tokens.kernelArgs[0].objectType = NEO::PatchTokenBinary::ArgObjectType::Image; + { + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + auto &metadataPayload = dst.payloadMappings.explicitArgs[0].as().metadataPayload; + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.imgWidth)); + 
EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.imgHeight)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.imgDepth)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.channelDataType)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.channelOrder)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.arraySize)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.numSamples)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.numMipLevels)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.flatBaseOffset)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.flatWidth)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.flatHeight)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.flatPitch)); + EXPECT_TRUE(dst.kernelAttributes.flags.usesImages); + EXPECT_FALSE(dst.kernelAttributes.flags.usesSamplers); + } + { + iOpenCL::SPatchDataParameterBuffer imgWidth = {}; + iOpenCL::SPatchDataParameterBuffer imgHeight = {}; + iOpenCL::SPatchDataParameterBuffer imgDepth = {}; + iOpenCL::SPatchDataParameterBuffer channelDataType = {}; + iOpenCL::SPatchDataParameterBuffer channelOrder = {}; + iOpenCL::SPatchDataParameterBuffer arraySize = {}; + iOpenCL::SPatchDataParameterBuffer numSamples = {}; + iOpenCL::SPatchDataParameterBuffer numMipLevels = {}; + iOpenCL::SPatchDataParameterBuffer flatBaseOffset = {}; + iOpenCL::SPatchDataParameterBuffer flatWidth = {}; + iOpenCL::SPatchDataParameterBuffer flatHeight = {}; + iOpenCL::SPatchDataParameterBuffer flatPitch = {}; + imgWidth.Offset = 2; + imgHeight.Offset = 3; + imgDepth.Offset = 5; + channelDataType.Offset = 7; + channelOrder.Offset = 11; + arraySize.Offset = 13; + numSamples.Offset = 17; + numMipLevels.Offset = 19; + flatBaseOffset.Offset = 23; + flatWidth.Offset = 29; + flatHeight.Offset = 31; + flatPitch.Offset = 37; + + kernelTokens.tokens.kernelArgs[0].metadata.image.width = &imgWidth; + kernelTokens.tokens.kernelArgs[0].metadata.image.width = &imgWidth; + 
kernelTokens.tokens.kernelArgs[0].metadata.image.height = &imgHeight; + kernelTokens.tokens.kernelArgs[0].metadata.image.depth = &imgDepth; + kernelTokens.tokens.kernelArgs[0].metadata.image.channelDataType = &channelDataType; + kernelTokens.tokens.kernelArgs[0].metadata.image.channelOrder = &channelOrder; + kernelTokens.tokens.kernelArgs[0].metadata.image.arraySize = &arraySize; + kernelTokens.tokens.kernelArgs[0].metadata.image.numSamples = &numSamples; + kernelTokens.tokens.kernelArgs[0].metadata.image.numMipLevels = &numMipLevels; + kernelTokens.tokens.kernelArgs[0].metadata.image.flatBaseOffset = &flatBaseOffset; + kernelTokens.tokens.kernelArgs[0].metadata.image.flatWidth = &flatWidth; + kernelTokens.tokens.kernelArgs[0].metadata.image.flatHeight = &flatHeight; + kernelTokens.tokens.kernelArgs[0].metadata.image.flatPitch = &flatPitch; + + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + auto &metadataPayload = dst.payloadMappings.explicitArgs[0].as().metadataPayload; + EXPECT_EQ(imgWidth.Offset, metadataPayload.imgWidth); + EXPECT_EQ(imgHeight.Offset, metadataPayload.imgHeight); + EXPECT_EQ(imgDepth.Offset, metadataPayload.imgDepth); + EXPECT_EQ(channelDataType.Offset, metadataPayload.channelDataType); + EXPECT_EQ(channelOrder.Offset, metadataPayload.channelOrder); + EXPECT_EQ(arraySize.Offset, metadataPayload.arraySize); + EXPECT_EQ(numSamples.Offset, metadataPayload.numSamples); + EXPECT_EQ(numMipLevels.Offset, metadataPayload.numMipLevels); + EXPECT_EQ(flatBaseOffset.Offset, metadataPayload.flatBaseOffset); + EXPECT_EQ(flatWidth.Offset, metadataPayload.flatWidth); + EXPECT_EQ(flatHeight.Offset, metadataPayload.flatHeight); + EXPECT_EQ(flatPitch.Offset, metadataPayload.flatPitch); + EXPECT_TRUE(dst.kernelAttributes.flags.usesImages); + EXPECT_FALSE(dst.kernelAttributes.flags.usesSamplers); + } +} + +TEST(KernelDescriptorFromPatchtokens, 
GivenKernelWithSamplerArgumentAndMetadataThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + kernelTokens.tokens.kernelArgs.resize(1); + kernelTokens.tokens.kernelArgs[0].objectType = NEO::PatchTokenBinary::ArgObjectType::Sampler; + { + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + auto &metadataPayload = dst.payloadMappings.explicitArgs[0].as().metadataPayload; + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.samplerAddressingMode)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.samplerNormalizedCoords)); + EXPECT_TRUE(NEO::isUndefinedOffset(metadataPayload.samplerSnapWa)); + EXPECT_FALSE(dst.kernelAttributes.flags.usesImages); + EXPECT_TRUE(dst.kernelAttributes.flags.usesSamplers); + } + { + iOpenCL::SPatchDataParameterBuffer addressingMode = {}; + iOpenCL::SPatchDataParameterBuffer normalizedCoords = {}; + iOpenCL::SPatchDataParameterBuffer snapWa = {}; + addressingMode.Offset = 2; + normalizedCoords.Offset = 3; + snapWa.Offset = 5; + + kernelTokens.tokens.kernelArgs[0].metadata.sampler.addressMode = &addressingMode; + kernelTokens.tokens.kernelArgs[0].metadata.sampler.normalizedCoords = &normalizedCoords; + kernelTokens.tokens.kernelArgs[0].metadata.sampler.coordinateSnapWaRequired = &snapWa; + + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + auto &metadataPayload = dst.payloadMappings.explicitArgs[0].as().metadataPayload; + EXPECT_EQ(addressingMode.Offset, metadataPayload.samplerAddressingMode); + EXPECT_EQ(normalizedCoords.Offset, metadataPayload.samplerNormalizedCoords); + EXPECT_EQ(snapWa.Offset, metadataPayload.samplerSnapWa); + EXPECT_FALSE(dst.kernelAttributes.flags.usesImages); + 
EXPECT_TRUE(dst.kernelAttributes.flags.usesSamplers); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithSlmArgumentAndMetadataThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + kernelTokens.tokens.kernelArgs.resize(1); + { + kernelTokens.tokens.kernelArgs[0].objectType = NEO::PatchTokenBinary::ArgObjectType::Buffer; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bufferOffset)); + EXPECT_FALSE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().isPureStateful())); + EXPECT_FALSE(dst.kernelAttributes.flags.usesImages); + EXPECT_FALSE(dst.kernelAttributes.flags.usesSamplers); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().stateless)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bindless)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bufferOffset)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().slmOffset)); + EXPECT_EQ(0U, dst.payloadMappings.explicitArgs[0].as().requiredSlmAlignment); + EXPECT_EQ(0U, dst.payloadMappings.explicitArgs[0].as().pointerSize); + } + { + kernelTokens.tokens.kernelArgs[0].objectType = NEO::PatchTokenBinary::ArgObjectType::Slm; + iOpenCL::SPatchDataParameterBuffer slmDesc = {}; + slmDesc.Offset = 17; + slmDesc.SourceOffset = 64; + + kernelTokens.tokens.kernelArgs[0].metadata.slm.token = &slmDesc; + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + 
EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bufferOffset)); + EXPECT_FALSE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().isPureStateful())); + EXPECT_FALSE(dst.kernelAttributes.flags.usesImages); + EXPECT_FALSE(dst.kernelAttributes.flags.usesSamplers); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bindful)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().stateless)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bindless)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.explicitArgs[0].as().bufferOffset)); + EXPECT_EQ(slmDesc.SourceOffset, dst.payloadMappings.explicitArgs[0].as().requiredSlmAlignment); + EXPECT_EQ(slmDesc.Offset, dst.payloadMappings.explicitArgs[0].as().slmOffset); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithSamplerArgumentAndMetadataWhenSamplerTypeIsVmeThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + kernelTokens.tokens.kernelArgs.resize(1); + kernelTokens.tokens.kernelArgs[0].objectType = NEO::PatchTokenBinary::ArgObjectType::Sampler; + { + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + EXPECT_TRUE(dst.kernelAttributes.flags.usesSamplers); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[0].getExtendedTypeInfo().hasVmeExtendedDescriptor); + EXPECT_TRUE(dst.payloadMappings.explicitArgsExtendedDescriptors.empty()); + } + { + kernelTokens.tokens.kernelArgs[0].objectTypeSpecialized = NEO::PatchTokenBinary::ArgObjectTypeSpecialized::Vme; + iOpenCL::SPatchDataParameterBuffer mbBlockType = {}; + iOpenCL::SPatchDataParameterBuffer subpixelMode = {}; + iOpenCL::SPatchDataParameterBuffer sadAdjustMode = {}; + 
iOpenCL::SPatchDataParameterBuffer searchPathType = {}; + mbBlockType.Offset = 2; + subpixelMode.Offset = 3; + sadAdjustMode.Offset = 5; + searchPathType.Offset = 7; + + kernelTokens.tokens.kernelArgs[0].metadataSpecialized.vme.mbBlockType = &mbBlockType; + kernelTokens.tokens.kernelArgs[0].metadataSpecialized.vme.subpixelMode = &subpixelMode; + kernelTokens.tokens.kernelArgs[0].metadataSpecialized.vme.sadAdjustMode = &sadAdjustMode; + kernelTokens.tokens.kernelArgs[0].metadataSpecialized.vme.searchPathType = &searchPathType; + + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + EXPECT_TRUE(dst.kernelAttributes.flags.usesSamplers); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].getExtendedTypeInfo().hasVmeExtendedDescriptor); + ASSERT_EQ(1U, dst.payloadMappings.explicitArgsExtendedDescriptors.size()); + auto argVme = reinterpret_cast(dst.payloadMappings.explicitArgsExtendedDescriptors[0].get()); + EXPECT_EQ(mbBlockType.Offset, argVme->mbBlockType); + EXPECT_EQ(subpixelMode.Offset, argVme->subpixelMode); + EXPECT_EQ(sadAdjustMode.Offset, argVme->sadAdjustMode); + EXPECT_EQ(searchPathType.Offset, argVme->searchPathType); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithSamplerArgumentAndMetadataWhenObjectIdIsPresentThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + kernelTokens.tokens.kernelArgs.resize(1); + kernelTokens.tokens.kernelArgs[0].objectType = NEO::PatchTokenBinary::ArgObjectType::Sampler; + { + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + EXPECT_FALSE(dst.payloadMappings.explicitArgs[0].getExtendedTypeInfo().hasDeviceSideEnqueueExtendedDescriptor); + 
EXPECT_TRUE(dst.payloadMappings.explicitArgsExtendedDescriptors.empty()); + } + { + iOpenCL::SPatchDataParameterBuffer objectId = {}; + kernelTokens.tokens.kernelArgs[0].objectId = &objectId; + objectId.Offset = 7; + + NEO::KernelDescriptor dst = {}; + NEO::populateKernelDescriptor(dst, kernelTokens, sizeof(void *)); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].is()); + EXPECT_TRUE(dst.kernelAttributes.flags.usesSamplers); + EXPECT_TRUE(dst.payloadMappings.explicitArgs[0].getExtendedTypeInfo().hasDeviceSideEnqueueExtendedDescriptor); + ASSERT_EQ(1U, dst.payloadMappings.explicitArgsExtendedDescriptors.size()); + auto argObjectId = reinterpret_cast(dst.payloadMappings.explicitArgsExtendedDescriptors[0].get()); + EXPECT_EQ(objectId.Offset, argObjectId->objectId); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithInlineVmeThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + NEO::KernelDescriptor dst; + NEO::populateKernelDescriptor(dst, kernelTokens, 4); + EXPECT_FALSE(dst.kernelAttributes.flags.usesVme); + + iOpenCL::SPatchItemHeader inlineVme = {}; + kernelTokens.tokens.inlineVmeSamplerInfo = &inlineVme; + NEO::populateKernelDescriptor(dst, kernelTokens, 4); + EXPECT_TRUE(dst.kernelAttributes.flags.usesVme); +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithSipDataThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + NEO::KernelDescriptor dst; + NEO::populateKernelDescriptor(dst, kernelTokens, 4); + EXPECT_EQ(0U, dst.entryPoints.systemKernel); + + iOpenCL::SPatchStateSIP sip = {}; + sip.SystemKernelOffset = 4096; + kernelTokens.tokens.stateSip = &sip; + NEO::populateKernelDescriptor(dst, kernelTokens, 4); + EXPECT_EQ(sip.SystemKernelOffset, dst.entryPoints.systemKernel); +} + 
+TEST(KernelDescriptorFromPatchtokens, GivenKernelWithDitpatchMetadataImplicitArgsThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + { + NEO::KernelDescriptor dst; + NEO::populateKernelDescriptor(dst, kernelTokens, 4); + for (uint32_t i = 0; i < 3U; ++i) { + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.dispatchTraits.localWorkSize[i])) << i; + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.dispatchTraits.localWorkSize2[i])) << i; + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.dispatchTraits.globalWorkOffset[i])) << i; + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[i])) << i; + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.dispatchTraits.globalWorkSize[i])) << i; + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.dispatchTraits.numWorkGroups[i])) << i; + } + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.dispatchTraits.workDim)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.implicitArgs.maxWorkGroupSize)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.implicitArgs.simdSize)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.implicitArgs.preferredWkgMultiple)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.implicitArgs.privateMemorySize)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.implicitArgs.localMemoryStatelessWindowSize)); + EXPECT_TRUE(NEO::isUndefinedOffset(dst.payloadMappings.implicitArgs.localMemoryStatelessWindowStartAddres)); + } + + { + iOpenCL::SPatchDataParameterBuffer localWorkSize[3] = {}; + iOpenCL::SPatchDataParameterBuffer localWorkSize2[3] = {}; + iOpenCL::SPatchDataParameterBuffer enqueuedLocalWorkSize[3] = {}; + iOpenCL::SPatchDataParameterBuffer 
numWorkGroups[3] = {}; + iOpenCL::SPatchDataParameterBuffer globalWorkOffset[3] = {}; + iOpenCL::SPatchDataParameterBuffer globalWorkSize[3] = {}; + iOpenCL::SPatchDataParameterBuffer maxWorkGroupSize = {}; + iOpenCL::SPatchDataParameterBuffer workDimensions = {}; + iOpenCL::SPatchDataParameterBuffer simdSize = {}; + iOpenCL::SPatchDataParameterBuffer parentEvent = {}; + iOpenCL::SPatchDataParameterBuffer privateMemoryStatelessSize = {}; + iOpenCL::SPatchDataParameterBuffer localMemoryStatelessWindowSize = {}; + iOpenCL::SPatchDataParameterBuffer localMemoryStatelessWindowStartAddress = {}; + iOpenCL::SPatchDataParameterBuffer preferredWorkgroupMultiple = {}; + localWorkSize[0].Offset = 2; + localWorkSize[1].Offset = 3; + localWorkSize[2].Offset = 5; + localWorkSize2[0].Offset = 7; + localWorkSize2[1].Offset = 11; + localWorkSize2[2].Offset = 13; + enqueuedLocalWorkSize[0].Offset = 17; + enqueuedLocalWorkSize[1].Offset = 19; + enqueuedLocalWorkSize[2].Offset = 23; + numWorkGroups[0].Offset = 23; + numWorkGroups[1].Offset = 29; + numWorkGroups[2].Offset = 31; + globalWorkOffset[0].Offset = 37; + globalWorkOffset[1].Offset = 41; + globalWorkOffset[2].Offset = 43; + globalWorkSize[0].Offset = 47; + globalWorkSize[1].Offset = 53; + globalWorkSize[2].Offset = 59; + maxWorkGroupSize.Offset = 61; + workDimensions.Offset = 67; + simdSize.Offset = 71; + parentEvent.Offset = 73; + privateMemoryStatelessSize.Offset = 79; + localMemoryStatelessWindowSize.Offset = 83; + localMemoryStatelessWindowStartAddress.Offset = 89; + preferredWorkgroupMultiple.Offset = 91; + for (uint32_t i = 0; i < 3U; ++i) { + kernelTokens.tokens.crossThreadPayloadArgs.localWorkSize[i] = &localWorkSize[i]; + kernelTokens.tokens.crossThreadPayloadArgs.localWorkSize2[i] = &localWorkSize2[i]; + kernelTokens.tokens.crossThreadPayloadArgs.globalWorkOffset[i] = &globalWorkOffset[i]; + kernelTokens.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[i] = &enqueuedLocalWorkSize[i]; + 
kernelTokens.tokens.crossThreadPayloadArgs.globalWorkSize[i] = &globalWorkSize[i]; + kernelTokens.tokens.crossThreadPayloadArgs.numWorkGroups[i] = &numWorkGroups[i]; + } + kernelTokens.tokens.crossThreadPayloadArgs.workDimensions = &workDimensions; + kernelTokens.tokens.crossThreadPayloadArgs.maxWorkGroupSize = &maxWorkGroupSize; + kernelTokens.tokens.crossThreadPayloadArgs.simdSize = &simdSize; + kernelTokens.tokens.crossThreadPayloadArgs.parentEvent = &parentEvent; + kernelTokens.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple = &preferredWorkgroupMultiple; + kernelTokens.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize = &privateMemoryStatelessSize; + kernelTokens.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize = &localMemoryStatelessWindowSize; + kernelTokens.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress = &localMemoryStatelessWindowStartAddress; + + NEO::KernelDescriptor dst; + NEO::populateKernelDescriptor(dst, kernelTokens, 4); + for (uint32_t i = 0; i < 3U; ++i) { + EXPECT_NE(0U, dst.payloadMappings.dispatchTraits.localWorkSize[i]) << i; + EXPECT_NE(0U, dst.payloadMappings.dispatchTraits.localWorkSize2[i]) << i; + EXPECT_NE(0U, dst.payloadMappings.dispatchTraits.globalWorkOffset[i]) << i; + EXPECT_NE(0U, dst.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[i]) << i; + EXPECT_NE(0U, dst.payloadMappings.dispatchTraits.globalWorkSize[i]) << i; + EXPECT_NE(0U, dst.payloadMappings.dispatchTraits.numWorkGroups[i]) << i; + + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.localWorkSize[i]->Offset, dst.payloadMappings.dispatchTraits.localWorkSize[i]) << i; + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.localWorkSize2[i]->Offset, dst.payloadMappings.dispatchTraits.localWorkSize2[i]) << i; + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.globalWorkOffset[i]->Offset, dst.payloadMappings.dispatchTraits.globalWorkOffset[i]) << i; + 
EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[i]->Offset, dst.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[i]) << i; + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.globalWorkSize[i]->Offset, dst.payloadMappings.dispatchTraits.globalWorkSize[i]) << i; + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.numWorkGroups[i]->Offset, dst.payloadMappings.dispatchTraits.numWorkGroups[i]) << i; + } + EXPECT_NE(0U, dst.payloadMappings.dispatchTraits.workDim); + EXPECT_NE(0U, dst.payloadMappings.implicitArgs.maxWorkGroupSize); + EXPECT_NE(0U, dst.payloadMappings.implicitArgs.simdSize); + EXPECT_NE(0U, dst.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent); + EXPECT_NE(0U, dst.payloadMappings.implicitArgs.preferredWkgMultiple); + EXPECT_NE(0U, dst.payloadMappings.implicitArgs.privateMemorySize); + EXPECT_NE(0U, dst.payloadMappings.implicitArgs.localMemoryStatelessWindowSize); + EXPECT_NE(0U, dst.payloadMappings.implicitArgs.localMemoryStatelessWindowStartAddres); + + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.workDimensions->Offset, dst.payloadMappings.dispatchTraits.workDim); + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.maxWorkGroupSize->Offset, dst.payloadMappings.implicitArgs.maxWorkGroupSize); + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.simdSize->Offset, dst.payloadMappings.implicitArgs.simdSize); + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.parentEvent->Offset, dst.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent); + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple->Offset, dst.payloadMappings.implicitArgs.preferredWkgMultiple); + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize->Offset, dst.payloadMappings.implicitArgs.privateMemorySize); + EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize->Offset, dst.payloadMappings.implicitArgs.localMemoryStatelessWindowSize); + 
EXPECT_EQ(kernelTokens.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress->Offset, dst.payloadMappings.implicitArgs.localMemoryStatelessWindowStartAddres); + } +} + +TEST(KernelDescriptorFromPatchtokens, GivenKernelWithChildBlocksMetadataImplicitArgsThenKernelDescriptorIsProperlyPopulated) { + std::vector storage; + NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); + NEO::KernelDescriptor dst; + NEO::populateKernelDescriptor(dst, kernelTokens, 4); + EXPECT_TRUE(dst.kernelMetadata.deviceSideEnqueueChildrenKernelsIdOffset.empty()); + + iOpenCL::SPatchDataParameterBuffer childBlocks[2] = {}; + childBlocks[0].ArgumentNumber = 0; + childBlocks[1].ArgumentNumber = 1; + childBlocks[0].Offset = 4096; + childBlocks[1].Offset = 8192; + kernelTokens.tokens.crossThreadPayloadArgs.childBlockSimdSize.push_back(&childBlocks[0]); + kernelTokens.tokens.crossThreadPayloadArgs.childBlockSimdSize.push_back(&childBlocks[1]); + NEO::populateKernelDescriptor(dst, kernelTokens, 4); + ASSERT_EQ(2U, dst.kernelMetadata.deviceSideEnqueueChildrenKernelsIdOffset.size()); + EXPECT_EQ(childBlocks[0].ArgumentNumber, dst.kernelMetadata.deviceSideEnqueueChildrenKernelsIdOffset[0].first); + EXPECT_EQ(childBlocks[1].ArgumentNumber, dst.kernelMetadata.deviceSideEnqueueChildrenKernelsIdOffset[1].first); + EXPECT_EQ(childBlocks[0].Offset, dst.kernelMetadata.deviceSideEnqueueChildrenKernelsIdOffset[0].second); + EXPECT_EQ(childBlocks[1].Offset, dst.kernelMetadata.deviceSideEnqueueChildrenKernelsIdOffset[1].second); +}