From 4808f663b6f7e2de99917774941722dc57f0a89e Mon Sep 17 00:00:00 2001 From: Krystian Chmielewski Date: Mon, 29 Mar 2021 12:34:25 +0200 Subject: [PATCH] Remove WorkloadInfo from KernelInfo Related-to: NEO-4729 Signed-off-by: Krystian Chmielewski --- .../helpers/hardware_commands_helper_base.inl | 2 +- opencl/source/kernel/kernel.cpp | 188 +++++++----------- opencl/source/program/kernel_info.cpp | 34 ++-- opencl/source/program/kernel_info.h | 23 --- .../program/kernel_info_from_patchtokens.cpp | 23 +-- .../command_queue/dispatch_walker_tests.cpp | 96 ++++----- .../kernel_reflection_surface_tests.cpp | 44 ++-- .../unit_test/kernel/kernel_slm_arg_tests.cpp | 2 +- .../unit_test/kernel/kernel_slm_tests.cpp | 28 +-- opencl/test/unit_test/kernel/kernel_tests.cpp | 38 ++-- opencl/test/unit_test/program/kernel_data.cpp | 10 +- .../unit_test/program/kernel_data_OCL2_0.cpp | 4 +- .../test/unit_test/program/program_tests.cpp | 24 +-- shared/source/program/program_info.cpp | 2 +- .../unit_test/program/program_info_tests.cpp | 2 +- 15 files changed, 214 insertions(+), 306 deletions(-) diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 6faf4a71ba..c5ca76579b 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -46,7 +46,7 @@ size_t HardwareCommandsHelper::getSizeRequiredDSH(const Kernel &kerne totalSize += borderColorSize + additionalSizeRequiredDsh(); - DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.getKernelInfo().isVmeWorkload)); + DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.isVmeKernel())); return alignUp(totalSize, EncodeStates::alignInterfaceDescriptorData); } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 7d51d0924e..11db6c4410 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -75,7 +75,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &c program->retainForKernel(); imageTransformer.reset(new ImageTransformer); maxKernelWorkGroupSize = static_cast(clDevice.getSharedDeviceInfo().maxWorkGroupSize); - slmTotalSize = kernelInfoArg.workloadInfo.slmStaticSize; + slmTotalSize = kernelInfoArg.kernelDescriptor.kernelAttributes.slmInlineSize; } Kernel::~Kernel() { @@ -190,8 +190,9 @@ cl_int Kernel::initialize() { auto &hwInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &kernelDescriptor = kernelInfo.kernelDescriptor; + const auto &dispatchTraits = kernelDescriptor.payloadMappings.dispatchTraits; + const auto &implicitArgs = kernelDescriptor.payloadMappings.implicitArgs; auto maxSimdSize = kernelInfo.getMaxSimdSize(); - const auto &workloadInfo = kernelInfo.workloadInfo; const auto &heapInfo = kernelInfo.heapInfo; if (maxSimdSize != 1 && maxSimdSize < hwHelper.getMinimalSIMDSize()) { @@ -212,106 +213,71 @@ cl_int Kernel::initialize() { } auto crossThread = reinterpret_cast(crossThreadData); - globalWorkOffsetX = workloadInfo.globalWorkOffsetOffsets[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[0]) - : globalWorkOffsetX; - globalWorkOffsetY = workloadInfo.globalWorkOffsetOffsets[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[1]) - : globalWorkOffsetY; - globalWorkOffsetZ = workloadInfo.globalWorkOffsetOffsets[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[2]) - : globalWorkOffsetZ; + auto setDispatchTraitsIfValidOffset = [&](uint32_t *&crossThreadData, NEO::CrossThreadDataOffset offset) { + if (isValidOffset(offset)) { + crossThreadData = ptrOffset(crossThread, offset); + } + }; + setDispatchTraitsIfValidOffset(globalWorkOffsetX, dispatchTraits.globalWorkOffset[0]); + setDispatchTraitsIfValidOffset(globalWorkOffsetY, dispatchTraits.globalWorkOffset[1]); + setDispatchTraitsIfValidOffset(globalWorkOffsetZ, dispatchTraits.globalWorkOffset[2]); - localWorkSizeX = workloadInfo.localWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[0]) - : localWorkSizeX; - localWorkSizeY = workloadInfo.localWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[1]) - : localWorkSizeY; - localWorkSizeZ = workloadInfo.localWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[2]) - : localWorkSizeZ; + setDispatchTraitsIfValidOffset(localWorkSizeX, dispatchTraits.localWorkSize[0]); + setDispatchTraitsIfValidOffset(localWorkSizeY, dispatchTraits.localWorkSize[1]); + setDispatchTraitsIfValidOffset(localWorkSizeZ, dispatchTraits.localWorkSize[2]); - localWorkSizeX2 = workloadInfo.localWorkSizeOffsets2[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[0]) - : localWorkSizeX2; - localWorkSizeY2 = workloadInfo.localWorkSizeOffsets2[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[1]) - : localWorkSizeY2; - localWorkSizeZ2 = workloadInfo.localWorkSizeOffsets2[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[2]) - : localWorkSizeZ2; + setDispatchTraitsIfValidOffset(localWorkSizeX2, dispatchTraits.localWorkSize2[0]); + setDispatchTraitsIfValidOffset(localWorkSizeY2, dispatchTraits.localWorkSize2[1]); + setDispatchTraitsIfValidOffset(localWorkSizeZ2, dispatchTraits.localWorkSize2[2]); - globalWorkSizeX = workloadInfo.globalWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[0]) - : globalWorkSizeX; - globalWorkSizeY = workloadInfo.globalWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[1]) - : globalWorkSizeY; - globalWorkSizeZ = workloadInfo.globalWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[2]) - : globalWorkSizeZ; + setDispatchTraitsIfValidOffset(globalWorkSizeX, dispatchTraits.globalWorkSize[0]); + setDispatchTraitsIfValidOffset(globalWorkSizeY, dispatchTraits.globalWorkSize[1]); + setDispatchTraitsIfValidOffset(globalWorkSizeZ, dispatchTraits.globalWorkSize[2]); - enqueuedLocalWorkSizeX = workloadInfo.enqueuedLocalWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[0]) - : enqueuedLocalWorkSizeX; - enqueuedLocalWorkSizeY = workloadInfo.enqueuedLocalWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[1]) - : enqueuedLocalWorkSizeY; - enqueuedLocalWorkSizeZ = workloadInfo.enqueuedLocalWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[2]) - : enqueuedLocalWorkSizeZ; + setDispatchTraitsIfValidOffset(globalWorkOffsetX, dispatchTraits.globalWorkOffset[0]); + setDispatchTraitsIfValidOffset(globalWorkOffsetY, dispatchTraits.globalWorkOffset[1]); + setDispatchTraitsIfValidOffset(globalWorkOffsetZ, dispatchTraits.globalWorkOffset[2]); - numWorkGroupsX = workloadInfo.numWorkGroupsOffset[0] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[0]) - : numWorkGroupsX; - numWorkGroupsY = workloadInfo.numWorkGroupsOffset[1] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[1]) - : numWorkGroupsY; - numWorkGroupsZ = workloadInfo.numWorkGroupsOffset[2] != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[2]) - : numWorkGroupsZ; + setDispatchTraitsIfValidOffset(enqueuedLocalWorkSizeX, dispatchTraits.enqueuedLocalWorkSize[0]); + setDispatchTraitsIfValidOffset(enqueuedLocalWorkSizeY, dispatchTraits.enqueuedLocalWorkSize[1]); + setDispatchTraitsIfValidOffset(enqueuedLocalWorkSizeZ, dispatchTraits.enqueuedLocalWorkSize[2]); - maxWorkGroupSizeForCrossThreadData = workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.maxWorkGroupSizeOffset) - : maxWorkGroupSizeForCrossThreadData; - workDim = workloadInfo.workDimOffset != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.workDimOffset) - : workDim; - dataParameterSimdSize = workloadInfo.simdSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.simdSizeOffset) : dataParameterSimdSize; - parentEventOffset = workloadInfo.parentEventOffset != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.parentEventOffset) - : parentEventOffset; - preferredWkgMultipleOffset = workloadInfo.preferredWkgMultipleOffset != WorkloadInfo::undefinedOffset - ? ptrOffset(crossThread, workloadInfo.preferredWkgMultipleOffset) - : preferredWkgMultipleOffset; + setDispatchTraitsIfValidOffset(numWorkGroupsX, dispatchTraits.numWorkGroups[0]); + setDispatchTraitsIfValidOffset(numWorkGroupsY, dispatchTraits.numWorkGroups[1]); + setDispatchTraitsIfValidOffset(numWorkGroupsZ, dispatchTraits.numWorkGroups[2]); - *maxWorkGroupSizeForCrossThreadData = maxKernelWorkGroupSize; - *dataParameterSimdSize = maxSimdSize; - *preferredWkgMultipleOffset = maxSimdSize; - *parentEventOffset = WorkloadInfo::invalidParentEvent; + setDispatchTraitsIfValidOffset(workDim, dispatchTraits.workDim); + + auto setArgsIfValidOffset = [&](uint32_t *&crossThreadData, NEO::CrossThreadDataOffset offset, uint32_t value) { + if (isValidOffset(offset)) { + crossThreadData = ptrOffset(crossThread, offset); + *crossThreadData = value; + } + }; + setArgsIfValidOffset(maxWorkGroupSizeForCrossThreadData, implicitArgs.maxWorkGroupSize, maxKernelWorkGroupSize); + setArgsIfValidOffset(dataParameterSimdSize, implicitArgs.simdSize, maxSimdSize); + setArgsIfValidOffset(preferredWkgMultipleOffset, implicitArgs.preferredWkgMultiple, maxSimdSize); + setArgsIfValidOffset(parentEventOffset, implicitArgs.deviceSideEnqueueParentEvent, undefined); } // allocate our own SSH, if necessary sshLocalSize = heapInfo.SurfaceStateHeapSize; - if (sshLocalSize) { pSshLocal = std::make_unique(sshLocalSize); // copy the ssh into our local copy memcpy_s(pSshLocal.get(), sshLocalSize, - heapInfo.pSsh, sshLocalSize); + heapInfo.pSsh, heapInfo.SurfaceStateHeapSize); } numberOfBindingTableStates = kernelDescriptor.payloadMappings.bindingTable.numEntries; localBindingTableOffset = kernelDescriptor.payloadMappings.bindingTable.tableOffset; // patch crossthread data and ssh with inline surfaces, if necessary auto perHwThreadPrivateMemorySize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; - if (perHwThreadPrivateMemorySize) { privateSurfaceSize = KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch); - DEBUG_BREAK_IF(privateSurfaceSize == 0); + if (privateSurfaceSize > std::numeric_limits::max()) { return CL_OUT_OF_RESOURCES; } @@ -323,9 +289,11 @@ cl_int Kernel::initialize() { if (privateSurface == nullptr) { return CL_OUT_OF_RESOURCES; } - const auto &patch = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress; - patchWithImplicitSurface(reinterpret_cast(privateSurface->getGpuAddressToPatch()), *privateSurface, patch); + + const auto &privateMemoryAddress = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress; + patchWithImplicitSurface(reinterpret_cast(privateSurface->getGpuAddressToPatch()), *privateSurface, privateMemoryAddress); } + if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) { DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr); uintptr_t constMemory = isBuiltIn ? (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch(); @@ -342,6 +310,7 @@ cl_int Kernel::initialize() { patchWithImplicitSurface(reinterpret_cast(globalMemory), *program->getGlobalSurface(rootDeviceIndex), arg); } + // Patch Surface State Heap bool useGlobalAtomics = kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics; if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) { @@ -1399,7 +1368,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndexIn, ++argIndex; } - slmTotalSize = kernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB); + slmTotalSize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize + alignUp(slmOffset, KB); return CL_SUCCESS; } @@ -2107,41 +2076,32 @@ void Kernel::ReflectionSurfaceHelper::getCurbeParams(std::vector(i * sizeof(uint32_t))}); + tokenMask |= shiftLeftBy(parameterType); + } + }; + emplaceIfValidOffsetAndShiftTokenMask(DATA_PARAMETER_LOCAL_WORK_SIZE, dispatchTraits.localWorkSize[i]); + emplaceIfValidOffsetAndShiftTokenMask(DATA_PARAMETER_LOCAL_WORK_SIZE, dispatchTraits.localWorkSize2[i]); + emplaceIfValidOffsetAndShiftTokenMask(DATA_PARAMETER_GLOBAL_WORK_OFFSET, dispatchTraits.globalWorkOffset[i]); + emplaceIfValidOffsetAndShiftTokenMask(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, dispatchTraits.enqueuedLocalWorkSize[i]); + emplaceIfValidOffsetAndShiftTokenMask(DATA_PARAMETER_GLOBAL_WORK_SIZE, dispatchTraits.globalWorkSize[i]); + emplaceIfValidOffsetAndShiftTokenMask(DATA_PARAMETER_NUM_WORK_GROUPS, dispatchTraits.numWorkGroups[i]); } - if (kernelInfo.workloadInfo.parentEventOffset != WorkloadInfo::undefinedOffset) { - curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_PARENT_EVENT, sizeof(uint32_t), kernelInfo.workloadInfo.parentEventOffset, 0}); - tokenMask |= shiftLeftBy(DATA_PARAMETER_PARENT_EVENT); - } - if (kernelInfo.workloadInfo.workDimOffset != WorkloadInfo::undefinedOffset) { - curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_WORK_DIMENSIONS, sizeof(uint32_t), kernelInfo.workloadInfo.workDimOffset, 0}); - tokenMask |= shiftLeftBy(DATA_PARAMETER_WORK_DIMENSIONS); + { + const auto &payloadMappings = kernelInfo.kernelDescriptor.payloadMappings; + auto emplaceIfValidOffsetAndShiftTokenMask = [&](uint parameterType, NEO::CrossThreadDataOffset offset) { + if (isValidOffset(offset)) { + curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{parameterType, sizeof(uint32_t), offset, 0}); + tokenMask |= shiftLeftBy(parameterType); + } + }; + emplaceIfValidOffsetAndShiftTokenMask(DATA_PARAMETER_PARENT_EVENT, payloadMappings.implicitArgs.deviceSideEnqueueParentEvent); + emplaceIfValidOffsetAndShiftTokenMask(DATA_PARAMETER_WORK_DIMENSIONS, payloadMappings.dispatchTraits.workDim); } std::sort(curbeParamsOut.begin(), curbeParamsOut.end(), compareFunction); @@ -2170,7 +2130,7 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface, kernelData->m_RequiredWkgSizes[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; kernelData->m_RequiredWkgSizes[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; kernelData->m_RequiredWkgSizes[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; - kernelData->m_InilineSLMSize = kernelInfo.workloadInfo.slmStaticSize; + kernelData->m_InilineSLMSize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize; bool localIdRequired = false; if (kernelInfo.kernelDescriptor.kernelAttributes.flags.usesFlattenedLocalIds || (kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels > 0)) { diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index d735de3e02..03ccb0a303 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -359,29 +359,19 @@ void KernelInfo::apply(const DeviceInfoKernelPayloadConstants &constants) { return; } - uint32_t privateMemoryStatelessSizeOffset = this->workloadInfo.privateMemoryStatelessSizeOffset; - uint32_t localMemoryStatelessWindowSizeOffset = this->workloadInfo.localMemoryStatelessWindowSizeOffset; - uint32_t localMemoryStatelessWindowStartAddressOffset = this->workloadInfo.localMemoryStatelessWindowStartAddressOffset; + const auto &implicitArgs = kernelDescriptor.payloadMappings.implicitArgs; + const auto privateMemorySize = static_cast(KernelHelper::getPrivateSurfaceSize(kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize, + constants.computeUnitsUsedForScratch)); - if (localMemoryStatelessWindowStartAddressOffset != WorkloadInfo::undefinedOffset) { - *(uintptr_t *)&(this->crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast(constants.slmWindow); - } - - if (localMemoryStatelessWindowSizeOffset != WorkloadInfo::undefinedOffset) { - *(uint32_t *)&(this->crossThreadData[localMemoryStatelessWindowSizeOffset]) = constants.slmWindowSize; - } - - auto perHwThreadSize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; - uint32_t privateMemorySize = static_cast(KernelHelper::getPrivateSurfaceSize(perHwThreadSize, - constants.computeUnitsUsedForScratch)); - - if (privateMemoryStatelessSizeOffset != WorkloadInfo::undefinedOffset) { - *(uint32_t *)&(this->crossThreadData[privateMemoryStatelessSizeOffset]) = privateMemorySize; - } - - if (this->workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) { - *(uint32_t *)&(this->crossThreadData[this->workloadInfo.maxWorkGroupSizeOffset]) = constants.maxWorkGroupSize; - } + auto setIfValidOffset = [&](auto value, NEO::CrossThreadDataOffset offset) { + if (isValidOffset(offset)) { + *ptrOffset(reinterpret_cast(crossThreadData), offset) = value; + } + }; + setIfValidOffset(reinterpret_cast(constants.slmWindow), implicitArgs.localMemoryStatelessWindowStartAddres); + setIfValidOffset(constants.slmWindowSize, implicitArgs.localMemoryStatelessWindowSize); + setIfValidOffset(privateMemorySize, implicitArgs.privateMemorySize); + setIfValidOffset(constants.maxWorkGroupSize, implicitArgs.maxWorkGroupSize); } std::string concatenateKernelNames(ArrayRef kernelInfos) { diff --git a/opencl/source/program/kernel_info.h b/opencl/source/program/kernel_info.h index ef343dfeaf..4ffd115144 100644 --- a/opencl/source/program/kernel_info.h +++ b/opencl/source/program/kernel_info.h @@ -43,31 +43,9 @@ extern bool useKernelDescriptor; extern std::map typeSizeMap; -struct WorkloadInfo { - enum : uint32_t { undefinedOffset = std::numeric_limits::max() }; - enum : uint32_t { invalidParentEvent = std::numeric_limits::max() }; - - uint32_t globalWorkOffsetOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; - uint32_t globalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; - uint32_t localWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; - uint32_t localWorkSizeOffsets2[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; - uint32_t enqueuedLocalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; - uint32_t numWorkGroupsOffset[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; - uint32_t maxWorkGroupSizeOffset = undefinedOffset; - uint32_t workDimOffset = undefinedOffset; - uint32_t slmStaticSize = 0; - uint32_t simdSizeOffset = undefinedOffset; - uint32_t parentEventOffset = undefinedOffset; - uint32_t preferredWkgMultipleOffset = undefinedOffset; - uint32_t privateMemoryStatelessSizeOffset = undefinedOffset; - uint32_t localMemoryStatelessWindowSizeOffset = undefinedOffset; - uint32_t localMemoryStatelessWindowStartAddressOffset = undefinedOffset; -}; - static const float YTilingRatioValue = 1.3862943611198906188344642429164f; struct WorkSizeInfo { - uint32_t maxWorkGroupSize; uint32_t minWorkGroupSize; bool hasBarriers; @@ -168,7 +146,6 @@ struct KernelInfo { PatchInfo patchInfo = {}; std::vector kernelArgInfo; std::vector kernelNonArgInfo; - WorkloadInfo workloadInfo = {}; std::vector> childrenKernelsIdOffset; bool usesSsh = false; bool requiresSshForBuffers = false; diff --git a/opencl/source/program/kernel_info_from_patchtokens.cpp b/opencl/source/program/kernel_info_from_patchtokens.cpp index d5e93c3ccf..2a1b0f39a0 100644 --- a/opencl/source/program/kernel_info_from_patchtokens.cpp +++ b/opencl/source/program/kernel_info_from_patchtokens.cpp @@ -24,15 +24,13 @@ inline void storeTokenIfNotNull(KernelInfo &kernelInfo, T *token) { kernelInfo.storePatchToken(token); } } - template inline uint32_t getOffset(T *token) { if (token != nullptr) { return token->Offset; } - return WorkloadInfo::undefinedOffset; + return undefined; } - void populateKernelInfoArgMetadata(KernelInfo &dstKernelInfoArg, const SPatchKernelArgumentInfo *src) { if (nullptr == src) { return; @@ -158,7 +156,7 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch storeTokenIfNotNull(dst, src.tokens.executionEnvironment); dst.usesSsh = src.tokens.bindingTableState && (src.tokens.bindingTableState->Count > 0); - dst.workloadInfo.slmStaticSize = src.tokens.allocateLocalSurface ? src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U; + dst.kernelDescriptor.kernelAttributes.slmInlineSize = src.tokens.allocateLocalSurface ? src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U; dst.kernelArgInfo.resize(src.tokens.kernelArgs.size()); @@ -178,23 +176,6 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr); dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U; - for (uint32_t i = 0; i < 3U; ++i) { - dst.workloadInfo.localWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize[i]); - dst.workloadInfo.localWorkSizeOffsets2[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize2[i]); - dst.workloadInfo.globalWorkOffsetOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkOffset[i]); - dst.workloadInfo.enqueuedLocalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[i]); - dst.workloadInfo.globalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkSize[i]); - dst.workloadInfo.numWorkGroupsOffset[i] = getOffset(src.tokens.crossThreadPayloadArgs.numWorkGroups[i]); - } - - dst.workloadInfo.maxWorkGroupSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.maxWorkGroupSize); - dst.workloadInfo.workDimOffset = getOffset(src.tokens.crossThreadPayloadArgs.workDimensions); - dst.workloadInfo.simdSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.simdSize); - dst.workloadInfo.parentEventOffset = getOffset(src.tokens.crossThreadPayloadArgs.parentEvent); - dst.workloadInfo.preferredWkgMultipleOffset = getOffset(src.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple); - dst.workloadInfo.privateMemoryStatelessSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize); - dst.workloadInfo.localMemoryStatelessWindowSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize); - dst.workloadInfo.localMemoryStatelessWindowStartAddressOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress); for (auto &childSimdSize : src.tokens.crossThreadPayloadArgs.childBlockSimdSize) { dst.childrenKernelsIdOffset.push_back({childSimdSize->ArgumentNumber, childSimdSize->Offset}); } diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index 957d09e983..62082089d6 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -249,7 +249,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDis HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.workDimOffset = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -282,7 +282,7 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi DebugManager.flags.EnableComputeWorkSizeND.set(false); DebugManager.flags.EnableComputeWorkSizeSquared.set(true); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.workDimOffset = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -312,7 +312,7 @@ HWTEST_F(DispatchWalkerTest, GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensi DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(true); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.workDimOffset = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -343,7 +343,7 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens DebugManager.flags.EnableComputeWorkSizeND.set(false); DebugManager.flags.EnableComputeWorkSizeSquared.set(false); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.workDimOffset = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -371,9 +371,9 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkGroupsIsCorrectlySet) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.numWorkGroupsOffset[0] = 0; - kernelInfo.workloadInfo.numWorkGroupsOffset[1] = 4; - kernelInfo.workloadInfo.numWorkGroupsOffset[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -403,9 +403,9 @@ HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkG } HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGlobalWorkOffsetIsCorrectlySet) { - kernelInfo.workloadInfo.globalWorkOffsetOffsets[0] = 0u; - kernelInfo.workloadInfo.globalWorkOffsetOffsets[1] = 4u; - kernelInfo.workloadInfo.globalWorkOffsetOffsets[2] = 8u; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[0] = 0u; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[1] = 4u; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[2] = 8u; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -439,9 +439,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatch DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; - kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; - kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -471,9 +471,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThe DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(true); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; - kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; - kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -504,9 +504,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatch DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; - kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; - kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -537,9 +537,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; - kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; - kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -567,9 +567,9 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsCorrect) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; - kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; - kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -598,12 +598,12 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsC HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLwsIsCorrect) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; - kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; - kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; - kernelInfo.workloadInfo.localWorkSizeOffsets2[0] = 12; - kernelInfo.workloadInfo.localWorkSizeOffsets2[1] = 16; - kernelInfo.workloadInfo.localWorkSizeOffsets2[2] = 20; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[0] = 12; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = 16; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[2] = 20; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -635,15 +635,15 @@ HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLw HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorrect) { MockKernel kernel1(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; - kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; - kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); - kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[0] = 12; - kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[1] = 16; - kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[2] = 20; + kernelInfoWithSampler.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 12; + kernelInfoWithSampler.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 16; + kernelInfoWithSampler.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 20; ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); DispatchInfo di1(pClDevice, &kernel1, 3, {10, 10, 10}, {1, 2, 3}, {0, 0, 0}); @@ -686,15 +686,15 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorrect) { MockKernel kernel1(program.get(), kernelInfo, *pClDevice); MockKernel mainKernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; - kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; - kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; - kernelInfo.workloadInfo.localWorkSizeOffsets2[0] = 12; - kernelInfo.workloadInfo.localWorkSizeOffsets2[1] = 16; - kernelInfo.workloadInfo.localWorkSizeOffsets2[2] = 20; - kernelInfo.workloadInfo.numWorkGroupsOffset[0] = 24; - kernelInfo.workloadInfo.numWorkGroupsOffset[1] = 28; - kernelInfo.workloadInfo.numWorkGroupsOffset[2] = 32; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[0] = 12; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = 16; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[2] = 20; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 24; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 28; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = 32; ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); ASSERT_EQ(CL_SUCCESS, mainKernel.initialize()); @@ -1329,7 +1329,7 @@ HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenAuxToNonAuxWhenTran auto &builder = static_cast &>(baseBuilder); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.workDimOffset = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &cmdStream = pCmdQ->getCS(0); @@ -1383,7 +1383,7 @@ HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenNonAuxToAuxWhenTran auto &builder = static_cast &>(baseBuilder); MockKernel kernel(program.get(), kernelInfo, *pClDevice); - kernelInfo.workloadInfo.workDimOffset = 0; + kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &cmdStream = pCmdQ->getCS(0); diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 973e91212e..1a6b26a1a8 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -890,34 +890,34 @@ TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithExecutionParametersWhenPa const uint32_t globalOffsetOffsets[3] = {52, 56, 60}; const uint32_t enqueuedLocalWorkSizeOffsets[3] = {64, 68, 72}; - info.workloadInfo.localWorkSizeOffsets[0] = lwsOffsets[0]; - info.workloadInfo.localWorkSizeOffsets[1] = lwsOffsets[1]; - info.workloadInfo.localWorkSizeOffsets[2] = lwsOffsets[2]; + info.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = lwsOffsets[0]; + info.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = lwsOffsets[1]; + info.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = lwsOffsets[2]; - info.workloadInfo.localWorkSizeOffsets2[0] = lwsOffsets2[0]; - info.workloadInfo.localWorkSizeOffsets2[1] = lwsOffsets2[1]; - info.workloadInfo.localWorkSizeOffsets2[2] = lwsOffsets2[2]; + info.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[0] = lwsOffsets2[0]; + info.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = lwsOffsets2[1]; + info.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[2] = lwsOffsets2[2]; - info.workloadInfo.globalWorkSizeOffsets[0] = gwsOffsets[0]; - info.workloadInfo.globalWorkSizeOffsets[1] = gwsOffsets[1]; - info.workloadInfo.globalWorkSizeOffsets[2] = gwsOffsets[2]; + info.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = gwsOffsets[0]; + info.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[1] = gwsOffsets[1]; + info.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = gwsOffsets[2]; - info.workloadInfo.numWorkGroupsOffset[0] = numOffsets[0]; - info.workloadInfo.numWorkGroupsOffset[1] = numOffsets[1]; - info.workloadInfo.numWorkGroupsOffset[2] = numOffsets[2]; + info.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = numOffsets[0]; + info.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = numOffsets[1]; + info.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = numOffsets[2]; - info.workloadInfo.globalWorkOffsetOffsets[0] = globalOffsetOffsets[0]; - info.workloadInfo.globalWorkOffsetOffsets[1] = globalOffsetOffsets[1]; - info.workloadInfo.globalWorkOffsetOffsets[2] = globalOffsetOffsets[2]; + info.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[0] = globalOffsetOffsets[0]; + info.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[1] = globalOffsetOffsets[1]; + info.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[2] = globalOffsetOffsets[2]; - info.workloadInfo.enqueuedLocalWorkSizeOffsets[0] = enqueuedLocalWorkSizeOffsets[0]; - info.workloadInfo.enqueuedLocalWorkSizeOffsets[1] = enqueuedLocalWorkSizeOffsets[1]; - info.workloadInfo.enqueuedLocalWorkSizeOffsets[2] = enqueuedLocalWorkSizeOffsets[2]; + info.kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[0] = enqueuedLocalWorkSizeOffsets[0]; + info.kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[1] = enqueuedLocalWorkSizeOffsets[1]; + info.kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[2] = enqueuedLocalWorkSizeOffsets[2]; - info.workloadInfo.workDimOffset = workDimOffset; + info.kernelDescriptor.payloadMappings.dispatchTraits.workDim = workDimOffset; // NUM_HARDWARE_THREADS unsupported EXPECT_TRUE(numHwThreads > 0u); - info.workloadInfo.parentEventOffset = parentEventOffset; + info.kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent = parentEventOffset; std::vector curbeParams; uint64_t tokenMask = 0; @@ -1247,7 +1247,7 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParamm_RequiredWkgSizes[0]); EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1], kernelData->m_RequiredWkgSizes[1]); EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2], kernelData->m_RequiredWkgSizes[2]); - EXPECT_EQ(info.workloadInfo.slmStaticSize, kernelData->m_InilineSLMSize); + EXPECT_EQ(info.kernelDescriptor.kernelAttributes.slmInlineSize, kernelData->m_InilineSLMSize); if (localIDPresent.flattend || localIDPresent.x || localIDPresent.y || localIDPresent.z) EXPECT_EQ(1u, kernelData->m_NeedLocalIDS); diff --git a/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp index 177d0027b5..f7beb6a40a 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp @@ -49,7 +49,7 @@ class KernelSlmArgTest : public MultiRootDeviceWithSubDevicesFixture { pKernelInfo[rootDeviceIndex]->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo[rootDeviceIndex]->kernelArgInfo[2].slmAlignment = 0x400; pKernelInfo[rootDeviceIndex]->kernelArgInfo[2].metadata.addressQualifier = KernelArgMetadata::AddrLocal; - pKernelInfo[rootDeviceIndex]->workloadInfo.slmStaticSize = 3 * KB; + pKernelInfo[rootDeviceIndex]->kernelDescriptor.kernelAttributes.slmInlineSize = 3 * KB; kernelInfos[rootDeviceIndex] = pKernelInfo[rootDeviceIndex].get(); } diff --git a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp index e663e7a21e..e2b36babb7 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp @@ -67,7 +67,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr // define kernel info kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; - kernelInfo.workloadInfo.slmStaticSize = GetParam() * KB; + kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = GetParam() * KB; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -101,32 +101,32 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr uint32_t ExpectedSLMSize = 0; if (::renderCoreFamily == IGFX_GEN8_CORE) { - if (kernelInfo.workloadInfo.slmStaticSize <= (4 * 1024)) { + if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (4 * 1024)) { ExpectedSLMSize = 1; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (8 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (8 * 1024)) { ExpectedSLMSize = 2; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (16 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (16 * 1024)) { ExpectedSLMSize = 4; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (32 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (32 * 1024)) { ExpectedSLMSize = 8; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (64 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (64 * 1024)) { ExpectedSLMSize = 16; } } else { - if (kernelInfo.workloadInfo.slmStaticSize <= (1 * 1024)) // its a power of "2" +1 for example 1 is 2^0 ( 0+1); 2 is 2^1 is (1+1) etc. + if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (1 * 1024)) // its a power of "2" +1 for example 1 is 2^0 ( 0+1); 2 is 2^1 is (1+1) etc. { ExpectedSLMSize = 1; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (2 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (2 * 1024)) { ExpectedSLMSize = 2; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (4 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (4 * 1024)) { ExpectedSLMSize = 3; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (8 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (8 * 1024)) { ExpectedSLMSize = 4; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (16 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (16 * 1024)) { ExpectedSLMSize = 5; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (32 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (32 * 1024)) { ExpectedSLMSize = 6; - } else if (kernelInfo.workloadInfo.slmStaticSize <= (64 * 1024)) { + } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (64 * 1024)) { ExpectedSLMSize = 7; } } @@ -154,7 +154,7 @@ HWTEST_F(KernelSLMAndBarrierTest, GivenInterfaceDescriptorProgrammedWhenOverride DebugManagerStateRestore dbgRestore; DebugManager.flags.OverrideSlmAllocationSize.set(expectedSlmSize); - kernelInfo.workloadInfo.slmStaticSize = 0; + kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = 0; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 8a02aeeee4..fd59690809 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2476,7 +2476,7 @@ struct KernelCrossThreadTests : Test { TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkOffsetIsCorrect) { - pKernelInfo->workloadInfo.globalWorkOffsetOffsets[1] = 4; + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[1] = 4; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -2489,7 +2489,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkOffsetIsCorr TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSizeIsCorrect) { - pKernelInfo->workloadInfo.localWorkSizeOffsets[0] = 0xc; + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0xc; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -2502,7 +2502,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSizeIsCorrect TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSize2IsCorrect) { - pKernelInfo->workloadInfo.localWorkSizeOffsets2[1] = 0xd; + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = 0xd; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -2515,7 +2515,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSize2IsCorrec TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkSizeIsCorrect) { - pKernelInfo->workloadInfo.globalWorkSizeOffsets[2] = 8; + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = 8; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -2528,7 +2528,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkSizeIsCorrec TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkDimIsCorrect) { - pKernelInfo->workloadInfo.workDimOffset = 12; + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.workDim = 12; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -2539,9 +2539,9 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkDimIsCorrect) TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenNumWorkGroupsIsCorrect) { - pKernelInfo->workloadInfo.numWorkGroupsOffset[0] = 0 * sizeof(uint32_t); - pKernelInfo->workloadInfo.numWorkGroupsOffset[1] = 1 * sizeof(uint32_t); - pKernelInfo->workloadInfo.numWorkGroupsOffset[2] = 2 * sizeof(uint32_t); + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 0 * sizeof(uint32_t); + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 1 * sizeof(uint32_t); + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = 2 * sizeof(uint32_t); MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -2556,7 +2556,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenNumWorkGroupsIsCorrect TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeIsCorrect) { - pKernelInfo->workloadInfo.enqueuedLocalWorkSizeOffsets[0] = 0; + pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[0] = 0; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -2568,39 +2568,39 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeI } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSizeIsCorrect) { - pKernelInfo->workloadInfo.maxWorkGroupSizeOffset = 12; + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize = 12; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.maxWorkGroupSizeForCrossThreadData); - EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.maxWorkGroupSizeOffset), static_cast(kernel.maxWorkGroupSizeForCrossThreadData)); + EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize), static_cast(kernel.maxWorkGroupSizeForCrossThreadData)); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize); } TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeIsCorrect) { - pKernelInfo->workloadInfo.simdSizeOffset = 16; + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.simdSize = 16; pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 16; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.dataParameterSimdSize); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.dataParameterSimdSize); - EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.simdSizeOffset), static_cast(kernel.dataParameterSimdSize)); + EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.simdSize), static_cast(kernel.dataParameterSimdSize)); EXPECT_EQ_VAL(pKernelInfo->getMaxSimdSize(), *kernel.dataParameterSimdSize); } -TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThenParentEventIsInitiatedWithInvalid) { - pKernelInfo->workloadInfo.parentEventOffset = 16; +TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThenParentEventIsInitiatedWithUndefined) { + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent = 16; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.parentEventOffset); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.parentEventOffset); - EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.parentEventOffset), static_cast(kernel.parentEventOffset)); - EXPECT_EQ(WorkloadInfo::invalidParentEvent, *kernel.parentEventOffset); + EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent), static_cast(kernel.parentEventOffset)); + EXPECT_EQ(undefined, *kernel.parentEventOffset); } TEST_F(KernelCrossThreadTests, WhenAddingKernelThenProgramRefCountIsIncremented) { @@ -2617,7 +2617,7 @@ TEST_F(KernelCrossThreadTests, WhenAddingKernelThenProgramRefCountIsIncremented) TEST_F(KernelCrossThreadTests, GivenSlmStatisSizeWhenCreatingKernelThenSlmTotalSizeIsSet) { - pKernelInfo->workloadInfo.slmStaticSize = 1024; + pKernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize = 1024; MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); @@ -2650,7 +2650,7 @@ TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCu TEST_F(KernelCrossThreadTests, givenKernelWithPreferredWkgMultipleWhenItIsCreatedThenCurbeIsPatchedProperly) { - pKernelInfo->workloadInfo.preferredWkgMultipleOffset = 8; + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.preferredWkgMultiple = 8; MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); kernel->initialize(); diff --git a/opencl/test/unit_test/program/kernel_data.cpp b/opencl/test/unit_test/program/kernel_data.cpp index 6ad78ce4d8..43bfae06b8 100644 --- a/opencl/test/unit_test/program/kernel_data.cpp +++ b/opencl/test/unit_test/program/kernel_data.cpp @@ -857,7 +857,7 @@ TEST_F(KernelDataTest, GivenDataParameterWorkDimensionsWhenBuildingThenProgramIs EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); - EXPECT_EQ(offsetWorkDim, pKernelInfo->workloadInfo.workDimOffset); + EXPECT_EQ(offsetWorkDim, pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.workDim); } TEST_F(KernelDataTest, GivenDataParameterSimdSizeWhenBuildingThenProgramIsCorrect) { @@ -884,7 +884,7 @@ TEST_F(KernelDataTest, GivenDataParameterSimdSizeWhenBuildingThenProgramIsCorrec EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); - EXPECT_EQ(offsetSimdSize, pKernelInfo->workloadInfo.simdSizeOffset); + EXPECT_EQ(offsetSimdSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.simdSize); } TEST_F(KernelDataTest, GivenParameterPrivateMemoryStatelessSizeWhenBuildingThenProgramIsCorrect) { @@ -986,7 +986,7 @@ TEST_F(KernelDataTest, GivenDataParameterNumWorkGroupsWhenBuildingThenProgramIsC EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); - EXPECT_EQ(offsetNumWorkGroups[argumentNumber], pKernelInfo->workloadInfo.numWorkGroupsOffset[argumentNumber]); + EXPECT_EQ(offsetNumWorkGroups[argumentNumber], pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[argumentNumber]); } TEST_F(KernelDataTest, GivenDataParameterMaxWorkgroupSizeWhenBuildingThenProgramIsCorrect) { @@ -1013,7 +1013,7 @@ TEST_F(KernelDataTest, GivenDataParameterMaxWorkgroupSizeWhenBuildingThenProgram EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); - EXPECT_EQ(offsetMaxWorkGroupSize, pKernelInfo->workloadInfo.maxWorkGroupSizeOffset); + EXPECT_EQ(offsetMaxWorkGroupSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize); } TEST_F(KernelDataTest, GivenDataParameterSamplerAddressModeWhenBuildingThenProgramIsCorrect) { @@ -1155,7 +1155,7 @@ TEST_F(KernelDataTest, GivenPatchTokenAllocateLocalSurfaceWhenBuildingThenProgra buildAndDecode(); - EXPECT_EQ(1024u, pKernelInfo->workloadInfo.slmStaticSize); + EXPECT_EQ(1024u, pKernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize); } TEST_F(KernelDataTest, GivenPatchTokenAllocateStatelessPrintfSurfaceWhenBuildingThenProgramIsCorrect) { diff --git a/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp b/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp index a3aefe9bb6..fd070ad0ce 100644 --- a/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp +++ b/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp @@ -90,7 +90,7 @@ TEST_F(KernelDataTest, GIVENdataParameterParentEventWHENdecodeTokensTHENoffsetLo buildAndDecode(); - EXPECT_EQ(pKernelInfo->workloadInfo.parentEventOffset, offsetSimdSize); + EXPECT_EQ(pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent, offsetSimdSize); } TEST_F(KernelDataTest, GIVENdataParameterPreferredWorkgroupMultipleTokenWHENbinaryIsdecodedTHENcorrectOffsetIsAssigned) { @@ -110,7 +110,7 @@ TEST_F(KernelDataTest, GIVENdataParameterPreferredWorkgroupMultipleTokenWHENbina buildAndDecode(); - EXPECT_EQ(pKernelInfo->workloadInfo.preferredWkgMultipleOffset, offset); + EXPECT_EQ(pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.preferredWkgMultiple, offset); } TEST_F(KernelDataTest, GIVENdataParameterObjectIdWHENdecodeTokensTHENoffsetLocatedInKernelArgInfo) { diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index b77cf5f8ea..0cd86d4e5d 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1404,9 +1404,9 @@ TEST_F(PatchTokenTests, WhenBuildingProgramThenGwsIsSet) { auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[0]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[1]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[2]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[0]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[1]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2]); } TEST_F(PatchTokenTests, WhenBuildingProgramThenLwsIsSet) { @@ -1422,18 +1422,18 @@ TEST_F(PatchTokenTests, WhenBuildingProgramThenLwsIsSet) { auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[0]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[1]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[2]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2]); pKernelInfo = pProgram->getKernelInfo("test_get_local_size", rootDeviceIndex); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[0]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[1]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[2]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets2[0]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets2[1]); - ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets2[2]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[0]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1]); + ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[2]); } TEST_F(PatchTokenTests, WhenBuildingProgramThenConstantKernelArgsAreAvailable) { diff --git a/shared/source/program/program_info.cpp b/shared/source/program/program_info.cpp index 9fd64de8b5..7f50d0130f 100644 --- a/shared/source/program/program_info.cpp +++ b/shared/source/program/program_info.cpp @@ -28,7 +28,7 @@ size_t getMaxInlineSlmNeeded(const ProgramInfo &programInfo) { bool requiresLocalMemoryWindowVA(const ProgramInfo &programInfo) { for (const auto &kernelInfo : programInfo.kernelInfos) { - if (WorkloadInfo::undefinedOffset != kernelInfo->workloadInfo.localMemoryStatelessWindowStartAddressOffset) { + if (isValidOffset(kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.localMemoryStatelessWindowStartAddres)) { return true; } } diff --git a/shared/test/unit_test/program/program_info_tests.cpp b/shared/test/unit_test/program/program_info_tests.cpp index 79bbfcff80..ebafc9e918 100644 --- a/shared/test/unit_test/program/program_info_tests.cpp +++ b/shared/test/unit_test/program/program_info_tests.cpp @@ -61,6 +61,6 @@ TEST(RequiresLocalMemoryWindowVA, GivenProgramWithKernelsNotLocalMemoryWindowVAT TEST(RequiresLocalMemoryWindowVA, GivenProgramWithKernelsWhenSomeOfKernelRequireLocalMemoryWidnowVAThenReturnTrue) { NEO::ProgramInfo programInfo; programInfo.kernelInfos = {new NEO::KernelInfo(), new NEO::KernelInfo(), new NEO::KernelInfo()}; - programInfo.kernelInfos[1]->workloadInfo.localMemoryStatelessWindowStartAddressOffset = 0U; + programInfo.kernelInfos[1]->kernelDescriptor.payloadMappings.implicitArgs.localMemoryStatelessWindowStartAddres = 0U; EXPECT_TRUE(NEO::requiresLocalMemoryWindowVA(programInfo)); } \ No newline at end of file