diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index c3be94888b..a64f97d0cc 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -915,7 +915,6 @@ HWTEST_F(KernelPropertiesTests, givenValidKernelAndNoMediavfestateThenSpillMemSi } } - ki->patchInfo.mediavfestate = nullptr; EXPECT_EQ(0u, kernelProperties.spillMemSize); } @@ -939,7 +938,6 @@ HWTEST_F(KernelPropertiesTests, givenValidKernelAndNollocateStatelessPrivateSurf } } - ki->patchInfo.pAllocateStatelessPrivateSurface = nullptr; EXPECT_EQ(0u, kernelProperties.privateMemSize); } diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index df23b690ae..55355c380e 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -3431,7 +3431,7 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue, } if ((pKernel->getExecutionType() != KernelExecutionType::Default) || - pKernel->isUsingSyncBuffer(pCommandQueue->getDevice().getRootDeviceIndex())) { + pKernel->usesSyncBuffer(pCommandQueue->getDevice().getRootDeviceIndex())) { retVal = CL_INVALID_KERNEL; TRACING_EXIT(clEnqueueNDRangeKernel, &retVal); return retVal; @@ -5884,7 +5884,7 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue, } } - if (pKernel->isUsingSyncBuffer(rootDeviceIndex)) { + if (pKernel->usesSyncBuffer(rootDeviceIndex)) { if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) { retVal = CL_INVALID_KERNEL; return retVal; diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 99037788f7..4b07897f4e 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -537,7 +537,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) { auto rootDeviceIndex = device->getRootDeviceIndex(); DEBUG_BREAK_IF(!kernel->requiresSshForBuffers(rootDeviceIndex)); auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(rootDeviceIndex)), - kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->Offset); + kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful); void *addressToPatch = reinterpret_cast(debugSurface->getGpuAddress()); size_t sizeToPatch = debugSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0); diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index 79e10d58f0..eb700c1ace 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -106,6 +106,7 @@ class GpgpuWalkerHelper { static size_t setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, + const KernelDescriptor &kernelDescriptor, const size_t globalOffsets[3], const size_t startWorkGroups[3], const size_t numWorkGroups[3], @@ -114,7 +115,6 @@ class GpgpuWalkerHelper { uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, - const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t requiredWorkgroupOrder); static void dispatchProfilingCommandsStart( diff --git a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl index dbf23aad32..02fa0d6fa2 100644 --- a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl +++ b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl @@ -20,6 +20,7 @@ namespace NEO { template inline size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, + const KernelDescriptor &kernelDescriptor, const size_t globalOffsets[3], const size_t startWorkGroups[3], const size_t numWorkGroups[3], @@ -28,7 +29,6 @@ inline size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData( uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, - const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t requiredWorkgroupOrder) { auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2]; @@ -142,9 +142,8 @@ void GpgpuWalkerHelper::dispatchScheduler( size_t globalOffsets[3] = {0, 0, 0}; size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1}; - GpgpuWalkerHelper::setGpgpuWalkerThreadData(&cmdWalker, globalOffsets, globalOffsets, workGroups, localWorkSizes, - simd, 1, true, inlineDataProgrammingRequired, - *kernelInfo.patchInfo.threadPayload, 0u); + GpgpuWalkerHelper::setGpgpuWalkerThreadData(&cmdWalker, kernelInfo.kernelDescriptor, globalOffsets, globalOffsets, workGroups, localWorkSizes, + simd, 1, true, inlineDataProgrammingRequired, 0u); *pGpGpuWalkerCmd = cmdWalker; // Implement disabling special WA DisableLSQCROPERFforOCL if needed diff --git a/opencl/source/command_queue/hardware_interface_bdw_plus.inl b/opencl/source/command_queue/hardware_interface_bdw_plus.inl index 917aaec6da..8c71aa0a9b 100644 --- a/opencl/source/command_queue/hardware_interface_bdw_plus.inl +++ b/opencl/source/command_queue/hardware_interface_bdw_plus.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -105,10 +105,10 @@ inline void HardwareInterface::programWalker( true, commandQueue.getDevice()); - GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, globalOffsets, startWorkGroups, + GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor, + globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, - false, false, - *kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload, 0u); + false, false, 0u); EncodeDispatchKernel::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd); *walkerCmdBuf = walkerCmd; diff --git a/opencl/source/device_queue/device_queue_hw_base.inl b/opencl/source/device_queue/device_queue_hw_base.inl index 40e2763976..2534019ac3 100644 --- a/opencl/source/device_queue/device_queue_hw_base.inl +++ b/opencl/source/device_queue/device_queue_hw_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -256,7 +256,7 @@ uint64_t DeviceQueueHw::getBlockKernelStartPointer(const Device &devi auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (blockAllocation && isCcsUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - blockKernelStartPointer += blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + blockKernelStartPointer += blockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP; } return blockKernelStartPointer; } diff --git a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl index 9a6d9b1dcd..b874e80320 100644 --- a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl +++ b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -182,7 +182,7 @@ void DeviceQueueHw::setupIndirectState(IndirectHeap &surfaceStateHeap auto blockKernelStartPointer = getBlockKernelStartPointer(getDevice(), pBlockInfo, isCcsUsed); - auto bindingTableCount = pBlockInfo->patchInfo.bindingTableState->Count; + auto bindingTableCount = static_cast(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries); maxBindingTableCount = std::max(maxBindingTableCount, bindingTableCount); totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); @@ -192,15 +192,14 @@ void DeviceQueueHw::setupIndirectState(IndirectHeap &surfaceStateHeap pBlockInfo->heapInfo.pSsh, pBlockInfo->heapInfo.SurfaceStateHeapSize, bindingTableCount, - pBlockInfo->patchInfo.bindingTableState->Offset); + pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast(btOffset)); // Determine SIMD size uint32_t simd = pBlockInfo->getMaxSimdSize(); - DEBUG_BREAK_IF(pBlockInfo->patchInfo.interfaceDescriptorData == nullptr); - uint32_t idOffset = pBlockInfo->patchInfo.interfaceDescriptorData->Offset; + uint32_t idOffset = pBlockInfo->kernelDescriptor.kernelMetadata.deviceSideEnqueueBlockInterfaceDescriptorOffset; const INTERFACE_DESCRIPTOR_DATA *pBlockID = static_cast(ptrOffset(pBlockInfo->heapInfo.pDsh, idOffset)); pIDDestination[blockIndex + i] = *pBlockID; @@ -214,10 +213,7 @@ void DeviceQueueHw::setupIndirectState(IndirectHeap &surfaceStateHeap // Set offset to sampler states, block's DHSOffset is added by scheduler pIDDestination[blockIndex + i].setSamplerStatePointer(static_cast(pBlockInfo->getBorderColorStateSize())); - auto threadPayload = pBlockInfo->patchInfo.threadPayload; - DEBUG_BREAK_IF(nullptr == threadPayload); - - auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); + auto numChannels = pBlockInfo->kernelDescriptor.kernelAttributes.numLocalIdChannels; auto grfSize = device->getDeviceInfo().grfSize; auto sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfSize, numChannels); auto numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); diff --git a/opencl/source/helpers/cl_hw_helper_bdw_plus.inl b/opencl/source/helpers/cl_hw_helper_bdw_plus.inl index 1d03eb7788..519c141e4b 100644 --- a/opencl/source/helpers/cl_hw_helper_bdw_plus.inl +++ b/opencl/source/helpers/cl_hw_helper_bdw_plus.inl @@ -19,7 +19,7 @@ inline cl_command_queue_capabilities_intel ClHwHelperHw::getAdditiona template cl_ulong ClHwHelperHw::getKernelPrivateMemSize(const KernelInfo &kernelInfo) const { - return kernelInfo.patchInfo.pAllocateStatelessPrivateSurface ? kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize : 0; + return kernelInfo.kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; } } // namespace NEO diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index f85d88e19c..6430dcbb25 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -35,18 +35,14 @@ size_t HardwareCommandsHelper::getSizeRequiredDSH( const Kernel &kernel) { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; - const auto &patchInfo = kernel.getKernelInfo(rootDeviceIndex).patchInfo; - auto samplerCount = patchInfo.samplerStateArray - ? patchInfo.samplerStateArray->Count - : 0; + const auto &samplerTable = kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.samplerTable; + + auto samplerCount = samplerTable.numSamplers; auto totalSize = samplerCount ? alignUp(samplerCount * sizeof(SAMPLER_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE) : 0; - auto borderColorSize = patchInfo.samplerStateArray - ? patchInfo.samplerStateArray->Offset - patchInfo.samplerStateArray->BorderColorOffset - : 0; - + auto borderColorSize = samplerTable.borderColor; borderColorSize = alignUp(borderColorSize + EncodeStates::alignIndirectStatePointer - 1, EncodeStates::alignIndirectStatePointer); @@ -63,14 +59,12 @@ size_t HardwareCommandsHelper::getSizeRequiredIOH( const Kernel &kernel, size_t localWorkSize) { typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE; + const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex); - auto threadPayload = kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload; - DEBUG_BREAK_IF(nullptr == threadPayload); - - auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); + auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; uint32_t grfSize = sizeof(typename GfxFamily::GRF); return alignUp((kernel.getCrossThreadDataSize(rootDeviceIndex) + - getPerThreadDataSizeTotal(kernel.getKernelInfo(rootDeviceIndex).getMaxSimdSize(), grfSize, numChannels, localWorkSize)), + getPerThreadDataSizeTotal(kernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize)), WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); } @@ -132,7 +126,7 @@ size_t HardwareCommandsHelper::getSshSizeForExecutionModel(const Kern totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize; totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count); + maxBindingTableCount = std::max(maxBindingTableCount, static_cast(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries)); } SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel(); @@ -233,31 +227,28 @@ size_t HardwareCommandsHelper::sendIndirectState( // Copy the kernel over to the ISH const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex); - const auto &patchInfo = kernelInfo.patchInfo; ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); kernel.patchBindlessSurfaceStateOffsets(device, ssh.getUsed()); - auto dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0, + auto dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries, kernel.getSurfaceStateHeap(rootDeviceIndex), kernel.getSurfaceStateHeapSize(rootDeviceIndex), kernel.getNumberOfBindingTableStates(rootDeviceIndex), kernel.getBindingTableOffset(rootDeviceIndex)); // Copy our sampler state if it exists - uint32_t samplerStateOffset = 0; + const auto &samplerTable = kernelInfo.kernelDescriptor.payloadMappings.samplerTable; uint32_t samplerCount = 0; - if (patchInfo.samplerStateArray) { - samplerCount = patchInfo.samplerStateArray->Count; - samplerStateOffset = EncodeStates::copySamplerState(&dsh, patchInfo.samplerStateArray->Offset, - samplerCount, patchInfo.samplerStateArray->BorderColorOffset, + uint32_t samplerStateOffset = 0; + if (isValidOffset(samplerTable.tableOffset) && isValidOffset(samplerTable.borderColor)) { + samplerCount = samplerTable.numSamplers; + samplerStateOffset = EncodeStates::copySamplerState(&dsh, samplerTable.tableOffset, + samplerCount, samplerTable.borderColor, kernel.getDynamicStateHeap(rootDeviceIndex), device.getBindlessHeapsHelper()); } - auto threadPayload = kernelInfo.patchInfo.threadPayload; - DEBUG_BREAK_IF(nullptr == threadPayload); - auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2]; auto threadsPerThreadGroup = static_cast(getThreadsPerWG(simd, localWorkItems)); - auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); + auto numChannels = static_cast(kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels); uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize(rootDeviceIndex); @@ -348,16 +339,14 @@ bool HardwareCommandsHelper::inlineDataProgrammingRequired(const Kern checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get(); } if (checkKernelForInlineData) { - return kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->PassInlineData; + return kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.passInlineData; } return false; } template bool HardwareCommandsHelper::kernelUsesLocalIds(const Kernel &kernel, uint32_t rootDeviceIndex) { - return (kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDXPresent || - kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDYPresent || - kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDZPresent); + return kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numLocalIdChannels > 0; } } // namespace NEO diff --git a/opencl/source/helpers/per_thread_data.cpp b/opencl/source/helpers/per_thread_data.cpp index 5b7bfd79a6..df0608d456 100644 --- a/opencl/source/helpers/per_thread_data.cpp +++ b/opencl/source/helpers/per_thread_data.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -35,13 +35,13 @@ size_t PerThreadDataHelper::sendPerThreadData( return offsetPerThreadData; } -uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) { - uint32_t multiplier = static_cast(getGRFsPerThread(simd, grfSize)); +uint32_t PerThreadDataHelper::getThreadPayloadSize(const KernelDescriptor &kernelDescriptor, uint32_t grfSize) { + uint32_t multiplier = static_cast(getGRFsPerThread(kernelDescriptor.kernelAttributes.simdSize, grfSize)); uint32_t threadPayloadSize = 0; - threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize; - threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0; - threadPayloadSize += (threadPayload.LocalIDFlattenedPresent) ? (grfSize * multiplier) : 0; - threadPayloadSize += (threadPayload.UnusedPerThreadConstantPresent) ? grfSize : 0; + threadPayloadSize = kernelDescriptor.kernelAttributes.numLocalIdChannels * multiplier * grfSize; + threadPayloadSize += (kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent) ? grfSize : 0; + threadPayloadSize += (kernelDescriptor.kernelAttributes.flags.usesFlattenedLocalIds) ? (grfSize * multiplier) : 0; + threadPayloadSize += (kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent) ? grfSize : 0; return threadPayloadSize; } } // namespace NEO diff --git a/opencl/source/helpers/per_thread_data.h b/opencl/source/helpers/per_thread_data.h index f10695c797..d2954c7f29 100644 --- a/opencl/source/helpers/per_thread_data.h +++ b/opencl/source/helpers/per_thread_data.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/local_id_gen.h" +#include "shared/source/kernel/kernel_descriptor.h" #include "patch_shared.h" @@ -42,12 +43,6 @@ struct PerThreadDataHelper { const std::array &workgroupWalkOrder, bool hasKernelOnlyImages); - static inline uint32_t getNumLocalIdChannels(const iOpenCL::SPatchThreadPayload &threadPayload) { - return threadPayload.LocalIDXPresent + - threadPayload.LocalIDYPresent + - threadPayload.LocalIDZPresent; - } - - static uint32_t getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize); + static uint32_t getThreadPayloadSize(const KernelDescriptor &kernelDescriptor, uint32_t grfSize); }; } // namespace NEO diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 5e38d2cd82..c97ec24946 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -136,15 +136,37 @@ inline void patch(const SrcT &src, void *dst, uint32_t dstOffsetBytes) { *patchLocation = static_cast(src); } +void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const ArgDescPointer &arg) { + auto rootDeviceIndex = device.getRootDeviceIndex(); + + void *crossThreadData = getCrossThreadData(rootDeviceIndex); + if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) { + auto pp = ptrOffset(crossThreadData, arg.stateless); + uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); + patchWithRequiredSize(pp, arg.pointerSize, addressToPatch); + if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { + PatchInfoData patchInfoData(addressToPatch, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize); + this->patchInfoDataList.push_back(patchInfoData); + } + } + + void *ssh = getSurfaceStateHeap(rootDeviceIndex); + if ((nullptr != ssh) & isValidOffset(arg.bindful)) { + auto surfaceState = ptrOffset(ssh, arg.bindful); + void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); + size_t sizeToPatch = allocation.getUnderlyingBufferSize(); + Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0); + } +} + template void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const PatchTokenT &patch) { - uint32_t crossThreadDataOffset = patch.DataParamOffset; - uint32_t pointerSize = patch.DataParamSize; - uint32_t sshOffset = patch.SurfaceStateHeapOffset; auto rootDeviceIndex = device.getRootDeviceIndex(); + uint32_t pointerSize = patch.DataParamSize; + void *crossThreadData = getCrossThreadData(rootDeviceIndex); - void *ssh = getSurfaceStateHeap(rootDeviceIndex); if (crossThreadData != nullptr) { + uint32_t crossThreadDataOffset = patch.DataParamOffset; auto pp = ptrOffset(crossThreadData, crossThreadDataOffset); uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); patchWithRequiredSize(pp, pointerSize, addressToPatch); @@ -154,7 +176,9 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic } } + void *ssh = getSurfaceStateHeap(rootDeviceIndex); if (ssh) { + uint32_t sshOffset = patch.SurfaceStateHeapOffset; auto surfaceState = ptrOffset(ssh, sshOffset); void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); @@ -185,15 +209,12 @@ cl_int Kernel::initialize() { auto maxSimdSize = kernelInfo.getMaxSimdSize(); const auto &workloadInfo = kernelInfo.workloadInfo; const auto &heapInfo = kernelInfo.heapInfo; - const auto &patchInfo = kernelInfo.patchInfo; if (maxSimdSize != 1 && maxSimdSize < hwHelper.getMinimalSIMDSize()) { return CL_INVALID_KERNEL; } - kernelDeviceInfo.crossThreadDataSize = patchInfo.dataParameterStream - ? patchInfo.dataParameterStream->DataParameterStreamSize - : 0; + kernelDeviceInfo.crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize; // now allocate our own cross-thread data, if necessary if (kernelDeviceInfo.crossThreadDataSize) { @@ -297,11 +318,11 @@ cl_int Kernel::initialize() { memcpy_s(kernelDeviceInfo.pSshLocal.get(), kernelDeviceInfo.sshLocalSize, heapInfo.pSsh, kernelDeviceInfo.sshLocalSize); } - kernelDeviceInfo.numberOfBindingTableStates = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Count : 0; - kernelDeviceInfo.localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Offset : 0; + kernelDeviceInfo.numberOfBindingTableStates = kernelDescriptor.payloadMappings.bindingTable.numEntries; + kernelDeviceInfo.localBindingTableOffset = kernelDescriptor.payloadMappings.bindingTable.tableOffset; // patch crossthread data and ssh with inline surfaces, if necessary - auto perHwThreadPrivateMemorySize = PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(patchInfo.pAllocateStatelessPrivateSurface, kernelInfo.getMaxSimdSize()); + auto perHwThreadPrivateMemorySize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; if (perHwThreadPrivateMemorySize) { kernelDeviceInfo.privateSurfaceSize = KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch); @@ -318,23 +339,23 @@ cl_int Kernel::initialize() { if (kernelDeviceInfo.privateSurface == nullptr) { return CL_OUT_OF_RESOURCES; } - const auto &patch = patchInfo.pAllocateStatelessPrivateSurface; - patchWithImplicitSurface(reinterpret_cast(kernelDeviceInfo.privateSurface->getGpuAddressToPatch()), *kernelDeviceInfo.privateSurface, pClDevice->getDevice(), *patch); + const auto &patch = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress; + patchWithImplicitSurface(reinterpret_cast(kernelDeviceInfo.privateSurface->getGpuAddressToPatch()), *kernelDeviceInfo.privateSurface, pClDevice->getDevice(), patch); } - if (patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization) { + if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) { DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr); uintptr_t constMemory = isBuiltIn ? (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch(); - const auto &patch = patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization; - patchWithImplicitSurface(reinterpret_cast(constMemory), *program->getConstantSurface(rootDeviceIndex), pClDevice->getDevice(), *patch); + const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress; + patchWithImplicitSurface(reinterpret_cast(constMemory), *program->getConstantSurface(rootDeviceIndex), pClDevice->getDevice(), arg); } - if (patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization) { + if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) { DEBUG_BREAK_IF(program->getGlobalSurface(rootDeviceIndex) == nullptr); uintptr_t globalMemory = isBuiltIn ? (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch(); - const auto &patch = patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization; - patchWithImplicitSurface(reinterpret_cast(globalMemory), *program->getGlobalSurface(rootDeviceIndex), pClDevice->getDevice(), *patch); + const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress; + patchWithImplicitSurface(reinterpret_cast(globalMemory), *program->getGlobalSurface(rootDeviceIndex), pClDevice->getDevice(), arg); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) { @@ -367,7 +388,7 @@ cl_int Kernel::initialize() { if (isParentKernel) { program->allocateBlockPrivateSurfaces(*pClDevice); } - if (program->isKernelDebugEnabled() && kernelInfo.patchInfo.pAllocateSystemThreadSurface) { + if (program->isKernelDebugEnabled() && isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) { debugEnabled = true; } auto numArgs = kernelInfo.kernelArgInfo.size(); @@ -518,8 +539,8 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, break; case CL_KERNEL_ATTRIBUTES: - pSrc = defaultKernelInfo.attributes.c_str(); - srcSize = defaultKernelInfo.attributes.length() + 1; + pSrc = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.c_str(); + srcSize = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.length() + 1; break; case CL_KERNEL_BINARY_PROGRAM_INTEL: @@ -613,7 +634,6 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para cl_ulong localMemorySize; auto rootDeviceIndex = device.getRootDeviceIndex(); auto &kernelInfo = *kernelInfos[rootDeviceIndex]; - const auto &patchInfo = kernelInfo.patchInfo; const auto &kernelDescriptor = kernelInfo.kernelDescriptor; size_t preferredWorkGroupSizeMultiple = 0; cl_ulong scratchSize; @@ -644,9 +664,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para break; case CL_KERNEL_LOCAL_MEM_SIZE: - localMemorySize = patchInfo.localsurface - ? patchInfo.localsurface->TotalInlineLocalMemorySize - : 0; + localMemorySize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize; srcSize = sizeof(localMemorySize); pSrc = &localMemorySize; break; @@ -661,7 +679,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para break; case CL_KERNEL_SPILL_MEM_SIZE_INTEL: - scratchSize = kernelInfo.patchInfo.mediavfestate ? kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace : 0; + scratchSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; srcSize = sizeof(scratchSize); pSrc = &scratchSize; break; @@ -762,7 +780,6 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para workGroupSize2.val[0] = workGroupSize; workGroupSize2.val[1] = (workGroupSize > 0) ? 1 : 0; return changeGetInfoStatusToCLResultType(info.set(workGroupSize2)); - case 3: default: struct size_t3 { size_t val[3]; @@ -1881,7 +1898,7 @@ void Kernel::createReflectionSurface() { ReflectionSurfaceHelper::getCurbeParams(curbeParamsForBlocks[i], tokenMask[i], firstSSHTokenIndex, *pBlockInfo, hwInfo); - maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize)); + maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast(pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize)); samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(hwInfo); samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment); @@ -1956,7 +1973,7 @@ void Kernel::createReflectionSurface() { const char *pSrc = pBlockInfo->crossThreadData; memcpy_s(pDst, pBlockInfo->getConstantBufferSize(), pSrc, pBlockInfo->getConstantBufferSize()); - btOffset += pBlockInfo->patchInfo.bindingTableState->Offset; + btOffset += pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset; kernelDataOffset = newKernelDataOffset; } @@ -2033,14 +2050,14 @@ void Kernel::patchBlocksCurbeWithConstantValues() { uint64_t constantMemoryCurbeOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t constantMemoryPatchSize = 0; - if (pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization) { - globalMemoryCurbeOffset = pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; - globalMemoryPatchSize = pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamSize; + if (isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) { + globalMemoryCurbeOffset = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless; + globalMemoryPatchSize = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.pointerSize; } - if (pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization) { - constantMemoryCurbeOffset = pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; - constantMemoryPatchSize = pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamSize; + if (isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) { + constantMemoryCurbeOffset = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless; + constantMemoryPatchSize = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.pointerSize; } ReflectionSurfaceHelper::patchBlocksCurbeWithConstantValues(kernelReflectionSurface->getUnderlyingBuffer(), blockID, @@ -2104,19 +2121,18 @@ void Kernel::ReflectionSurfaceHelper::getCurbeParams(std::vector(kernelInfo.heapInfo.pSsh) + kernelInfo.patchInfo.bindingTableState->Offset; - - for (uint32_t i = 0; i < kernelInfo.patchInfo.bindingTableState->Count; i++) { + const void *ssh = static_cast(kernelInfo.heapInfo.pSsh) + kernelInfo.kernelDescriptor.payloadMappings.bindingTable.tableOffset; + for (uint32_t i = 0; i < kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries; i++) { uint32_t pointer = hwHelper.getBindingTableStateSurfaceStatePointer(ssh, i); if (pointer == kernelInfo.kernelArgInfo[argNumber].offsetHeap) { bindingTableIndex = i; break; } } - DEBUG_BREAK_IF(!((bindingTableIndex != 253) || (kernelInfo.patchInfo.bindingTableState->Count == 0))); + DEBUG_BREAK_IF(bindingTableIndex == 253); } } else if (kernelInfo.kernelArgInfo[argNumber].isSampler) { @@ -2152,8 +2168,8 @@ void Kernel::ReflectionSurfaceHelper::getCurbeParams(std::vectorDataSize, param->Offset, param->ArgumentNumber}); + for (auto param : kernelInfo.kernelDescriptor.kernelMetadata.allByValueKernelArguments) { + curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_KERNEL_ARGUMENT, param.byValueElement.size, param.byValueElement.offset, param.argNum}); tokenMask |= shiftLeftBy(DATA_PARAMETER_KERNEL_ARGUMENT); } @@ -2210,8 +2226,8 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface, kernelData->m_numberOfCurbeTokens = static_cast(curbeParamsIn.size() - kernelInfo.kernelArgInfo.size()); kernelData->m_numberOfSamplerStates = static_cast(kernelInfo.getSamplerStateArrayCount()); kernelData->m_SizeOfSamplerHeap = static_cast(samplerHeapSize); - kernelData->m_SamplerBorderColorStateOffsetOnDSH = kernelInfo.patchInfo.samplerStateArray ? kernelInfo.patchInfo.samplerStateArray->BorderColorOffset : 0; - kernelData->m_SamplerStateArrayOffsetOnDSH = kernelInfo.patchInfo.samplerStateArray ? kernelInfo.patchInfo.samplerStateArray->Offset : (uint32_t)-1; + kernelData->m_SamplerBorderColorStateOffsetOnDSH = isValidOffset(kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor) ? kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor : 0; + kernelData->m_SamplerStateArrayOffsetOnDSH = isValidOffset(kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset) ? kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset : -1; kernelData->m_sizeOfConstantBuffer = kernelInfo.getConstantBufferSize(); kernelData->m_PatchTokensMask = tokenMaskIn; kernelData->m_ScratchSpacePatchValue = 0; @@ -2223,25 +2239,18 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface, kernelData->m_InilineSLMSize = kernelInfo.workloadInfo.slmStaticSize; bool localIdRequired = false; - if (kernelInfo.patchInfo.threadPayload) { - if (kernelInfo.patchInfo.threadPayload->LocalIDFlattenedPresent || - kernelInfo.patchInfo.threadPayload->LocalIDXPresent || - kernelInfo.patchInfo.threadPayload->LocalIDYPresent || - kernelInfo.patchInfo.threadPayload->LocalIDZPresent) { - localIdRequired = true; - } - kernelData->m_PayloadSize = PerThreadDataHelper::getThreadPayloadSize(*kernelInfo.patchInfo.threadPayload, kernelData->m_SIMDSize, hwInfo.capabilityTable.grfSize); + if (kernelInfo.kernelDescriptor.kernelAttributes.flags.usesFlattenedLocalIds || (kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels > 0)) { + localIdRequired = true; } + kernelData->m_PayloadSize = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, hwInfo.capabilityTable.grfSize); kernelData->m_NeedLocalIDS = localIdRequired ? 1 : 0; kernelData->m_DisablePreemption = 0u; bool concurrentExecAllowed = true; - if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface) { - if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize > 0) { - concurrentExecAllowed = false; - } + if (kernelInfo.kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize > 0) { + concurrentExecAllowed = false; } kernelData->m_CanRunConcurently = concurrentExecAllowed ? 1 : 0; @@ -2285,7 +2294,7 @@ void Kernel::ReflectionSurfaceHelper::setKernelAddressData(void *reflectionSurfa kernelAddressData->m_ConstantBufferOffset = constantBufferOffset; kernelAddressData->m_SSHTokensOffset = sshTokensOffset; kernelAddressData->m_BTSoffset = btOffset; - kernelAddressData->m_BTSize = static_cast(kernelInfo.patchInfo.bindingTableState ? kernelInfo.patchInfo.bindingTableState->Count * hwHelper.getBindingTableStateSize() : 0); + kernelAddressData->m_BTSize = static_cast(kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries * hwHelper.getBindingTableStateSize()); } template <> @@ -2428,14 +2437,11 @@ void Kernel::provideInitializationHints() { kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize); } - const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; - if (patchInfo.mediavfestate) { - auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace; - scratchSize *= pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize(); - if (scratchSize > 0) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, - kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); - } + auto scratchSize = kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * + pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize(); + if (scratchSize > 0) { + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, + kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); } } } @@ -2487,19 +2493,18 @@ void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) { } bool Kernel::usesSyncBuffer(uint32_t rootDeviceIndex) { - return (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSyncBuffer != nullptr); + return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesSyncBuffer; } void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) { auto rootDeviceIndex = device.getRootDeviceIndex(); - auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; - auto bufferPatchAddress = ptrOffset(getCrossThreadData(rootDeviceIndex), patchInfo.pAllocateSyncBuffer->DataParamOffset); - patchWithRequiredSize(bufferPatchAddress, patchInfo.pAllocateSyncBuffer->DataParamSize, + const auto &syncBuffer = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress; + auto bufferPatchAddress = ptrOffset(getCrossThreadData(rootDeviceIndex), syncBuffer.stateless); + patchWithRequiredSize(bufferPatchAddress, syncBuffer.pointerSize, ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)); - if (requiresSshForBuffers(rootDeviceIndex)) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), - patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset); + if (isValidOffset(syncBuffer.bindful)) { + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), syncBuffer.bindful); auto addressToPatch = gfxAllocation->getUnderlyingBuffer(); auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0); @@ -2693,7 +2698,7 @@ uint64_t Kernel::getKernelStartOffset( if (kernelInfos[rootDeviceIndex]->getGraphicsAllocation()) { kernelStartOffset = kernelInfos[rootDeviceIndex]->getGraphicsAllocation()->getGpuAddressToPatch(); if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) { - kernelStartOffset += kernelInfos[rootDeviceIndex]->patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad; + kernelStartOffset += kernelInfos[rootDeviceIndex]->kernelDescriptor.entryPoints.skipPerThreadDataLoad; } } @@ -2703,7 +2708,7 @@ uint64_t Kernel::getKernelStartOffset( auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - kernelStartOffset += kernelInfos[rootDeviceIndex]->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + kernelStartOffset += kernelInfos[rootDeviceIndex]->kernelDescriptor.entryPoints.skipSetFFIDGP; } return kernelStartOffset; diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 985a756a04..c3d3428a66 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -217,11 +217,11 @@ class Kernel : public BaseObject<_cl_kernel> { Program *getProgram() const { return program; } uint32_t getScratchSize(uint32_t rootDeviceIndex) { - return getKernelInfo(rootDeviceIndex).patchInfo.mediavfestate ? getKernelInfo(rootDeviceIndex).patchInfo.mediavfestate->PerThreadScratchSpace : 0; + return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; } uint32_t getPrivateScratchSize(uint32_t rootDeviceIndex) { - return getKernelInfo(rootDeviceIndex).patchInfo.mediaVfeStateSlot1 ? getKernelInfo(rootDeviceIndex).patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0; + return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadScratchSize[1]; } void createReflectionSurface(); @@ -335,9 +335,6 @@ class Kernel : public BaseObject<_cl_kernel> { KernelExecutionType getExecutionType() const { return executionType; } - bool isUsingSyncBuffer(uint32_t rootDeviceIndex) const { - return (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSyncBuffer != nullptr); - } bool checkIfIsParentKernelAndBlocksUsesPrintf(); @@ -345,18 +342,8 @@ class Kernel : public BaseObject<_cl_kernel> { return getKernelInfo(rootDeviceIndex).gpuPointerSize == 4; } - int32_t getDebugSurfaceBti(uint32_t rootDeviceIndex) const { - if (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface) { - return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->BTI; - } - return -1; - } - size_t getPerThreadSystemThreadSurfaceSize(uint32_t rootDeviceIndex) const { - if (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface) { - return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize; - } - return 0; + return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize; } std::vector &getPatchInfoDataList() { return patchInfoDataList; }; @@ -496,6 +483,7 @@ class Kernel : public BaseObject<_cl_kernel> { void *patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t rootDeviceIndex); + void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const ArgDescPointer &arg); // Sets-up both crossThreadData and ssh for given implicit (private/constant, etc.) allocation template void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const PatchTokenT &patch); diff --git a/opencl/source/kernel/kernel.inl b/opencl/source/kernel/kernel.inl index 6a2998bac2..dd6ea0dede 100644 --- a/opencl/source/kernel/kernel.inl +++ b/opencl/source/kernel/kernel.inl @@ -58,11 +58,12 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf auto privateSurface = blockManager->getPrivateSurface(i); - UNRECOVERABLE_IF(pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface != nullptr && pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize && privateSurface == nullptr); - + UNRECOVERABLE_IF((pBlockInfo->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize > 0U) && privateSurface == nullptr); if (privateSurface) { - privateSurfaceOffset = pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamOffset; - privateSurfacePatchSize = pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamSize; + const auto &privateMemory = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress; + UNRECOVERABLE_IF(false == isValidOffset(privateMemory.stateless)); + privateSurfaceOffset = privateMemory.stateless; + privateSurfacePatchSize = privateMemory.pointerSize; privateSurfaceGpuAddress = privateSurface->getGpuAddressToPatch(); } diff --git a/opencl/source/program/block_kernel_manager.cpp b/opencl/source/program/block_kernel_manager.cpp index d91482a0e0..dcb90041d9 100644 --- a/opencl/source/program/block_kernel_manager.cpp +++ b/opencl/source/program/block_kernel_manager.cpp @@ -16,7 +16,7 @@ namespace NEO { void BlockKernelManager::addBlockKernelInfo(KernelInfo *blockKernelInfo) { blockKernelInfoArray.push_back(blockKernelInfo); - blockUsesPrintf |= blockKernelInfo->kernelDescriptor.kernelAttributes.flags.usesPrintf; + blockUsesPrintf = blockKernelInfo->kernelDescriptor.kernelAttributes.flags.usesPrintf; } const KernelInfo *BlockKernelManager::getBlockKernelInfo(size_t ordinal) { diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index dc1538b232..fc1a7e32bb 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -298,45 +298,6 @@ void KernelInfo::storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgum storeKernelArgPatchInfo(argNum, pStatelessDeviceQueueKernelArg->DataParamSize, pStatelessDeviceQueueKernelArg->DataParamOffset, 0, pStatelessDeviceQueueKernelArg->SurfaceStateHeapOffset); } -void KernelInfo::storePatchToken( - const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg) { - usesSsh |= true; - patchInfo.pAllocateStatelessPrivateSurface = pStatelessPrivateSurfaceArg; -} - -void KernelInfo::storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg) { - usesSsh |= true; - patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = pStatelessConstantMemorySurfaceWithInitializationArg; -} - -void KernelInfo::storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg) { - usesSsh |= true; - patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = pStatelessGlobalMemorySurfaceWithInitializationArg; -} - -void KernelInfo::storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo) { - this->patchInfo.pKernelAttributesInfo = pKernelAttributesInfo; - attributes = reinterpret_cast(pKernelAttributesInfo) + sizeof(SPatchKernelAttributesInfo); - - auto start = attributes.find("intel_reqd_sub_group_size("); - if (start != std::string::npos) { - start += strlen("intel_reqd_sub_group_size("); - auto stop = attributes.find(")", start); - std::stringstream requiredSubGroupSizeStr(attributes.substr(start, stop - start)); - requiredSubGroupSizeStr >> requiredSubGroupSize; - } -} - -void KernelInfo::storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface) { - usesSsh |= true; - patchInfo.pAllocateSystemThreadSurface = pSystemThreadSurface; -} - -void KernelInfo::storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer) { - usesSsh |= true; - patchInfo.pAllocateSyncBuffer = pAllocateSyncBuffer; -} - void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t dataOffset, uint32_t sourceOffset, uint32_t offsetSSH) { resizeKernelArgInfoAndRegisterParameter(argNum); @@ -350,8 +311,7 @@ void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uin } size_t KernelInfo::getSamplerStateArrayCount() const { - size_t count = patchInfo.samplerStateArray ? (size_t)patchInfo.samplerStateArray->Count : 0; - return count; + return kernelDescriptor.payloadMappings.samplerTable.numSamplers; } size_t KernelInfo::getSamplerStateArraySize(const HardwareInfo &hwInfo) const { size_t samplerStateArraySize = getSamplerStateArrayCount() * Sampler::getSamplerStateSize(hwInfo); @@ -360,22 +320,22 @@ size_t KernelInfo::getSamplerStateArraySize(const HardwareInfo &hwInfo) const { size_t KernelInfo::getBorderColorStateSize() const { size_t borderColorSize = 0; - if (patchInfo.samplerStateArray) { - borderColorSize = patchInfo.samplerStateArray->Offset - patchInfo.samplerStateArray->BorderColorOffset; + if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0U) { + borderColorSize = kernelDescriptor.payloadMappings.samplerTable.tableOffset - kernelDescriptor.payloadMappings.samplerTable.borderColor; } return borderColorSize; } size_t KernelInfo::getBorderColorOffset() const { size_t borderColorOffset = 0; - if (patchInfo.samplerStateArray) { - borderColorOffset = patchInfo.samplerStateArray->BorderColorOffset; + if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0U) { + borderColorOffset = kernelDescriptor.payloadMappings.samplerTable.borderColor; } return borderColorOffset; } uint32_t KernelInfo::getConstantBufferSize() const { - return patchInfo.dataParameterStream ? patchInfo.dataParameterStream->DataParameterStreamSize : 0; + return kernelDescriptor.kernelAttributes.crossThreadDataSize; } bool KernelInfo::createKernelAllocation(const Device &device, bool internalIsa) { @@ -412,12 +372,9 @@ void KernelInfo::apply(const DeviceInfoKernelPayloadConstants &constants) { *(uint32_t *)&(this->crossThreadData[localMemoryStatelessWindowSizeOffset]) = constants.slmWindowSize; } - uint32_t privateMemorySize = 0U; - if (patchInfo.pAllocateStatelessPrivateSurface) { - auto perHwThreadSize = PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(patchInfo.pAllocateStatelessPrivateSurface, this->getMaxSimdSize()); - privateMemorySize = static_cast(KernelHelper::getPrivateSurfaceSize(perHwThreadSize, - constants.computeUnitsUsedForScratch)); - } + auto perHwThreadSize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; + uint32_t privateMemorySize = static_cast(KernelHelper::getPrivateSurfaceSize(perHwThreadSize, + constants.computeUnitsUsedForScratch)); if (privateMemoryStatelessSizeOffset != WorkloadInfo::undefinedOffset) { *(uint32_t *)&(this->crossThreadData[privateMemoryStatelessSizeOffset]) = privateMemorySize; diff --git a/opencl/source/program/kernel_info.h b/opencl/source/program/kernel_info.h index 0f901cb205..ef343dfeaf 100644 --- a/opencl/source/program/kernel_info.h +++ b/opencl/source/program/kernel_info.h @@ -112,12 +112,6 @@ struct KernelInfo { void storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg); void storeKernelArgument(const SPatchSamplerKernelArgument *pSamplerKernelArg); void storePatchToken(const SPatchExecutionEnvironment *execEnv); - void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg); - void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg); - void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg); - void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo); - void storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface); - void storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer); GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; } void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) { if (kernelArgInfo.size() <= argCount) { @@ -170,7 +164,6 @@ struct KernelInfo { bool createKernelAllocation(const Device &device, bool internalIsa); void apply(const DeviceInfoKernelPayloadConstants &constants); - std::string attributes; HeapInfo heapInfo = {}; PatchInfo patchInfo = {}; std::vector kernelArgInfo; @@ -182,7 +175,6 @@ struct KernelInfo { bool hasIndirectStatelessAccess = false; bool isVmeWorkload = false; char *crossThreadData = nullptr; - size_t requiredSubGroupSize = 0; uint32_t gpuPointerSize = 0; const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr; uint32_t argumentsToPatchNum = 0; diff --git a/opencl/source/program/kernel_info_from_patchtokens.cpp b/opencl/source/program/kernel_info_from_patchtokens.cpp index 7e73f8b30b..d5e93c3ccf 100644 --- a/opencl/source/program/kernel_info_from_patchtokens.cpp +++ b/opencl/source/program/kernel_info_from_patchtokens.cpp @@ -135,9 +135,6 @@ void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelIn for (auto &byValArg : src.byValMap) { dstKernelInfo.storeKernelArgument(byValArg); - if (byValArg->Type == DATA_PARAMETER_KERNEL_ARGUMENT) { - dstKernelInfo.patchInfo.dataParameterBuffersKernelArgs.push_back(byValArg); - } } dstKernelInfoArg.offsetObjectId = getOffset(src.objectId); @@ -160,17 +157,8 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch dst.heapInfo.pSsh = src.heaps.surfaceState.begin(); storeTokenIfNotNull(dst, src.tokens.executionEnvironment); - dst.patchInfo.samplerStateArray = src.tokens.samplerStateArray; - dst.patchInfo.bindingTableState = src.tokens.bindingTableState; dst.usesSsh = src.tokens.bindingTableState && (src.tokens.bindingTableState->Count > 0); - dst.patchInfo.localsurface = src.tokens.allocateLocalSurface; dst.workloadInfo.slmStaticSize = src.tokens.allocateLocalSurface ? src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U; - dst.patchInfo.mediavfestate = src.tokens.mediaVfeState[0]; - dst.patchInfo.mediaVfeStateSlot1 = src.tokens.mediaVfeState[1]; - dst.patchInfo.interfaceDescriptorDataLoad = src.tokens.mediaInterfaceDescriptorLoad; - dst.patchInfo.interfaceDescriptorData = src.tokens.interfaceDescriptorData; - dst.patchInfo.threadPayload = src.tokens.threadPayload; - dst.patchInfo.dataParameterStream = src.tokens.dataParameterStream; dst.kernelArgInfo.resize(src.tokens.kernelArgs.size()); @@ -180,15 +168,15 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch populateKernelInfoArg(dst, kernelInfoArg, decodedKernelArg); } - storeTokenIfNotNull(dst, src.tokens.kernelAttributesInfo); - storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrivateSurface); - storeTokenIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization); - storeTokenIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization); - storeTokenIfNotNull(dst, src.tokens.allocateSyncBuffer); + if (nullptr != src.tokens.allocateSyncBuffer) { + dst.usesSsh = true; + } + if (nullptr != src.tokens.allocateSystemThreadSurface) { + dst.usesSsh = true; + } dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr); dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U; - storeTokenIfNotNull(dst, src.tokens.allocateSystemThreadSurface); for (uint32_t i = 0; i < 3U; ++i) { dst.workloadInfo.localWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize[i]); @@ -221,10 +209,9 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch populateKernelDescriptor(dst.kernelDescriptor, src, gpuPointerSizeInBytes); } - if (dst.patchInfo.dataParameterStream && dst.patchInfo.dataParameterStream->DataParameterStreamSize) { - uint32_t crossThreadDataSize = dst.patchInfo.dataParameterStream->DataParameterStreamSize; - dst.crossThreadData = new char[crossThreadDataSize]; - memset(dst.crossThreadData, 0x00, crossThreadDataSize); + if (dst.kernelDescriptor.kernelAttributes.crossThreadDataSize) { + dst.crossThreadData = new char[dst.kernelDescriptor.kernelAttributes.crossThreadDataSize]; + memset(dst.crossThreadData, 0x00, dst.kernelDescriptor.kernelAttributes.crossThreadDataSize); } } diff --git a/opencl/source/program/patch_info.h b/opencl/source/program/patch_info.h index 74c1726b19..22519ea8c3 100644 --- a/opencl/source/program/patch_info.h +++ b/opencl/source/program/patch_info.h @@ -48,26 +48,10 @@ using iOpenCL::SPatchThreadPayload; using iOpenCL::SProgramBinaryHeader; struct PatchInfo { - const SPatchMediaInterfaceDescriptorLoad *interfaceDescriptorDataLoad = nullptr; - const SPatchAllocateLocalSurface *localsurface = nullptr; - const SPatchMediaVFEState *mediavfestate = nullptr; - const SPatchMediaVFEState *mediaVfeStateSlot1 = nullptr; - const SPatchInterfaceDescriptorData *interfaceDescriptorData = nullptr; - const SPatchSamplerStateArray *samplerStateArray = nullptr; - const SPatchBindingTableState *bindingTableState = nullptr; - ::std::vector dataParameterBuffersKernelArgs; ::std::vector statelessGlobalMemObjKernelArgs; ::std::vector imageMemObjKernelArgs; - const SPatchDataParameterStream *dataParameterStream = nullptr; - const SPatchThreadPayload *threadPayload = nullptr; - const SPatchKernelAttributesInfo *pKernelAttributesInfo = nullptr; - const SPatchAllocateStatelessPrivateSurface *pAllocateStatelessPrivateSurface = nullptr; - const SPatchAllocateSyncBuffer *pAllocateSyncBuffer = nullptr; - const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr; - const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; - const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr; }; } // namespace NEO diff --git a/opencl/source/program/program.cpp b/opencl/source/program/program.cpp index 7fd6532181..9862d90cab 100644 --- a/opencl/source/program/program.cpp +++ b/opencl/source/program/program.cpp @@ -348,16 +348,13 @@ void Program::allocateBlockPrivateSurfaces(const ClDevice &clDevice) { for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *info = blockKernelManager->getBlockKernelInfo(i); - if (info->patchInfo.pAllocateStatelessPrivateSurface) { - auto perHwThreadPrivateMemorySize = PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(info->patchInfo.pAllocateStatelessPrivateSurface, info->getMaxSimdSize()); + auto perHwThreadPrivateMemorySize = info->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; + if (perHwThreadPrivateMemorySize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) { + auto privateSize = static_cast(KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, clDevice.getSharedDeviceInfo().computeUnitsUsedForScratch)); - if (perHwThreadPrivateMemorySize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) { - auto privateSize = static_cast(KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, clDevice.getSharedDeviceInfo().computeUnitsUsedForScratch)); - - auto *privateSurface = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( - {rootDeviceIndex, privateSize, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, clDevice.getDeviceBitfield()}); - blockKernelManager->pushPrivateSurface(privateSurface, i); - } + auto *privateSurface = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( + {rootDeviceIndex, privateSize, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, clDevice.getDeviceBitfield()}); + blockKernelManager->pushPrivateSurface(privateSurface, i); } } } diff --git a/opencl/source/scheduler/scheduler_kernel.h b/opencl/source/scheduler/scheduler_kernel.h index c1ba93e8ef..d7fe366a62 100644 --- a/opencl/source/scheduler/scheduler_kernel.h +++ b/opencl/source/scheduler/scheduler_kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -35,7 +35,7 @@ class SchedulerKernel : public Kernel { size_t getCurbeSize() { auto &defaultKernelInfo = getDefaultKernelInfo(); - size_t crossTrheadDataSize = defaultKernelInfo.patchInfo.dataParameterStream ? defaultKernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize : 0; + size_t crossTrheadDataSize = defaultKernelInfo.kernelDescriptor.kernelAttributes.crossThreadDataSize; size_t dshSize = defaultKernelInfo.heapInfo.DynamicStateHeapSize; crossTrheadDataSize = alignUp(crossTrheadDataSize, 64); diff --git a/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl b/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl index 70fdfab4e6..87cf036ad8 100644 --- a/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -135,9 +135,9 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithAllocateSyncBufferPatchWhenEx cl_event *eventWaitList = nullptr; cl_event *event = nullptr; SPatchAllocateSyncBuffer patchAllocateSyncBuffer; - pProgram->mockKernelInfo.patchInfo.pAllocateSyncBuffer = &patchAllocateSyncBuffer; + populateKernelDescriptor(pProgram->mockKernelInfo.kernelDescriptor, patchAllocateSyncBuffer); - EXPECT_TRUE(pKernel->isUsingSyncBuffer(testedRootDeviceIndex)); + EXPECT_TRUE(pKernel->usesSyncBuffer(testedRootDeviceIndex)); retVal = clEnqueueNDRangeKernel( pCommandQueue, diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl index 34171356f4..e6b8960d6e 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -141,11 +141,11 @@ TEST_F(KernelSubGroupInfoKhrReturnCompileSizeTest, GivenKernelWhenGettingRequire EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; - auto start = pKernel->getKernelInfo(rootDeviceIndex).attributes.find("intel_reqd_sub_group_size("); + auto start = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); - auto stop = pKernel->getKernelInfo(rootDeviceIndex).attributes.find(")", start); - requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).attributes.substr(start, stop - start)); + auto stop = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start); + requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start)); } EXPECT_EQ(paramValue, requiredSubGroupSize); diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl index 36f2bc76e6..1668512d5e 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -286,11 +286,11 @@ TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSub EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; - auto start = pKernel->getKernelInfo(rootDeviceIndex).attributes.find("intel_reqd_sub_group_size("); + auto start = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); - auto stop = pKernel->getKernelInfo(rootDeviceIndex).attributes.find(")", start); - requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).attributes.substr(start, stop - start)); + auto stop = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start); + requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start)); } EXPECT_EQ(paramValue[0], requiredSubGroupSize); diff --git a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl index bc9db2e9b3..0ba74f6f8c 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl @@ -5,6 +5,8 @@ * */ +#include "shared/source/device_binary_format/patchtokens_decoder.h" + #include "opencl/test/unit_test/fixtures/kernel_work_group_info_fixture.h" using namespace NEO; @@ -79,7 +81,7 @@ TEST_F(clGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettin MockKernelWithInternals mockKernel(*pDevice); SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = 1024; //whatever greater than 0 - mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); cl_ulong scratchSpaceSize = static_cast(mockKernel.mockKernel->getScratchSize(testedRootDeviceIndex)); EXPECT_EQ(scratchSpaceSize, 1024u); @@ -106,7 +108,7 @@ HWTEST2_F(clGetKernelWorkGroupInfoTests, givenKernelHavingPrivateMemoryAllocatio MockKernelWithInternals mockKernel(*pDevice); SPatchAllocateStatelessPrivateSurface privateAllocation; privateAllocation.PerThreadPrivateMemorySize = 1024; - mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrivateSurface = &privateAllocation; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, privateAllocation); retVal = clGetKernelWorkGroupInfo( mockKernel, @@ -116,9 +118,9 @@ HWTEST2_F(clGetKernelWorkGroupInfoTests, givenKernelHavingPrivateMemoryAllocatio ¶m_value, ¶mValueSizeRet); - EXPECT_EQ(retVal, CL_SUCCESS); - EXPECT_EQ(paramValueSizeRet, sizeof(cl_ulong)); - EXPECT_EQ(param_value, privateAllocation.PerThreadPrivateMemorySize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(sizeof(cl_ulong), paramValueSizeRet); + EXPECT_EQ(PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(privateAllocation, mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize), param_value); } TEST_F(clGetKernelWorkGroupInfoTests, givenKernelNotHavingPrivateMemoryAllocationWhenAskedForPrivateAllocationSizeThenZeroIsReturned) { diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 51621437fc..97821ce2b0 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1046,8 +1046,9 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); MockCommandQueue cmdQ(context.get(), pClDevice, 0); - kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset, rootDeviceIndex); + const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful; kernel->getAllocatedKernelInfo()->usesSsh = true; + kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress, rootDeviceIndex); auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); cmdQ.getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize); @@ -1066,8 +1067,9 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); MockCommandQueue cmdQ(context.get(), pClDevice, 0); - kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset, rootDeviceIndex); + const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful; kernel->getAllocatedKernelInfo()->usesSsh = true; + kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress, rootDeviceIndex); auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); commandStreamReceiver.allocateDebugSurface(SipKernel::maxDbgSurfaceSize); auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index f303adb7e7..3046499001 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -46,32 +46,35 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture, memset(&kernelHeader, 0, sizeof(kernelHeader)); kernelHeader.KernelHeapSize = sizeof(kernelIsa); + SPatchDataParameterStream dataParameterStream = {}; memset(&dataParameterStream, 0, sizeof(dataParameterStream)); dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData); + populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream); + populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, dataParameterStream); + SPatchThreadPayload threadPayload = {}; memset(&threadPayload, 0, sizeof(threadPayload)); threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; + populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload); + populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, threadPayload); + SPatchSamplerStateArray samplerArray = {}; samplerArray.BorderColorOffset = 0; samplerArray.Count = 1; samplerArray.Offset = 4; samplerArray.Size = 2; samplerArray.Token = 0; + populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, samplerArray); kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); - kernelInfo.patchInfo.dataParameterStream = &dataParameterStream; kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; - kernelInfo.patchInfo.threadPayload = &threadPayload; kernelInfoWithSampler.heapInfo.pKernelHeap = kernelIsa; kernelInfoWithSampler.heapInfo.KernelHeapSize = sizeof(kernelIsa); - kernelInfoWithSampler.patchInfo.dataParameterStream = &dataParameterStream; kernelInfoWithSampler.kernelDescriptor.kernelAttributes.simdSize = 32; - kernelInfoWithSampler.patchInfo.threadPayload = &threadPayload; - kernelInfoWithSampler.patchInfo.samplerStateArray = &samplerArray; kernelInfoWithSampler.heapInfo.pDsh = static_cast(dsh); } @@ -94,9 +97,6 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture, std::unique_ptr program; SKernelBinaryHeaderCommon kernelHeader = {}; - SPatchDataParameterStream dataParameterStream = {}; - SPatchThreadPayload threadPayload = {}; - SPatchSamplerStateArray samplerArray = {}; KernelInfo kernelInfo; KernelInfo kernelInfoWithSampler; @@ -145,10 +145,10 @@ HWTEST_F(DispatchWalkerTest, givenSimd1WhenSetGpgpuWalkerThreadDataThenSimdInWal size_t numWorkGroups[] = {1, 1, 1}; size_t localWorkSizesIn[] = {32, 1, 1}; uint32_t simd = 1; - iOpenCL::SPatchThreadPayload threadPayload; + KernelDescriptor kd; GpgpuWalkerHelper::setGpgpuWalkerThreadData( - computeWalker, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, threadPayload, 5u); + computeWalker, kd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, 5u); EXPECT_EQ(computeWalker->getSimdSize(), 32 >> 4); } @@ -197,10 +197,12 @@ HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntCha } HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDispatched) { + SPatchThreadPayload threadPayload = {}; threadPayload.LocalIDXPresent = 0; threadPayload.LocalIDYPresent = 0; threadPayload.LocalIDZPresent = 0; threadPayload.UnusedPerThreadConstantPresent = 1; + populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload); MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); diff --git a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp index 02d5bbcfc4..8c2fc237aa 100644 --- a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -159,7 +159,7 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithDebugEnabledWhe std::unique_ptr> mockCmdQ(new GMockCommandQueueHw(context, pClDevice, 0)); mockCmdQ->getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize); - EXPECT_NE(nullptr, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface); + EXPECT_TRUE(isValidOffset(kernel->getDefaultKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); EXPECT_CALL(*mockCmdQ.get(), setupDebugSurface(kernel.get())).Times(1).RetiresOnSaturation(); @@ -175,7 +175,7 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelWithoutSystemThreadSurfaceWhen std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); kernel->initialize(); - EXPECT_EQ(nullptr, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface); + EXPECT_FALSE(isValidOffset(kernel->getDefaultKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); std::unique_ptr> mockCmdQ(new GMockCommandQueueHw(context, pClDevice, 0)); diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index a18a661435..f19f1772b0 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -571,8 +571,8 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs MockKernelWithInternals kernelInternals{*pClDevice, context}; kernelInternals.kernelInfo.usesSsh = true; kernelInternals.kernelInfo.requiresSshForBuffers = true; - kernelInternals.kernelInfo.patchInfo.pAllocateSyncBuffer = &sPatchAllocateSyncBuffer; - kernelInternals.kernelInfo.patchInfo.bindingTableState = &sPatchBindingTableState; + populateKernelDescriptor(kernelInternals.kernelInfo.kernelDescriptor, sPatchAllocateSyncBuffer); + populateKernelDescriptor(kernelInternals.kernelInfo.kernelDescriptor, sPatchBindingTableState); kernelInternals.kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(RENDER_SURFACE_STATE) + sizeof(BINDING_TABLE_STATE); auto kernel = kernelInternals.mockKernel; kernel->initialize(); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 665bbe3e31..d35820bdc3 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -495,14 +495,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra HardwareParse hwParser; size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; - - SPatchMediaVFEState mediaVFEstate; uint32_t scratchSize = 4096u; - mediaVFEstate.PerThreadScratchSpace = scratchSize; - MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; + + SPatchMediaVFEState mediaVFEstate; + mediaVFEstate.PerThreadScratchSpace = scratchSize; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); auto sizeToProgram = PreambleHelper::getScratchSizeValueToProgramMediaVfeState(scratchSize); @@ -536,14 +535,13 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPri csr.getMemoryManager()->setForce32BitAllocations(false); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; - - SPatchMediaVFEState mediaVFEstate; uint32_t privateScratchSize = 4096u; - mediaVFEstate.PerThreadScratchSpace = privateScratchSize; - MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstate; + + SPatchMediaVFEState mediaVFEstate; + mediaVFEstate.PerThreadScratchSpace = privateScratchSize; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 1); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 6f233da8da..82a3f87432 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -308,13 +308,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc EXPECT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); - SPatchMediaVFEState mediaVFEstate; auto scratchSize = GetParam().scratchSize; - mediaVFEstate.PerThreadScratchSpace = scratchSize; - MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; + + SPatchMediaVFEState mediaVFEstate; + mediaVFEstate.PerThreadScratchSpace = scratchSize; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); uint32_t sizeToProgram = (scratchSize / static_cast(MemoryConstants::kiloByte)); uint32_t bitValue = 0u; @@ -370,6 +370,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc } mediaVFEstate.PerThreadScratchSpace = scratchSize; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); auto itorfirstBBEnd = find(itorWalker, cmdList.end()); ASSERT_NE(cmdList.end(), itorfirstBBEnd); @@ -443,13 +444,13 @@ HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWi auto mockCsr = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); - SPatchMediaVFEState mediaVFEstate; uint32_t scratchSize = 1024u; - mediaVFEstate.PerThreadScratchSpace = scratchSize; - MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; + + SPatchMediaVFEState mediaVFEstate; + mediaVFEstate.PerThreadScratchSpace = scratchSize; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); uint32_t sizeToProgram = (scratchSize / static_cast(MemoryConstants::kiloByte)); uint32_t bitValue = 0u; @@ -486,12 +487,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelWithScratch, givenDeviceForcing32bitAll auto memoryManager = csr->getMemoryManager(); memoryManager->setForce32BitAllocations(true); - SPatchMediaVFEState mediaVFEstate; auto scratchSize = 1024; - mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; + + SPatchMediaVFEState mediaVFEstate; + mediaVFEstate.PerThreadScratchSpace = scratchSize; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); enqueueKernel(mockKernel); auto graphicsAllocation = csr->getScratchAllocation(); diff --git a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp index 5c8e30f5f6..3ecc654542 100644 --- a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp @@ -64,6 +64,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest { void SetUpT() { SyncBufferEnqueueHandlerTest::SetUp(); kernelInternals = std::make_unique(*pClDevice, context); + kernelInternals->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; kernel = kernelInternals->mockKernel; kernel->executionType = KernelExecutionType::Concurrent; commandQueue = reinterpret_cast(new MockCommandQueueHw(context, pClDevice, 0)); @@ -81,7 +82,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest { sPatchAllocateSyncBuffer.SurfaceStateHeapOffset = 0; sPatchAllocateSyncBuffer.DataParamOffset = 0; sPatchAllocateSyncBuffer.DataParamSize = sizeof(uint8_t); - kernelInternals->kernelInfo.patchInfo.pAllocateSyncBuffer = &sPatchAllocateSyncBuffer; + populateKernelDescriptor(kernelInternals->kernelInfo.kernelDescriptor, sPatchAllocateSyncBuffer); } MockSyncBufferHandler *getSyncBufferHandler() { @@ -176,6 +177,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBuffer using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; kernelInternals->kernelInfo.usesSsh = true; kernelInternals->kernelInfo.requiresSshForBuffers = true; + kernelInternals->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindfulAndStateless; patchAllocateSyncBuffer(); pClDevice->allocateSyncBufferHandler(); diff --git a/opencl/test/unit_test/command_queue/work_group_size_tests.cpp b/opencl/test/unit_test/command_queue/work_group_size_tests.cpp index 255437c6e2..a48d9ff912 100644 --- a/opencl/test/unit_test/command_queue/work_group_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/work_group_size_tests.cpp @@ -87,9 +87,9 @@ struct WorkGroupSizeBase { Math::divideAndRoundUp(workItems[0], workGroupSize[0]), Math::divideAndRoundUp(workItems[1], workGroupSize[1]), Math::divideAndRoundUp(workItems[2], workGroupSize[2])}; - const iOpenCL::SPatchThreadPayload threadPayload = {}; - GpgpuWalkerHelper::setGpgpuWalkerThreadData(&pCmd, globalOffsets, workGroupsStart, workGroupsNum, - workGroupSize, simdSize, dims, true, false, threadPayload, 0u); + KernelDescriptor kd; + GpgpuWalkerHelper::setGpgpuWalkerThreadData(&pCmd, kd, globalOffsets, workGroupsStart, workGroupsNum, + workGroupSize, simdSize, dims, true, false, 0u); //And check if it is programmed correctly auto numWorkItems = computeWalkerWorkItems(pCmd); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 26114ee06b..4c1c8d31ea 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -592,10 +592,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecu size_t GWS = 1; uint32_t scratchSize = 1024; - SPatchMediaVFEState mediaVFEstate; + SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = scratchSize; - kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; + populateKernelDescriptor(kernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); @@ -667,6 +667,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecu //now re-try to see if SBA is not programmed scratchSize *= 2; mediaVFEstate.PerThreadScratchSpace = scratchSize; + populateKernelDescriptor(kernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); @@ -707,10 +708,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenNDRangeKer size_t GWS = 1; uint32_t scratchSize = 1024; - SPatchMediaVFEState mediaVFEstate; + SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = scratchSize; - kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; + populateKernelDescriptor(kernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index 396bba5c06..9391b6e5cd 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -825,19 +825,17 @@ TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenConte auto size = zeroSized ? 0 : 1024; MockKernelWithInternals mockKernel(context->getDevices(), context); - SPatchMediaVFEState mediaVFEstate; + SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = size; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); uint32_t computeUnitsForScratch[] = {0x10, 0x20}; - for (auto &pClDevice : context->getDevices()) { auto &deviceInfo = const_cast(pClDevice->getSharedDeviceInfo()); deviceInfo.computeUnitsUsedForScratch = computeUnitsForScratch[pClDevice->getRootDeviceIndex()]; } - mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; - mockKernel.mockKernel->initialize(); for (auto &pClDevice : context->getDevices()) { @@ -858,16 +856,15 @@ TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThen auto size = zeroSized ? 0 : 1024; MockKernelWithInternals mockKernel(*pDevice, context); - SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface = {}; + SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface = {}; allocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = size; allocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 128; allocateStatelessPrivateMemorySurface.DataParamOffset = 16; allocateStatelessPrivateMemorySurface.DataParamSize = 8; - allocateStatelessPrivateMemorySurface.IsSimtThread = isSmitThread; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, allocateStatelessPrivateMemorySurface); - mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrivateSurface = &allocateStatelessPrivateMemorySurface; size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch; size *= isSmitThread ? mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize() : 1; diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 508b29c62a..b4812ed1d5 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -571,7 +571,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut sPatchPrintfSurface.DataParamOffset = 0; sPatchPrintfSurface.DataParamSize = 8; populateKernelDescriptor(kernelInfo->kernelDescriptor, sPatchPrintfSurface); - kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap.insert(std::make_pair(0, testString)); + kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap[0].assign(testString); uint64_t crossThread[10]; pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index 485d94e6ff..47621e3b6d 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -68,7 +68,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - kernelIsaAddress += pKernel->getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + kernelIsaAddress += pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.entryPoints.skipSetFFIDGP; } pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); @@ -94,15 +94,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); ASSERT_NE(nullptr, pBlockInfo); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload); auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize; - const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize; + const uint32_t sizeCrossThreadData = pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize / grfSize; - auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload); - auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->getMaxSimdSize(), numChannels); + auto numChannels = pBlockInfo->kernelDescriptor.kernelAttributes.numLocalIdChannels; + auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->getMaxSimdSize(), grfSize, numChannels); uint32_t numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); @@ -117,7 +115,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + expectedBlockKernelAddress += pBlockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP; } EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress); @@ -133,7 +131,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelWithPrivate size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { - if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) { + if (pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->kernelDescriptor.kernelAttributes.flags.usesPrivateMemory) { kernelRequiringPrivateSurface = i; break; } @@ -163,7 +161,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlocksWithPrivateMemor size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { - if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) { + if (pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->kernelDescriptor.kernelAttributes.flags.usesPrivateMemory) { kernelRequiringPrivateSurface = i; break; } @@ -321,18 +319,16 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); ASSERT_NE(nullptr, pBlockInfo); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload); Kernel *blockKernel = Kernel::create(pKernel->getProgram(), MockKernel::toKernelInfoContainer(*pBlockInfo, rootDeviceIndex), nullptr); blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); if (blockKernel->getNumberOfBindingTableStates(rootDeviceIndex) > 0) { - ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState); - auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset); + ASSERT_TRUE(isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset)); + auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); EXPECT_EQ(0U, reinterpret_cast(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE); auto dstBindingTable = reinterpret_cast(dstBlockBti); - auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset); + auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); auto srcBindingTable = reinterpret_cast(srcBlockBti); for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(rootDeviceIndex); ++i) { uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer(); diff --git a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp index ee3ab33c63..1ec480899d 100644 --- a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp @@ -175,7 +175,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue size_t sshUsed = blockedCommandsData->ssh->getUsed(); size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates(rootDeviceIndex) * sizeof(RENDER_SURFACE_STATE) + - pKernel->getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) + + pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.bindingTable.numEntries * sizeof(BINDING_TABLE_STATE) + UnitTestHelper::getDefaultSshUsage(); if ((pKernel->requiresSshForBuffers(rootDeviceIndex)) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) { diff --git a/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp b/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp index f4918ed55d..36844d04a9 100644 --- a/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp +++ b/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp @@ -21,13 +21,13 @@ GEN12LPTEST_F(DeviceQueueHwTest, givenDeviceQueueWhenRunningOnCCsThenFfidSkipOff KernelInfo *blockInfo = const_cast(mockParentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0)); blockInfo->createKernelAllocation(device->getDevice(), false); ASSERT_NE(nullptr, blockInfo->getGraphicsAllocation()); - const_cast(blockInfo->patchInfo.threadPayload)->OffsetToSkipSetFFIDGP = 0x1234; + blockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP = 0x1234; auto &hwInfo = const_cast(device->getHardwareInfo()); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); hwInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(REVISION_A0, hwInfo); - uint64_t expectedOffset = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch() + blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + uint64_t expectedOffset = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch() + blockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP; uint64_t offset = MockDeviceQueueHw::getBlockKernelStartPointer(device->getDevice(), blockInfo, true); EXPECT_EQ(expectedOffset, offset); diff --git a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp index f77f3a7bff..7b27da595a 100644 --- a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp +++ b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp @@ -31,7 +31,7 @@ TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenS auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernelWithInternals{*device}; - mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload; + populateKernelDescriptor(mockKernelWithInternals.kernelInfo.kernelDescriptor, threadPayload); for (auto isCcsUsed : ::testing::Bool()) { uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed, rootDeviceIndex); diff --git a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp index 30db5f72c7..c7e33ee9da 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp @@ -36,15 +36,14 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; - pMediaVFEstate = new SPatchMediaVFEState(); - pMediaVFEstate->PerThreadScratchSpace = 1024; - pMediaVFEstate->ScratchSpaceOffset = 0; + SPatchMediaVFEState mediaVFEstate = {}; + mediaVFEstate.PerThreadScratchSpace = 1024; + mediaVFEstate.ScratchSpaceOffset = 0; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, mediaVFEstate, 0); SPatchAllocateStatelessPrintfSurface printfSurface = {}; populateKernelDescriptor(pKernelInfo->kernelDescriptor, printfSurface); - pKernelInfo->patchInfo.mediavfestate = pMediaVFEstate; - KernelArgPatchInfo kernelArg1PatchInfo; KernelArgPatchInfo kernelArg2PatchInfo; KernelArgPatchInfo kernelArg3PatchInfo; @@ -76,7 +75,6 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture void TearDown() override { delete pKernel; - delete pMediaVFEstate; delete pProgram; ContextFixture::TearDown(); @@ -84,7 +82,6 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture } std::unique_ptr pKernelInfo; - SPatchMediaVFEState *pMediaVFEstate = nullptr; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; char pCrossThreadData[128]; diff --git a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp index 997a8118c5..4b269e982f 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp @@ -33,10 +33,10 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture { pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; - pMediaVFEstate = new SPatchMediaVFEState(); - pMediaVFEstate->PerThreadScratchSpace = 1024; - pMediaVFEstate->ScratchSpaceOffset = 0; - pKernelInfo->patchInfo.mediavfestate = pMediaVFEstate; + SPatchMediaVFEState mediaVFEstate = {}; + mediaVFEstate.PerThreadScratchSpace = 1024; + mediaVFEstate.ScratchSpaceOffset = 0; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, mediaVFEstate, 0); SPatchAllocateStatelessPrintfSurface printfSurface = {}; populateKernelDescriptor(pKernelInfo->kernelDescriptor, printfSurface); @@ -48,7 +48,6 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture { } void TearDown() override { delete pKernel; - delete pMediaVFEstate; delete pProgram; ContextFixture::TearDown(); @@ -56,7 +55,6 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture { } std::unique_ptr pKernelInfo; - SPatchMediaVFEState *pMediaVFEstate = nullptr; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; }; diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index b91c8dee6c..f6ab77a707 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -587,8 +587,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF); size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ; - ASSERT_NE(nullptr, modifiedKernelInfo.patchInfo.threadPayload); - auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*modifiedKernelInfo.patchInfo.threadPayload); + auto numChannels = modifiedKernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(modifiedKernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize); ASSERT_LE(expectedIohSize, ioh.getUsed()); @@ -692,26 +691,26 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // define patch offsets for global, constant, private, event pool and default device queue surfaces - SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; - AllocateStatelessGlobalMemorySurfaceWithInitialization.GlobalBufferIndex = 0; - AllocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; - AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; - AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; - pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; + SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemorySurfaceWithInitialization; + allocateStatelessGlobalMemorySurfaceWithInitialization.GlobalBufferIndex = 0; + allocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; + allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; + allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessGlobalMemorySurfaceWithInitialization); - SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; - AllocateStatelessConstantMemorySurfaceWithInitialization.ConstantBufferIndex = 0; - AllocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 64; - AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 8; - AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; - pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; + SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemorySurfaceWithInitialization; + allocateStatelessConstantMemorySurfaceWithInitialization.ConstantBufferIndex = 0; + allocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 64; + allocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 8; + allocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessConstantMemorySurfaceWithInitialization); - SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface; - AllocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 32; - AllocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 128; - AllocateStatelessPrivateMemorySurface.DataParamOffset = 16; - AllocateStatelessPrivateMemorySurface.DataParamSize = 8; - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface; + SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface; + allocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 32; + allocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 128; + allocateStatelessPrivateMemorySurface.DataParamOffset = 16; + allocateStatelessPrivateMemorySurface.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateMemorySurface); SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface; allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 192; @@ -767,14 +766,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh bindingTableState.Count = 5; bindingTableState.Offset = btiOffset; bindingTableState.SurfaceStateOffset = 0; - pKernelInfo->patchInfo.bindingTableState = &bindingTableState; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, bindingTableState); // setup thread payload - SPatchThreadPayload threadPayload; + SPatchThreadPayload threadPayload = {}; threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; - pKernelInfo->patchInfo.threadPayload = &threadPayload; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, threadPayload); // define stateful path pKernelInfo->usesSsh = true; @@ -948,15 +947,87 @@ HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStat bindingTableState.Count = 0; bindingTableState.Offset = 64; bindingTableState.SurfaceStateOffset = 0; - pKernelInfo->patchInfo.bindingTableState = &bindingTableState; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, bindingTableState); dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); - pKernelInfo->patchInfo.bindingTableState = nullptr; - delete pKernel; } +HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerStateArrayWhenSendIndirectStateIsCalledThenInterfaceDescriptorIsNotPopulated) { + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; + CommandQueueHw cmdQ(pContext, pClDevice, 0, false); + + auto &commandStream = cmdQ.getCS(1024); + auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); + *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; + + auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); + auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); + auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); + const size_t localWorkSize = 256; + const size_t localWorkSizes[3]{localWorkSize, 1, 1}; + uint32_t interfaceDescriptorIndex = 0; + auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); + + //Undefined Offset, Defined BorderColorOffset + SPatchSamplerStateArray samplerStateArray = {}; + samplerStateArray.BorderColorOffset = 0; + samplerStateArray.Count = 2; + samplerStateArray.Offset = undefined; + samplerStateArray.Size = sizeof(SPatchSamplerStateArray); + samplerStateArray.Token = 1; + populateKernelDescriptor(mockKernelWithInternal->kernelInfo.kernelDescriptor, samplerStateArray); + + HardwareCommandsHelper::sendIndirectState( + commandStream, + dsh, + ioh, + ssh, + *mockKernelWithInternal->mockKernel, + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), + mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + localWorkSizes, + 0, + interfaceDescriptorIndex, + pDevice->getPreemptionMode(), + pWalkerCmd, + nullptr, + true, + *pDevice); + + auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); + EXPECT_EQ(0U, interfaceDescriptor->getSamplerStatePointer()); + EXPECT_EQ(0U, interfaceDescriptor->getSamplerCount()); + + //Defined Offset, Undefined BorderColorOffset + samplerStateArray.Offset = 0; + samplerStateArray.BorderColorOffset = undefined; + populateKernelDescriptor(mockKernelWithInternal->kernelInfo.kernelDescriptor, samplerStateArray); + + HardwareCommandsHelper::sendIndirectState( + commandStream, + dsh, + ioh, + ssh, + *mockKernelWithInternal->mockKernel, + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), + mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), + localWorkSizes, + 0, + interfaceDescriptorIndex, + pDevice->getPreemptionMode(), + pWalkerCmd, + nullptr, + true, + *pDevice); + + interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); + EXPECT_EQ(0U, interfaceDescriptor->getSamplerStatePointer()); + EXPECT_EQ(0U, interfaceDescriptor->getSamplerCount()); +} HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenIndirectStateIsProgrammedThenBorderColorIsCorrectlyCopiedToDshAndSamplerStatesAreProgrammedWithPointer) { typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; @@ -979,20 +1050,18 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd const uint32_t borderColorSize = 64; const uint32_t samplerStateSize = sizeof(SAMPLER_STATE) * 2; - SPatchSamplerStateArray samplerStateArray; + SPatchSamplerStateArray samplerStateArray = {}; samplerStateArray.BorderColorOffset = 0x0; samplerStateArray.Count = 2; samplerStateArray.Offset = borderColorSize; samplerStateArray.Size = samplerStateSize; samplerStateArray.Token = 1; + populateKernelDescriptor(mockKernelWithInternal->kernelInfo.kernelDescriptor, samplerStateArray); char *mockDsh = new char[(borderColorSize + samplerStateSize) * 4]; - memset(mockDsh, 6, borderColorSize); memset(mockDsh + borderColorSize, 8, borderColorSize); - mockKernelWithInternal->kernelInfo.heapInfo.pDsh = mockDsh; - mockKernelWithInternal->kernelInfo.patchInfo.samplerStateArray = &samplerStateArray; uint64_t interfaceDescriptorTableOffset = dsh.getUsed(); dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); @@ -1100,7 +1169,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, WhenGettingSizeR totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize; totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? pBlockInfo->patchInfo.bindingTableState->Count : 0); + maxBindingTableCount = std::max(maxBindingTableCount, static_cast(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries)); } totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; @@ -1128,22 +1197,21 @@ HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineT DebugManager.flags.EnablePassInlineData.set(1u); uint32_t crossThreadData[8]; - - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; + mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, givenNoDebugSettingsWhenDefaultModeIsExcercisedThenWeFollowKernelSettingForInlineProgramming) { - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; + mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, givenDisabledPassInlineDataWhenKernelAllowsInlineThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(0u); - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; + mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } @@ -1153,41 +1221,19 @@ HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInli uint32_t crossThreadData[8]; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 0; + mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = false; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } -HWTEST_F(HardwareCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) { - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0; - +HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsBiggerThanZeroThenExpectLocalIdsInUseIsTrue) { + mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 1; EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } -HWTEST_F(HardwareCommandsTest, whenLocalIdxInYDimPresentThenExpectLocalIdsInUseIsTrue) { - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 1; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0; - - EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); -} - -HWTEST_F(HardwareCommandsTest, whenLocalIdxInZDimPresentThenExpectLocalIdsInUseIsTrue) { - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 1; - - EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); -} - -HWTEST_F(HardwareCommandsTest, whenLocalIdxAreNotPresentThenExpectLocalIdsInUseIsFalse) { - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; - const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0; - +HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsZeroThenExpectLocalIdsInUseIsFalse) { + mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 0; EXPECT_FALSE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h index 33b1d000d5..543f0c605f 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h @@ -45,7 +45,7 @@ struct HardwareCommandsTest : ClDeviceFixture, template size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { - return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState->Count : 0, + return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.bindingTable.numEntries, srcKernel.getSurfaceStateHeap(rootDeviceIndex), srcKernel.getSurfaceStateHeapSize(rootDeviceIndex), srcKernel.getNumberOfBindingTableStates(rootDeviceIndex), srcKernel.getBindingTableOffset(rootDeviceIndex)); } diff --git a/opencl/test/unit_test/helpers/per_thread_data_tests.cpp b/opencl/test/unit_test/helpers/per_thread_data_tests.cpp index 357e57eee8..d8da7e5f24 100644 --- a/opencl/test/unit_test/helpers/per_thread_data_tests.cpp +++ b/opencl/test/unit_test/helpers/per_thread_data_tests.cpp @@ -13,6 +13,7 @@ #include "opencl/source/helpers/per_thread_data.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" +#include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "patch_shared.h" @@ -26,22 +27,22 @@ struct PerThreadDataTests : public ClDeviceFixture, void SetUp() override { ClDeviceFixture::SetUp(); - threadPayload = {}; + SPatchThreadPayload threadPayload = {}; threadPayload.LocalIDXPresent = localIdX ? 1 : 0; threadPayload.LocalIDYPresent = localIdY ? 1 : 0; threadPayload.LocalIDZPresent = localIdZ ? 1 : 0; threadPayload.LocalIDFlattenedPresent = flattenedId; threadPayload.UnusedPerThreadConstantPresent = !(localIdX || localIdY || localIdZ || flattenedId); + populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload); - kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; + simd = 32; + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = simd; kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); - kernelInfo.patchInfo.threadPayload = &threadPayload; - - simd = kernelInfo.getMaxSimdSize(); + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = kernelInfo.getMaxSimdSize(); numChannels = threadPayload.LocalIDXPresent + threadPayload.LocalIDYPresent + threadPayload.LocalIDZPresent; @@ -66,7 +67,6 @@ struct PerThreadDataTests : public ClDeviceFixture, size_t indirectHeapMemorySize; SKernelBinaryHeaderCommon kernelHeader; - SPatchThreadPayload threadPayload; KernelInfo kernelInfo; }; @@ -121,22 +121,20 @@ HWTEST_F(PerThreadDataXYZTests, Given2x4x8WhenSendingPerThreadDataThenCorrectAmo } HWTEST_F(PerThreadDataXYZTests, GivenDifferentSimdWhenGettingThreadPayloadSizeThenCorrectSizeIsReturned) { - simd = 32; - uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; + uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 2u * 3u, size); - simd = 16; - size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16; + size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 3u, size); - simd = 16; - threadPayload.HeaderPresent = 1; - size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true; + size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 4u, size); - simd = 16; - threadPayload.UnusedPerThreadConstantPresent = 1; - size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = true; + size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 5u, size); } @@ -181,39 +179,36 @@ HWTEST_F(PerThreadDataNoIdsTests, GivenThreadPaylodDataWithoutLocalIdsWhenSendin } HWTEST_F(PerThreadDataNoIdsTests, GivenSimdWhenGettingThreadPayloadSizeThenCorrectValueIsReturned) { - simd = 32; - uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; + uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize, size); - simd = 16; - size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16; + size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize, size); - simd = 16; - threadPayload.HeaderPresent = 1; - size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true; + size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 2u, size); } typedef PerThreadDataTests PerThreadDataFlattenedIdsTests; HWTEST_F(PerThreadDataFlattenedIdsTests, GivenSimdWhenGettingThreadPayloadSizeThenCorrectValueIsReturned) { - simd = 32; - uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; + uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 2u, size); - simd = 16; - size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16; + size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize, size); - simd = 16; - threadPayload.HeaderPresent = 1; - size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true; + size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 2u, size); - simd = 32; - threadPayload.HeaderPresent = 1; - size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); + kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; + size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 3u, size); } @@ -226,7 +221,7 @@ TEST(PerThreadDataTest, WhenSettingLocalIdsInPerThreadDataThenIdsAreSetInCorrect const std::array localWorkSizes = {{24, 1, 1}}; const std::array workgroupWalkOrder = {{0, 1, 2}}; - auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, numChannels, localWorkSize, grfSize); + auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize); auto sizeOverSizedBuffer = sizePerThreadDataTotal * 4; auto buffer = static_cast(alignedMalloc(sizeOverSizedBuffer, 16)); diff --git a/opencl/test/unit_test/kernel/debug_kernel_tests.cpp b/opencl/test/unit_test/kernel/debug_kernel_tests.cpp index 55f76d8a96..c29eec7d2f 100644 --- a/opencl/test/unit_test/kernel/debug_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/debug_kernel_tests.cpp @@ -16,15 +16,6 @@ using namespace NEO; -TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetDebugSurfaceBtiIsCalledThenCorrectValueIsReturned) { - auto device = std::make_unique(new MockDevice); - MockProgram program(toClDeviceVector(*device)); - program.enableKernelDebug(); - std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); - - EXPECT_EQ(0, kernel->getDebugSurfaceBti(device->getRootDeviceIndex())); -} - TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenCorrectValueIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(toClDeviceVector(*device)); @@ -54,15 +45,6 @@ TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenQueryingIsKernelDebugEnable EXPECT_FALSE(kernel->isKernelDebugEnabled()); } -TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetDebugSurfaceBtiIsCalledThenInvalidIndexValueIsReturned) { - auto device = std::make_unique(new MockDevice); - MockProgram program(toClDeviceVector(*device)); - program.enableKernelDebug(); - std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); - - EXPECT_EQ(-1, kernel->getDebugSurfaceBti(device->getRootDeviceIndex())); -} - TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenZeroIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(toClDeviceVector(*device)); diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index f01066b9fc..f186d95e62 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -83,8 +83,8 @@ struct MultiDeviceKernelArgBufferTest : public ::testing::Test { pKernelInfosStorage[i]->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfosStorage[i]->kernelArgInfo[0].isBuffer = true; - pKernelInfosStorage[i]->patchInfo.dataParameterStream = &dataParameterStream[i]; dataParameterStream[i].DataParameterStreamSize = (i + 1) * sizeof(void *); + populateKernelDescriptor(pKernelInfosStorage[i]->kernelDescriptor, dataParameterStream[i]); pKernelInfosStorage[i]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = i * sizeof(void *); pKernelInfosStorage[i]->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(void *); diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index ab7b1710b1..8e29c7ea88 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -193,8 +193,7 @@ HWTEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithSetBindingTableStateAnd SPatchBindingTableState bindingTableStateInfo; bindingTableStateInfo.Offset = 0; bindingTableStateInfo.Count = 4; - - info.patchInfo.bindingTableState = &bindingTableStateInfo; + populateKernelDescriptor(info.kernelDescriptor, bindingTableStateInfo); BINDING_TABLE_STATE bindingTableState[4]; @@ -245,8 +244,7 @@ HWTEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBindingTableStateAndIma SPatchBindingTableState bindingTableStateInfo; bindingTableStateInfo.Offset = 0; bindingTableStateInfo.Count = 0; - - info.patchInfo.bindingTableState = &bindingTableStateInfo; + populateKernelDescriptor(info.kernelDescriptor, bindingTableStateInfo); BINDING_TABLE_STATE bindingTableState[1]; @@ -347,8 +345,9 @@ TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBufferAndDataParameterBuf dataParameterBuffer.SourceOffset = 0; dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT; - info.patchInfo.dataParameterBuffersKernelArgs.push_back(&dataParameterBuffer); info.storeKernelArgument(&dataParameterBuffer); + info.kernelDescriptor.payloadMappings.explicitArgs.resize(1); + populateKernelArgDescriptor(info.kernelDescriptor, 0, dataParameterBuffer); std::vector curbeParams; uint64_t tokenMask = 0; @@ -659,14 +658,14 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNoKernelArgsWhenObtainingKernelRefl SPatchDataParameterStream dataParameterStream; dataParameterStream.Size = 0; dataParameterStream.DataParameterStreamSize = 0; - info.patchInfo.dataParameterStream = &dataParameterStream; + populateKernelDescriptor(info.kernelDescriptor, dataParameterStream); SPatchBindingTableState bindingTableState; bindingTableState.Count = 0; bindingTableState.Offset = 0; bindingTableState.Size = 0; bindingTableState.SurfaceStateOffset = 0; - info.patchInfo.bindingTableState = &bindingTableState; + populateKernelDescriptor(info.kernelDescriptor, bindingTableState); KernelInfoContainer kernelInfos; kernelInfos.push_back(&info); @@ -712,14 +711,14 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe SPatchDataParameterStream dataParameterStream; dataParameterStream.Size = 0; dataParameterStream.DataParameterStreamSize = 0; - info.patchInfo.dataParameterStream = &dataParameterStream; + populateKernelDescriptor(info.kernelDescriptor, dataParameterStream); SPatchBindingTableState bindingTableState; bindingTableState.Count = 0; bindingTableState.Offset = 0; bindingTableState.Size = 0; bindingTableState.SurfaceStateOffset = 0; - info.patchInfo.bindingTableState = &bindingTableState; + populateKernelDescriptor(info.kernelDescriptor, bindingTableState); KernelArgInfo argInfo; argInfo.isDeviceQueue = true; @@ -787,7 +786,7 @@ TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKerne blockCurbeParamCounts[i] = curbeParamsForBlock.size(); - maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize)); + maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast(pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize)); totalCurbeParamsSize += blockCurbeParamCounts[i]; size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo()); @@ -1234,21 +1233,21 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam curbeParams; }; @@ -1293,12 +1290,16 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn std::tie(localIDPresent, privateSurfaceSize) = GetParam(); + SPatchThreadPayload threadPayload = {}; threadPayload.LocalIDFlattenedPresent = localIDPresent.flattend; threadPayload.LocalIDXPresent = localIDPresent.x; threadPayload.LocalIDYPresent = localIDPresent.y; threadPayload.LocalIDZPresent = localIDPresent.z; + populateKernelDescriptor(info.kernelDescriptor, threadPayload); + SPatchAllocateStatelessPrivateSurface privateSurface = {}; privateSurface.PerThreadPrivateMemorySize = privateSurfaceSize; + populateKernelDescriptor(info.kernelDescriptor, privateSurface); std::unique_ptr kernelDataMemory(new char[4096]); @@ -1351,8 +1352,6 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullThreadPayloadWhenSettingKernelDataThenDataAndOffsetsAreCorrect) { - info.patchInfo.threadPayload = nullptr; - std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; @@ -1380,11 +1379,11 @@ TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullThreadPayloadWhenSetti } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullPrivateSurfaceWhenSettingKernelDataThenDataAndOffsetsAreCorrect) { - info.patchInfo.pAllocateStatelessPrivateSurface = nullptr; - std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; + SPatchAllocateStatelessPrivateSurface patch = {}; + populateKernelDescriptor(info.kernelDescriptor, patch); uint64_t tokenMask = 1 | 2 | 4; @@ -1409,7 +1408,8 @@ TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullPrivateSurfaceWhenSett } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullSamplerStateWhenSettingKernelDataThenDataAndOffsetsAreCorrect) { - info.patchInfo.samplerStateArray = nullptr; + SPatchSamplerStateArray samplerStateArray = {}; + populateKernelDescriptor(info.kernelDescriptor, samplerStateArray); std::unique_ptr kernelDataMemory(new char[4096]); @@ -1462,8 +1462,6 @@ TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenDisabledConcurrentExecutio TEST_F(ReflectionSurfaceHelperFixture, GivenNullBindingTableWhenSettingKernelDataThenDataIsCorrectlySet) { KernelInfo info; - info.patchInfo.bindingTableState = nullptr; - std::unique_ptr kernelDataMemory(new char[200]); IGIL_KernelAddressData *kernalAddressData = reinterpret_cast(kernelDataMemory.get()); MockKernel::ReflectionSurfaceHelperPublic::setKernelAddressData(kernelDataMemory.get(), 0, 1, 2, 3, 4, 5, 6, info, pPlatform->getClDevice(0)->getHardwareInfo()); @@ -1482,8 +1480,7 @@ TEST_F(ReflectionSurfaceHelperFixture, GivenSetBindingTableWhenSettingKernelData SPatchBindingTableState bindingTableStateInfo; bindingTableStateInfo.Offset = 0; bindingTableStateInfo.Count = 4; - - info.patchInfo.bindingTableState = &bindingTableStateInfo; + populateKernelDescriptor(info.kernelDescriptor, bindingTableStateInfo); std::unique_ptr kernelDataMemory(new char[200]); IGIL_KernelAddressData *kernalAddressData = reinterpret_cast(kernelDataMemory.get()); @@ -1501,8 +1498,6 @@ TEST_F(ReflectionSurfaceHelperFixture, GivenSetBindingTableWhenSettingKernelData TEST_F(ReflectionSurfaceHelperFixture, WhenPatchingBlocksCurbeThenAddressesAreSetCorrectly) { KernelInfo info; - info.patchInfo.bindingTableState = nullptr; - std::unique_ptr refletionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *header = reinterpret_cast(refletionSurfaceMemory.get()); header->m_numberOfKernels = 2; @@ -1549,8 +1544,6 @@ TEST_F(ReflectionSurfaceHelperFixture, WhenPatchingBlocksCurbeThenAddressesAreSe TEST_F(ReflectionSurfaceHelperFixture, GivenUndefinedOffsetsWhenPatchingBlocksCurbeThenAddressesAreSetCorrectly) { KernelInfo info; - info.patchInfo.bindingTableState = nullptr; - std::unique_ptr refletionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *header = reinterpret_cast(refletionSurfaceMemory.get()); header->m_numberOfKernels = 2; @@ -1948,7 +1941,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryWh auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); - uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; + uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); @@ -1982,7 +1975,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryAn auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); - uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; + uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(0u, *pCurbe); @@ -2015,7 +2008,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemory auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); - uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; + uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset); uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); @@ -2058,7 +2051,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemory auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); - uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; + uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset); uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); @@ -2093,14 +2086,14 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenNoKernelArgsWhenObtainingKernelRefl SPatchDataParameterStream dataParameterStream; dataParameterStream.Size = 0; dataParameterStream.DataParameterStreamSize = 0; - info.patchInfo.dataParameterStream = &dataParameterStream; + populateKernelDescriptor(info.kernelDescriptor, dataParameterStream); SPatchBindingTableState bindingTableState; bindingTableState.Count = 0; bindingTableState.Offset = 0; bindingTableState.Size = 0; bindingTableState.SurfaceStateOffset = 0; - info.patchInfo.bindingTableState = &bindingTableState; + populateKernelDescriptor(info.kernelDescriptor, bindingTableState); auto rootDeviceIndex = device1->getRootDeviceIndex(); KernelInfoContainer kernelInfos; @@ -2148,14 +2141,14 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenDeviceQueueKernelArgWhenObtainingKe SPatchDataParameterStream dataParameterStream; dataParameterStream.Size = 0; dataParameterStream.DataParameterStreamSize = 0; - info.patchInfo.dataParameterStream = &dataParameterStream; + populateKernelDescriptor(info.kernelDescriptor, dataParameterStream); SPatchBindingTableState bindingTableState; bindingTableState.Count = 0; bindingTableState.Offset = 0; bindingTableState.Size = 0; bindingTableState.SurfaceStateOffset = 0; - info.patchInfo.bindingTableState = &bindingTableState; + populateKernelDescriptor(info.kernelDescriptor, bindingTableState); KernelArgInfo argInfo; argInfo.isDeviceQueue = true; diff --git a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp index 3dbfdf38a9..95771c5020 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,21 +22,22 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture, ClDeviceFixture::SetUp(); program = std::make_unique(toClDeviceVector(*pClDevice)); + SPatchDataParameterStream dataParameterStream = {}; memset(&dataParameterStream, 0, sizeof(dataParameterStream)); dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData); + populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream); + SPatchThreadPayload threadPayload; memset(&threadPayload, 0, sizeof(threadPayload)); threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; + populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload); kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); - kernelInfo.patchInfo.dataParameterStream = &dataParameterStream; kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; - - kernelInfo.patchInfo.threadPayload = &threadPayload; } void TearDown() override { ClDeviceFixture::TearDown(); @@ -48,8 +49,6 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture, std::unique_ptr program; SKernelBinaryHeaderCommon kernelHeader; - SPatchDataParameterStream dataParameterStream; - SPatchThreadPayload threadPayload; KernelInfo kernelInfo; uint32_t kernelIsa[32]; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 8537574a87..426c94d926 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -422,23 +422,6 @@ TEST_F(KernelFromBinaryTests, givenArgumentDeclaredAsConstantWhenKernelIsCreated EXPECT_TRUE(pKernelInfo->kernelArgInfo[0].isReadOnly); } -TEST(PatchInfo, WhenPatchInfoIsCreatedThenMembersAreNullptr) { - PatchInfo patchInfo; - EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorDataLoad); - EXPECT_EQ(nullptr, patchInfo.localsurface); - EXPECT_EQ(nullptr, patchInfo.mediavfestate); - EXPECT_EQ(nullptr, patchInfo.mediaVfeStateSlot1); - EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorData); - EXPECT_EQ(nullptr, patchInfo.samplerStateArray); - EXPECT_EQ(nullptr, patchInfo.bindingTableState); - EXPECT_EQ(nullptr, patchInfo.dataParameterStream); - EXPECT_EQ(nullptr, patchInfo.threadPayload); - EXPECT_EQ(nullptr, patchInfo.pKernelAttributesInfo); - EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrivateSurface); - EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization); - EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization); -} - typedef Test KernelPrivateSurfaceTest; typedef Test KernelGlobalSurfaceTest; typedef Test KernelConstantSurfaceTest; @@ -546,11 +529,11 @@ TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpda tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS); SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; - pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -583,11 +566,11 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS); SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; - pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -622,11 +605,11 @@ TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResou tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS); SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; - pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -663,11 +646,11 @@ TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateS tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 4; tokenSPS.PerThreadPrivateMemorySize = 112; - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS); SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; - pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -692,13 +675,12 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup constant memory - SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface; - AllocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 0; - AllocateStatelessPrivateMemorySurface.DataParamOffset = 0; - AllocateStatelessPrivateMemorySurface.DataParamSize = 8; - AllocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 16; - - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface; + SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface; + allocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 0; + allocateStatelessPrivateMemorySurface.DataParamOffset = 0; + allocateStatelessPrivateMemorySurface.DataParamSize = 8; + allocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 16; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateMemorySurface); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); @@ -724,7 +706,7 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset)); + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); @@ -774,17 +756,18 @@ TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamWhenGettingConst SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; - pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS); EXPECT_EQ(64u, pKernelInfo->getConstantBufferSize()); } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4ThenReturnOutOfResources) { - auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface()); - pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max(); - auto pKernelInfo = std::make_unique(); - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get(); + + SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateSurface = {}; + allocateStatelessPrivateSurface.PerThreadPrivateMemorySize = std::numeric_limits::max(); + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateSurface); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; MockContext context; @@ -798,11 +781,12 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) { - auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface()); - pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max(); - auto pKernelInfo = std::make_unique(); - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get(); + + SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateSurface = {}; + allocateStatelessPrivateSurface.PerThreadPrivateMemorySize = std::numeric_limits::max(); + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateSurface); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; MockContext context; @@ -816,11 +800,12 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) { - auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface()); - pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max(); - auto pKernelInfo = std::make_unique(); - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get(); + + SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateSurface = {}; + allocateStatelessPrivateSurface.PerThreadPrivateMemorySize = std::numeric_limits::max(); + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateSurface); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; MockContext context; @@ -839,15 +824,14 @@ TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalS auto pKernelInfo = std::make_unique(); // setup global memory - SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; - AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; - AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; + SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemorySurfaceWithInitialization; + allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; + allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessGlobalMemorySurfaceWithInitialization); SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; - pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tempSPatchDataParameterStream); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -878,15 +862,14 @@ TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalS auto pKernelInfo = std::make_unique(); // setup global memory - SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; - AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; - AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; + SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemorySurfaceWithInitialization; + allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; + allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessGlobalMemorySurfaceWithInitialization); SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; - pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tempSPatchDataParameterStream); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -917,12 +900,11 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup global memory - SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; - AllocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; - AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; - AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; + SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemorySurfaceWithInitialization; + allocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; + allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; + allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessGlobalMemorySurfaceWithInitialization); char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); @@ -951,7 +933,7 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), - pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset)); + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); @@ -995,15 +977,14 @@ TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConst auto pKernelInfo = std::make_unique(); // setup constant memory - SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; - AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; - AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; + SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemorySurfaceWithInitialization; + allocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; + allocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessConstantMemorySurfaceWithInitialization); SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; - pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tempSPatchDataParameterStream); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -1033,15 +1014,14 @@ TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConst auto pKernelInfo = std::make_unique(); // setup constant memory - SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; - AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; - AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; + SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemorySurfaceWithInitialization; + allocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; + allocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessConstantMemorySurfaceWithInitialization); SPatchDataParameterStream tempSPatchDataParameterStream; tempSPatchDataParameterStream.DataParameterStreamSize = 16; - pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tempSPatchDataParameterStream); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; @@ -1071,12 +1051,11 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup constant memory - SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; - AllocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; - AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; - AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; - - pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; + SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemorySurfaceWithInitialization; + allocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; + allocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0; + allocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessConstantMemorySurfaceWithInitialization); char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); @@ -1105,7 +1084,7 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), - pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset)); + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); @@ -2461,7 +2440,7 @@ struct KernelCrossThreadTests : Test { pKernelInfo = std::make_unique(); ASSERT_NE(nullptr, pKernelInfo); - pKernelInfo->patchInfo.dataParameterStream = &patchDataParameterStream; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, patchDataParameterStream); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; } @@ -2628,12 +2607,11 @@ TEST_F(KernelCrossThreadTests, GivenSlmStatisSizeWhenCreatingKernelThenSlmTotalS delete kernel; } TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCurbeIsPatchedProperly) { - SPatchAllocateStatelessPrivateSurface allocatePrivate; allocatePrivate.DataParamSize = 8; allocatePrivate.DataParamOffset = 0; allocatePrivate.PerThreadPrivateMemorySize = 1; - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &allocatePrivate; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocatePrivate); MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); @@ -2694,15 +2672,12 @@ TEST_F(KernelCrossThreadTests, WhenPatchingBlocksSimdSizeThenSimdSizeIsPatchedCo TEST(KernelInfoTest, WhenPatchingBorderColorOffsetThenPatchIsAppliedCorrectly) { KernelInfo info; - SPatchSamplerStateArray samplerState; - samplerState.BorderColorOffset = 3; - - info.patchInfo.samplerStateArray = nullptr; - EXPECT_EQ(0u, info.getBorderColorOffset()); - info.patchInfo.samplerStateArray = &samplerState; - + SPatchSamplerStateArray samplerState = {}; + samplerState.BorderColorOffset = 3; + samplerState.Count = 1; + populateKernelDescriptor(info.kernelDescriptor, samplerState); EXPECT_EQ(3u, info.getBorderColorOffset()); } @@ -3141,12 +3116,14 @@ TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivat auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals mockKernel(*device); + SPatchMediaVFEState mediaVFEstate; + mediaVFEstate.PerThreadScratchSpace = 512u; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0); + SPatchMediaVFEState mediaVFEstateSlot1; mediaVFEstateSlot1.PerThreadScratchSpace = 1024u; - mediaVFEstate.PerThreadScratchSpace = 512u; - mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; - mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstateSlot1; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstateSlot1, 1); EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex())); } @@ -3155,7 +3132,6 @@ TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateSc auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals mockKernel(*device); - mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = nullptr; EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex())); } @@ -3174,6 +3150,31 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitS EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size()); } +TEST(KernelTest, givenKernelWithPatchInfoCollecitonEnabledAndArgumentWithInvalidCrossThreadDataOffsetWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockKernelWithInternals kernel(*device); + MockGraphicsAllocation mockAllocation; + ArgDescPointer arg; + uint64_t ptr = 0; + kernel.mockKernel->patchWithImplicitSurface(&ptr, mockAllocation, device->getDevice(), arg); + EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); +} + +TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledAndValidArgumentWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) { + DebugManagerStateRestore restore; + DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); + + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockKernelWithInternals kernel(*device); + MockGraphicsAllocation mockAllocation; + ArgDescPointer arg; + arg.stateless = 0; + uint64_t crossThreadData = 0; + EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); + kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, device->getDevice(), arg); + EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size()); +} + TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); @@ -3234,7 +3235,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse SPatchThreadPayload threadPayload = {}; threadPayload.OffsetToSkipPerThreadDataLoad = 128u; - mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, threadPayload); mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); @@ -3253,7 +3254,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet SPatchThreadPayload threadPayload = {}; threadPayload.OffsetToSkipPerThreadDataLoad = 128u; - mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, threadPayload); mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); @@ -3272,7 +3273,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe SPatchThreadPayload threadPayload = {}; threadPayload.OffsetToSkipPerThreadDataLoad = 128u; - mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; + populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, threadPayload); mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); @@ -3395,7 +3396,7 @@ TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThe tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; - pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; + populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS); KernelInfoContainer kernelInfos; kernelInfos.resize(deviceFactory->rootDevices.size()); diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp index f8638175c5..c87a7542dc 100644 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp @@ -132,50 +132,46 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA crossThreadOffsetBlock += 8; - auto privateSurfaceBlock = std::make_unique(); - privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock; - privateSurfaceBlock->DataParamSize = 8; - privateSurfaceBlock->Size = 8; - privateSurfaceBlock->SurfaceStateHeapOffset = 0; - privateSurfaceBlock->Token = 0; - privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; - infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock.get(); + SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {}; + privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock; + privateSurfaceBlock.DataParamSize = 8; + privateSurfaceBlock.Size = 8; + privateSurfaceBlock.SurfaceStateHeapOffset = 0; + privateSurfaceBlock.Token = 0; + privateSurfaceBlock.PerThreadPrivateMemorySize = 1000; + populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock); crossThreadOffsetBlock += 8; - SPatchThreadPayload *threadPayloadBlock = new SPatchThreadPayload; - threadPayloadBlock->LocalIDXPresent = 0; - threadPayloadBlock->LocalIDYPresent = 0; - threadPayloadBlock->LocalIDZPresent = 0; - threadPayloadBlock->HeaderPresent = 0; - threadPayloadBlock->Size = 128; - - infoBlock->patchInfo.threadPayload = threadPayloadBlock; + SPatchThreadPayload threadPayloadBlock = {}; + threadPayloadBlock.LocalIDXPresent = 0; + threadPayloadBlock.LocalIDYPresent = 0; + threadPayloadBlock.LocalIDZPresent = 0; + threadPayloadBlock.HeaderPresent = 0; + threadPayloadBlock.Size = 128; + populateKernelDescriptor(infoBlock->kernelDescriptor, threadPayloadBlock); infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; - SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream; - streamBlock->DataParameterStreamSize = 0; - streamBlock->Size = 0; - infoBlock->patchInfo.dataParameterStream = streamBlock; + SPatchDataParameterStream streamBlock = {}; + streamBlock.DataParameterStreamSize = 0; + streamBlock.Size = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, streamBlock); - SPatchBindingTableState *bindingTable = new SPatchBindingTableState; - bindingTable->Count = 0; - bindingTable->Offset = 0; - bindingTable->Size = 0; - bindingTable->SurfaceStateOffset = 0; - infoBlock->patchInfo.bindingTableState = bindingTable; + SPatchBindingTableState bindingTable = {}; + bindingTable.Count = 0; + bindingTable.Offset = 0; + bindingTable.Size = 0; + bindingTable.SurfaceStateOffset = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, bindingTable); - SPatchInterfaceDescriptorData *idData = new SPatchInterfaceDescriptorData; - idData->BindingTableOffset = 0; - idData->KernelOffset = 0; - idData->Offset = 0; - idData->SamplerStateOffset = 0; - idData->Size = 0; - infoBlock->patchInfo.interfaceDescriptorData = idData; - - infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; - infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr; + SPatchInterfaceDescriptorData idData = {}; + idData.BindingTableOffset = 0; + idData.KernelOffset = 0; + idData.Offset = 0; + idData.SamplerStateOffset = 0; + idData.Size = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, idData); infoBlock->heapInfo.pDsh = (void *)new uint64_t[64]; infoBlock->crossThreadData = new char[crossThreadOffsetBlock]; diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index b5dd3eaadc..5853a282b3 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -25,11 +25,24 @@ #include namespace NEO { +void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchDataParameterBuffer &token); void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrintfSurface &token); void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv); void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessEventPoolSurface &token); void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessDefaultDeviceQueueSurface &token); void populateKernelDescriptor(KernelDescriptor &dst, const SPatchString &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateSystemThreadSurface &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateLocalSurface &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchInterfaceDescriptorData &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchMediaVFEState &token, uint32_t slot); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchBindingTableState &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchThreadPayload &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchDataParameterStream &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrivateSurface &token); +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateSyncBuffer &token); struct MockKernelObjForAuxTranslation : public KernelObjForAuxTranslation { MockKernelObjForAuxTranslation(Type type) : KernelObjForAuxTranslation(type, nullptr) { @@ -147,7 +160,6 @@ class MockKernel : public Kernel { } if (kernelInfoAllocated) { - delete kernelInfoAllocated->patchInfo.threadPayload; delete kernelInfoAllocated; } } @@ -162,14 +174,13 @@ class MockKernel : public Kernel { auto info = new KernelInfo(); const size_t crossThreadSize = 160; - SPatchThreadPayload *threadPayload = new SPatchThreadPayload; - threadPayload->LocalIDXPresent = 0; - threadPayload->LocalIDYPresent = 0; - threadPayload->LocalIDZPresent = 0; - threadPayload->HeaderPresent = 0; - threadPayload->Size = 128; - - info->patchInfo.threadPayload = threadPayload; + SPatchThreadPayload threadPayload = {}; + threadPayload.LocalIDXPresent = 0; + threadPayload.LocalIDYPresent = 0; + threadPayload.LocalIDZPresent = 0; + threadPayload.HeaderPresent = 0; + threadPayload.Size = 128; + populateKernelDescriptor(info->kernelDescriptor, threadPayload); info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false; info->kernelDescriptor.kernelAttributes.numGrfRequired = grfNumber; @@ -214,6 +225,8 @@ class MockKernel : public Kernel { } if (newCrossThreadDataSize == 0) { + kernelDeviceInfos[rootDeviceIndex].crossThreadData = nullptr; + kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = 0; return; } kernelDeviceInfos[rootDeviceIndex].crossThreadData = mockCrossThreadDatas[rootDeviceIndex].data(); @@ -300,26 +313,27 @@ class MockKernelWithInternals { public: MockKernelWithInternals(const ClDeviceVector &deviceVector, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) { memset(&kernelHeader, 0, sizeof(SKernelBinaryHeaderCommon)); - memset(&threadPayload, 0, sizeof(SPatchThreadPayload)); memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream)); memset(&mediaVfeState, 0, sizeof(SPatchMediaVFEState)); memset(&mediaVfeStateSlot1, 0, sizeof(SPatchMediaVFEState)); + memset(&threadPayload, 0, sizeof(SPatchThreadPayload)); threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; + kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.pSsh = sshLocal; kernelInfo.heapInfo.pDsh = dshLocal; kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(sshLocal); - kernelInfo.patchInfo.dataParameterStream = &dataParameterStream; + populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream); populateKernelDescriptor(kernelInfo.kernelDescriptor, execEnv); kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; - kernelInfo.patchInfo.threadPayload = &threadPayload; - kernelInfo.patchInfo.mediavfestate = &mediaVfeState; - kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVfeStateSlot1; + populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload); + populateKernelDescriptor(kernelInfo.kernelDescriptor, mediaVfeState, 0); + populateKernelDescriptor(kernelInfo.kernelDescriptor, mediaVfeStateSlot1, 1); if (context == nullptr) { mockContext = new MockContext; @@ -432,14 +446,13 @@ class MockParentKernel : public Kernel { uint32_t crossThreadOffset = 0; uint32_t crossThreadOffsetBlock = 0; - SPatchThreadPayload *threadPayload = new SPatchThreadPayload; - threadPayload->LocalIDXPresent = 0; - threadPayload->LocalIDYPresent = 0; - threadPayload->LocalIDZPresent = 0; - threadPayload->HeaderPresent = 0; - threadPayload->Size = 128; - - info->patchInfo.threadPayload = threadPayload; + SPatchThreadPayload threadPayload = {}; + threadPayload.LocalIDXPresent = 0; + threadPayload.LocalIDYPresent = 0; + threadPayload.LocalIDZPresent = 0; + threadPayload.HeaderPresent = 0; + threadPayload.Size = 128; + populateKernelDescriptor(info->kernelDescriptor, threadPayload); info->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; @@ -523,63 +536,57 @@ class MockParentKernel : public Kernel { crossThreadOffsetBlock += 8; } - infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; - infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr; - if (addChildGlobalMemory) { - SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *globalMemoryBlock = new SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization; - globalMemoryBlock->DataParamOffset = crossThreadOffsetBlock; - globalMemoryBlock->DataParamSize = 8; - globalMemoryBlock->Size = 8; - globalMemoryBlock->SurfaceStateHeapOffset = 0; - globalMemoryBlock->Token = 0; - infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = globalMemoryBlock; + SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization globalMemoryBlock = {}; + globalMemoryBlock.DataParamOffset = crossThreadOffsetBlock; + globalMemoryBlock.DataParamSize = 8; + globalMemoryBlock.Size = 8; + globalMemoryBlock.SurfaceStateHeapOffset = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, globalMemoryBlock); crossThreadOffsetBlock += 8; } if (addChildConstantMemory) { - SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *constantMemoryBlock = new SPatchAllocateStatelessConstantMemorySurfaceWithInitialization; - constantMemoryBlock->DataParamOffset = crossThreadOffsetBlock; - constantMemoryBlock->DataParamSize = 8; - constantMemoryBlock->Size = 8; - constantMemoryBlock->SurfaceStateHeapOffset = 0; - constantMemoryBlock->Token = 0; - infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = constantMemoryBlock; + SPatchAllocateStatelessConstantMemorySurfaceWithInitialization constantMemoryBlock = {}; + constantMemoryBlock.DataParamOffset = crossThreadOffsetBlock; + constantMemoryBlock.DataParamSize = 8; + constantMemoryBlock.Size = 8; + constantMemoryBlock.SurfaceStateHeapOffset = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, constantMemoryBlock); crossThreadOffsetBlock += 8; } - SPatchThreadPayload *threadPayloadBlock = new SPatchThreadPayload; - threadPayloadBlock->LocalIDXPresent = 0; - threadPayloadBlock->LocalIDYPresent = 0; - threadPayloadBlock->LocalIDZPresent = 0; - threadPayloadBlock->HeaderPresent = 0; - threadPayloadBlock->Size = 128; - - infoBlock->patchInfo.threadPayload = threadPayloadBlock; + SPatchThreadPayload threadPayloadBlock = {}; + threadPayloadBlock.LocalIDXPresent = 0; + threadPayloadBlock.LocalIDYPresent = 0; + threadPayloadBlock.LocalIDZPresent = 0; + threadPayloadBlock.HeaderPresent = 0; + threadPayloadBlock.Size = 128; + populateKernelDescriptor(infoBlock->kernelDescriptor, threadPayloadBlock); infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; infoBlock->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; infoBlock->kernelDescriptor.kernelAttributes.simdSize = 32; - SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream; - streamBlock->DataParameterStreamSize = 0; - streamBlock->Size = 0; - infoBlock->patchInfo.dataParameterStream = streamBlock; + SPatchDataParameterStream streamBlock = {}; + streamBlock.DataParameterStreamSize = 0; + streamBlock.Size = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, streamBlock); - SPatchBindingTableState *bindingTable = new SPatchBindingTableState; - bindingTable->Count = 0; - bindingTable->Offset = 0; - bindingTable->Size = 0; - bindingTable->SurfaceStateOffset = 0; - infoBlock->patchInfo.bindingTableState = bindingTable; + SPatchBindingTableState bindingTable = {}; + bindingTable.Count = 0; + bindingTable.Offset = 0; + bindingTable.Size = 0; + bindingTable.SurfaceStateOffset = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, bindingTable); - SPatchInterfaceDescriptorData *idData = new SPatchInterfaceDescriptorData; - idData->BindingTableOffset = 0; - idData->KernelOffset = 0; - idData->Offset = 0; - idData->SamplerStateOffset = 0; - idData->Size = 0; - infoBlock->patchInfo.interfaceDescriptorData = idData; + SPatchInterfaceDescriptorData idData = {}; + idData.BindingTableOffset = 0; + idData.KernelOffset = 0; + idData.Offset = 0; + idData.SamplerStateOffset = 0; + idData.Size = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, idData); infoBlock->heapInfo.pDsh = (void *)new uint64_t[64]; infoBlock->crossThreadData = new char[crossThreadOffsetBlock > crossThreadSize ? crossThreadOffsetBlock : crossThreadSize]; @@ -599,18 +606,11 @@ class MockParentKernel : public Kernel { continue; } auto &kernelInfo = *pKernelInfo; - delete kernelInfo.patchInfo.threadPayload; delete &kernelInfo; BlockKernelManager *blockManager = program->getBlockKernelManager(); for (uint32_t i = 0; i < blockManager->getCount(); i++) { const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); - delete blockInfo->patchInfo.threadPayload; - delete blockInfo->patchInfo.dataParameterStream; - delete blockInfo->patchInfo.bindingTableState; - delete blockInfo->patchInfo.interfaceDescriptorData; - delete blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization; - delete blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization; delete[](uint64_t *) blockInfo->heapInfo.pDsh; } } @@ -640,28 +640,16 @@ class MockSchedulerKernel : public SchedulerKernel { class MockDebugKernel : public MockKernel { public: MockDebugKernel(Program *program, KernelInfoContainer &kernelInfos) : MockKernel(program, kernelInfos) { - if (!kernelInfos[0]->patchInfo.pAllocateSystemThreadSurface) { - SPatchAllocateSystemThreadSurface *patchToken = new SPatchAllocateSystemThreadSurface; - - patchToken->BTI = 0; - patchToken->Offset = 0; - patchToken->PerThreadSystemThreadSurfaceSize = MockDebugKernel::perThreadSystemThreadSurfaceSize; - patchToken->Size = sizeof(SPatchAllocateSystemThreadSurface); - patchToken->Token = iOpenCL::PATCH_TOKEN_ALLOCATE_SIP_SURFACE; - - const_cast(kernelInfos[0])->patchInfo.pAllocateSystemThreadSurface = patchToken; - - systemThreadSurfaceAllocated = true; + if (!isValidOffset(kernelInfos[0]->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) { + SPatchAllocateSystemThreadSurface allocateSystemThreadSurface = {}; + allocateSystemThreadSurface.Offset = 0; + allocateSystemThreadSurface.PerThreadSystemThreadSurfaceSize = MockDebugKernel::perThreadSystemThreadSurfaceSize; + populateKernelDescriptor(const_cast(kernelInfos[0]->kernelDescriptor), allocateSystemThreadSurface); } } - ~MockDebugKernel() override { - if (systemThreadSurfaceAllocated) { - delete kernelInfos[0]->patchInfo.pAllocateSystemThreadSurface; - } - } + ~MockDebugKernel() override {} static const uint32_t perThreadSystemThreadSurfaceSize; - bool systemThreadSurfaceAllocated = false; }; } // namespace NEO diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 68775cb3c0..2a1f36958a 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -41,15 +41,16 @@ struct ProfilingTests : public CommandEnqueueFixture, kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; + SPatchThreadPayload threadPayload = {}; memset(&threadPayload, 0, sizeof(threadPayload)); threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; + populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload); kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); - kernelInfo.patchInfo.dataParameterStream = &dataParameterStream; - kernelInfo.patchInfo.threadPayload = &threadPayload; + populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream); } void TearDown() override { @@ -60,7 +61,6 @@ struct ProfilingTests : public CommandEnqueueFixture, SKernelBinaryHeaderCommon kernelHeader = {}; SPatchDataParameterStream dataParameterStream = {}; - SPatchThreadPayload threadPayload = {}; KernelInfo kernelInfo; MockContext ctx; diff --git a/opencl/test/unit_test/program/kernel_data.cpp b/opencl/test/unit_test/program/kernel_data.cpp index 7ef0b573dc..6ad78ce4d8 100644 --- a/opencl/test/unit_test/program/kernel_data.cpp +++ b/opencl/test/unit_test/program/kernel_data.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/string.h" #include "opencl/source/platform/platform.h" @@ -38,20 +39,6 @@ TEST_F(KernelDataTest, GivenHeapsWhenBuildingThenProgramIsCorrect) { buildAndDecode(); } -TEST_F(KernelDataTest, GivenMediaInterfaceDescriptorLoadWhenBuildingThenProgramIsCorrect) { - iOpenCL::SPatchMediaInterfaceDescriptorLoad mediaIdLoad; - mediaIdLoad.Token = PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD; - mediaIdLoad.Size = sizeof(SPatchMediaInterfaceDescriptorLoad); - mediaIdLoad.InterfaceDescriptorDataOffset = 0xabcd; - - pPatchList = &mediaIdLoad; - patchListSize = mediaIdLoad.Size; - - buildAndDecode(); - - EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD, pKernelInfo->patchInfo.interfaceDescriptorDataLoad->Token); -} - TEST_F(KernelDataTest, GivenAllocateLocalSurfaceWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchAllocateLocalSurface allocateLocalSurface; allocateLocalSurface.Token = PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE; @@ -64,8 +51,7 @@ TEST_F(KernelDataTest, GivenAllocateLocalSurfaceWhenBuildingThenProgramIsCorrect buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE, pKernelInfo->patchInfo.localsurface->Token); - EXPECT_EQ_VAL(allocateLocalSurface.TotalInlineLocalMemorySize, pKernelInfo->patchInfo.localsurface->TotalInlineLocalMemorySize); + EXPECT_EQ_VAL(allocateLocalSurface.TotalInlineLocalMemorySize, pKernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize); } TEST_F(KernelDataTest, GivenAllocateStatelessConstantMemoryWithInitWhenBuildingThenProgramIsCorrect) { @@ -81,8 +67,7 @@ TEST_F(KernelDataTest, GivenAllocateStatelessConstantMemoryWithInitWhenBuildingT buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->Token); - EXPECT_EQ_VAL(0xddu, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset); + EXPECT_EQ_VAL(0xddu, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful); } TEST_F(KernelDataTest, GivenAllocateStatelessGlobalMemoryWithInitWhenBuildingThenProgramIsCorrect) { @@ -98,8 +83,7 @@ TEST_F(KernelDataTest, GivenAllocateStatelessGlobalMemoryWithInitWhenBuildingThe buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION, pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->Token); - EXPECT_EQ_VAL(0xddu, pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset); + EXPECT_EQ_VAL(0xddu, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful); } TEST_F(KernelDataTest, GivenPrintfStringWhenBuildingThenProgramIsCorrect) { @@ -146,9 +130,7 @@ TEST_F(KernelDataTest, GivenMediaVfeStateWhenBuildingThenProgramIsCorrect) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_VFE_STATE, pKernelInfo->patchInfo.mediavfestate->Token); - EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->patchInfo.mediavfestate->PerThreadScratchSpace); - EXPECT_EQ_VAL(MediaVFEState.ScratchSpaceOffset, pKernelInfo->patchInfo.mediavfestate->ScratchSpaceOffset); + EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); } TEST_F(KernelDataTest, WhenMediaVfeStateSlot1TokenIsParsedThenCorrectValuesAreSet) { @@ -163,9 +145,7 @@ TEST_F(KernelDataTest, WhenMediaVfeStateSlot1TokenIsParsedThenCorrectValuesAreSe buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1, pKernelInfo->patchInfo.mediaVfeStateSlot1->Token); - EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace); - EXPECT_EQ_VAL(MediaVFEState.ScratchSpaceOffset, pKernelInfo->patchInfo.mediaVfeStateSlot1->ScratchSpaceOffset); + EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); } TEST_F(KernelDataTest, GivenSyncBufferTokenWhenParsingProgramThenTokenIsFound) { @@ -181,27 +161,10 @@ TEST_F(KernelDataTest, GivenSyncBufferTokenWhenParsingProgramThenTokenIsFound) { buildAndDecode(); - EXPECT_EQ(token.Token, pKernelInfo->patchInfo.pAllocateSyncBuffer->Token); - EXPECT_EQ(token.SurfaceStateHeapOffset, pKernelInfo->patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset); - EXPECT_EQ(token.DataParamOffset, pKernelInfo->patchInfo.pAllocateSyncBuffer->DataParamOffset); - EXPECT_EQ(token.DataParamSize, pKernelInfo->patchInfo.pAllocateSyncBuffer->DataParamSize); -} - -TEST_F(KernelDataTest, GivenMediaInterfaceDescriptorDataWhenBuildingThenProgramIsCorrect) { - iOpenCL::SPatchInterfaceDescriptorData idData; - idData.Token = PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA; - idData.Size = sizeof(SPatchInterfaceDescriptorData); - idData.BindingTableOffset = 0xaa; - idData.KernelOffset = 0xbb; - idData.Offset = 0xcc; - idData.SamplerStateOffset = 0xdd; - - pPatchList = &idData; - patchListSize = idData.Size; - - buildAndDecode(); - - EXPECT_EQ_CONST(PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA, pKernelInfo->patchInfo.interfaceDescriptorData->Token); + EXPECT_TRUE(pKernelInfo->kernelDescriptor.kernelAttributes.flags.usesSyncBuffer); + EXPECT_EQ(token.SurfaceStateHeapOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful); + EXPECT_EQ(token.DataParamOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.stateless); + EXPECT_EQ(token.DataParamSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.pointerSize); } TEST_F(KernelDataTest, GivenSamplerArgumentWhenBuildingThenProgramIsCorrect) { @@ -251,45 +214,22 @@ TEST_F(KernelDataTest, GivenBindingTableStateWhenBuildingThenProgramIsCorrect) { buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_BINDING_TABLE_STATE, pKernelInfo->patchInfo.bindingTableState->Token); + EXPECT_EQ_CONST(bindingTableState.Count, pKernelInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries); + EXPECT_EQ_CONST(bindingTableState.Offset, pKernelInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); } TEST_F(KernelDataTest, GivenDataParameterStreamWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchDataParameterStream dataParameterStream; dataParameterStream.Token = PATCH_TOKEN_DATA_PARAMETER_STREAM; dataParameterStream.Size = sizeof(SPatchDataParameterStream); - dataParameterStream.DataParameterStreamSize = 0x10; + dataParameterStream.DataParameterStreamSize = 64; pPatchList = &dataParameterStream; patchListSize = dataParameterStream.Size; buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_STREAM, pKernelInfo->patchInfo.dataParameterStream->Token); -} - -TEST_F(KernelDataTest, GivenThreadPayloadWhenBuildingThenProgramIsCorrect) { - iOpenCL::SPatchThreadPayload threadPayload; - threadPayload.Token = PATCH_TOKEN_THREAD_PAYLOAD; - threadPayload.Size = sizeof(SPatchThreadPayload); - threadPayload.GetGlobalOffsetPresent = true; - threadPayload.GetGroupIDPresent = true; - threadPayload.GetLocalIDPresent = true; - threadPayload.HeaderPresent = true; - threadPayload.IndirectPayloadStorage = true; - threadPayload.LocalIDFlattenedPresent = true; - threadPayload.LocalIDXPresent = true; - threadPayload.LocalIDYPresent = true; - threadPayload.LocalIDZPresent = true; - threadPayload.OffsetToSkipPerThreadDataLoad = true; - threadPayload.PassInlineData = true; - - pPatchList = &threadPayload; - patchListSize = threadPayload.Size; - - buildAndDecode(); - - EXPECT_EQ_CONST(PATCH_TOKEN_THREAD_PAYLOAD, pKernelInfo->patchInfo.threadPayload->Token); + EXPECT_EQ_CONST(dataParameterStream.DataParameterStreamSize, pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize); } TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingThenProgramIsCorrect) { @@ -444,27 +384,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentRequiresSubgroupIndependentForwa EXPECT_EQ_VAL(1u, program->getSubgroupKernelInfoArray(rootDeviceIndex).size()); } -TEST_F(KernelDataTest, GivenKernelAttributesInfoWhenBuildingThenProgramIsCorrect) { - iOpenCL::SPatchKernelAttributesInfo kernelAttributesInfo; - kernelAttributesInfo.Token = PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO; - kernelAttributesInfo.AttributesSize = 0x10; - kernelAttributesInfo.Size = sizeof(SPatchKernelAttributesInfo) + kernelAttributesInfo.AttributesSize; - const std::string attributesValue = "dummy_attribute"; - - std::vector patchToken(sizeof(iOpenCL::SPatchKernelAttributesInfo) + kernelAttributesInfo.AttributesSize); - memcpy_s(patchToken.data(), patchToken.size(), &kernelAttributesInfo, sizeof(iOpenCL::SPatchKernelAttributesInfo)); - memcpy_s(patchToken.data() + sizeof(iOpenCL::SPatchKernelAttributesInfo), kernelAttributesInfo.AttributesSize, - attributesValue.data(), attributesValue.size()); - - pPatchList = patchToken.data(); - patchListSize = static_cast(patchToken.size()); - - buildAndDecode(); - - EXPECT_EQ(attributesValue, pKernelInfo->kernelDescriptor.kernelMetadata.kernelLanguageAttributes); - EXPECT_EQ_CONST(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO, pKernelInfo->patchInfo.pKernelAttributesInfo->Token); -} - TEST_F(KernelDataTest, WhenDecodingExecutionEnvironmentTokenThenWalkOrderIsForcedToXMajor) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; @@ -541,17 +460,23 @@ TEST_P(DataParameterTest, GivenTokenTypeWhenBuildingThenProgramIsCorrect) { buildAndDecode(); - if (pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size() > 0) { - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Token); - EXPECT_EQ_VAL(GetParam(), pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Type); - if (pKernelInfo->kernelArgInfo.size() == dataParameterToken.ArgumentNumber + 1) { - if (GetParam() == DATA_PARAMETER_BUFFER_STATEFUL) { - EXPECT_TRUE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess); - } else { - EXPECT_FALSE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess); - } - } // no else - some params are skipped + if (DATA_PARAMETER_KERNEL_ARGUMENT == GetParam()) { + ASSERT_NE(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); + EXPECT_EQ(dataParameterToken.ArgumentNumber, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].argNum); + EXPECT_EQ(dataParameterToken.Offset, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.offset); + EXPECT_EQ(dataParameterToken.SourceOffset, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.sourceOffset); + EXPECT_EQ(dataParameterToken.DataSize, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.size); + } else { + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); } + + if (pKernelInfo->kernelArgInfo.size() == dataParameterToken.ArgumentNumber + 1) { + if (GetParam() == DATA_PARAMETER_BUFFER_STATEFUL) { + EXPECT_TRUE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess); + } else { + EXPECT_FALSE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess); + } + } // no else - some params are skipped } // note that we start at '2' because we test kernel arg tokens elsewhere @@ -577,7 +502,7 @@ TEST_F(KernelDataParameterTest, GivenDataParameterBufferOffsetWhenBuildingThenPr buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ_VAL(pKernelInfo->kernelArgInfo[1].offsetBufferOffset, dataParameterToken.Offset); } @@ -594,7 +519,7 @@ TEST_F(KernelDataParameterTest, givenDataParameterBufferStatefulWhenDecodingThen buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_TRUE(pKernelInfo->kernelArgInfo[1].pureStatefulBufferAccess); } @@ -616,7 +541,7 @@ TEST_F(KernelDataParameterTest, givenUnknownDataParameterWhenDecodedThenParamete buildAndDecode(); - EXPECT_EQ_VAL(0u, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ_VAL(0u, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); } TEST_F(KernelDataTest, GivenDataParameterSumOfLocalMemoryObjectArgumentSizesWhenBuildingThenProgramIsCorrect) { @@ -640,7 +565,7 @@ TEST_F(KernelDataTest, GivenDataParameterSumOfLocalMemoryObjectArgumentSizesWhen buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(alignment, pKernelInfo->kernelArgInfo[argumentNumber].slmAlignment); ASSERT_EQ(1U, pKernelInfo->kernelArgInfo[argumentNumber].kernelArgPatchInfoVector.size()); @@ -668,7 +593,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageWidthWhenBuildingThenProgramIsCorr buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgWidth, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgWidth); } @@ -694,7 +619,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageHeightWhenBuildingThenProgramIsCor buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgHeight, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgHeight); @@ -721,7 +646,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageDepthWhenBuildingThenProgramIsCorr buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgDepth, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgDepth); @@ -748,7 +673,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageNumSamplersWhenBuildingThenProgram buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumSamples, pKernelInfo->kernelArgInfo[argumentNumber].offsetNumSamples); @@ -775,7 +700,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageNumMipLevelsWhenBuildingThenProgra buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumMipLevels, pKernelInfo->kernelArgInfo[argumentNumber].offsetNumMipLevels); @@ -810,7 +735,7 @@ TEST_F(KernelDataTest, givenFlatImageDataParamTokenWhenDecodingThenSetAllOffsets buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); }; @@ -848,7 +773,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageDataTypeWhenBuildingThenProgramIsC buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetChannelDataType, pKernelInfo->kernelArgInfo[argumentNumber].offsetChannelDataType); @@ -875,7 +800,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageChannelOrderWhenBuildingThenProgra buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetChannelOrder, pKernelInfo->kernelArgInfo[argumentNumber].offsetChannelOrder); @@ -902,7 +827,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageArraySizeWhenBuildingThenProgramIs buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImageArraySize, pKernelInfo->kernelArgInfo[argumentNumber].offsetArraySize); @@ -929,7 +854,7 @@ TEST_F(KernelDataTest, GivenDataParameterWorkDimensionsWhenBuildingThenProgramIs buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetWorkDim, pKernelInfo->workloadInfo.workDimOffset); @@ -956,7 +881,7 @@ TEST_F(KernelDataTest, GivenDataParameterSimdSizeWhenBuildingThenProgramIsCorrec buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetSimdSize, pKernelInfo->workloadInfo.simdSizeOffset); @@ -983,7 +908,7 @@ TEST_F(KernelDataTest, GivenParameterPrivateMemoryStatelessSizeWhenBuildingThenP buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } @@ -1008,7 +933,7 @@ TEST_F(KernelDataTest, GivenDataParameterLocalMemoryStatelessWindowSizeWhenBuild buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } @@ -1033,7 +958,7 @@ TEST_F(KernelDataTest, GivenDataParameterLocalMemoryStatelessWindowStartAddressW buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } @@ -1058,7 +983,7 @@ TEST_F(KernelDataTest, GivenDataParameterNumWorkGroupsWhenBuildingThenProgramIsC buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumWorkGroups[argumentNumber], pKernelInfo->workloadInfo.numWorkGroupsOffset[argumentNumber]); @@ -1085,7 +1010,7 @@ TEST_F(KernelDataTest, GivenDataParameterMaxWorkgroupSizeWhenBuildingThenProgram buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetMaxWorkGroupSize, pKernelInfo->workloadInfo.maxWorkGroupSizeOffset); @@ -1113,7 +1038,7 @@ TEST_F(KernelDataTest, GivenDataParameterSamplerAddressModeWhenBuildingThenProgr buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(1U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[0].offsetSamplerAddressingMode); @@ -1141,7 +1066,7 @@ TEST_F(KernelDataTest, GivenDataParameterSamplerCoordinateSnapWaIsRequiredThenKe buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[1].offsetSamplerSnapWa); @@ -1169,7 +1094,7 @@ TEST_F(KernelDataTest, GivenDataParameterSamplerNormalizedCoordsThenKernelInfoIs buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[1].offsetSamplerNormalizedCoords); @@ -1207,9 +1132,7 @@ TEST_F(KernelDataTest, GivenDataParameterKernelArgumentWhenBuildingThenProgramIs buildAndDecode(); - EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Token); - EXPECT_EQ_VAL(DATA_PARAMETER_KERNEL_ARGUMENT, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Type); - + ASSERT_EQ(2U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(1u, pKernelInfo->kernelArgInfo.size()); ASSERT_EQ(2u, pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.size()); @@ -1262,7 +1185,7 @@ TEST_F(KernelDataTest, GivenPatchTokenSamplerStateArrayWhenBuildingThenProgramIs token.Size = static_cast(sizeof(SPatchSamplerStateArray)); token.Offset = 33; - token.Count = 0x1FF0; + token.Count = 0xF0; token.BorderColorOffset = 0x3FF0; pPatchList = &token; @@ -1270,11 +1193,9 @@ TEST_F(KernelDataTest, GivenPatchTokenSamplerStateArrayWhenBuildingThenProgramIs buildAndDecode(); - ASSERT_NE(nullptr, pKernelInfo->patchInfo.samplerStateArray); - - EXPECT_EQ_VAL(token.Offset, pKernelInfo->patchInfo.samplerStateArray->Offset); - EXPECT_EQ_VAL(token.Count, pKernelInfo->patchInfo.samplerStateArray->Count); - EXPECT_EQ_VAL(token.BorderColorOffset, pKernelInfo->patchInfo.samplerStateArray->BorderColorOffset); + EXPECT_EQ_VAL(token.Offset, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset); + EXPECT_EQ_VAL(token.Count, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.numSamplers); + EXPECT_EQ_VAL(token.BorderColorOffset, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.borderColor); } TEST_F(KernelDataTest, GivenPatchTokenAllocateStatelessPrivateMemoryWhenBuildingThenProgramIsCorrect) { @@ -1292,12 +1213,10 @@ TEST_F(KernelDataTest, GivenPatchTokenAllocateStatelessPrivateMemoryWhenBuilding buildAndDecode(); - ASSERT_NE(nullptr, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface); - - EXPECT_EQ_VAL(token.SurfaceStateHeapOffset, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset); - EXPECT_EQ_VAL(token.DataParamOffset, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamOffset); - EXPECT_EQ_VAL(token.DataParamSize, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamSize); - EXPECT_EQ_VAL(token.PerThreadPrivateMemorySize, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize); + EXPECT_EQ_VAL(token.SurfaceStateHeapOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful); + EXPECT_EQ_VAL(token.DataParamOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.stateless); + EXPECT_EQ_VAL(token.DataParamSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.pointerSize); + EXPECT_EQ_VAL(PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(token, pKernelInfo->kernelDescriptor.kernelAttributes.simdSize), pKernelInfo->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize); } TEST_F(KernelDataTest, GivenDataParameterVmeMbBlockTypeWhenBuildingThenProgramIsCorrect) { @@ -1321,7 +1240,7 @@ TEST_F(KernelDataTest, GivenDataParameterVmeMbBlockTypeWhenBuildingThenProgramIs buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeMbBlockType, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeMbBlockType); @@ -1348,7 +1267,7 @@ TEST_F(KernelDataTest, GivenDataParameterDataVmeSubpixelModeWhenBuildingThenProg buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSubpixelMode, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSubpixelMode); @@ -1375,7 +1294,7 @@ TEST_F(KernelDataTest, GivenDataParameterVmeSadAdjustModeWhenBuildingThenProgram buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSadAdjustMode, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSadAdjustMode); @@ -1402,7 +1321,7 @@ TEST_F(KernelDataTest, GivenDataParameterVmeSearchPathTypeWhenBuildingThenProgra buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSearchPathType, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSearchPathType); @@ -1420,30 +1339,11 @@ TEST_F(KernelDataTest, GivenPatchTokenStateSipWhenBuildingThenProgramIsCorrect) buildAndDecode(); - EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); + EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ_VAL(token.SystemKernelOffset, pKernelInfo->systemKernelOffset); } -TEST_F(KernelDataTest, GivenPatchTokenAllocateSipSurfaceWhenBuildingThenProgramIsCorrect) { - SPatchAllocateSystemThreadSurface token; - token.Token = PATCH_TOKEN_ALLOCATE_SIP_SURFACE; - token.Size = static_cast(sizeof(SPatchAllocateSystemThreadSurface)); - token.Offset = 32; - token.BTI = 0; - token.PerThreadSystemThreadSurfaceSize = 0x10000; - - pPatchList = &token; - patchListSize = token.Size; - - buildAndDecode(); - - EXPECT_EQ(0u, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->BTI); - EXPECT_EQ(token.Offset, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->Offset); - EXPECT_EQ(token.Token, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->Token); - EXPECT_EQ(token.PerThreadSystemThreadSurfaceSize, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize); -} - TEST_F(KernelDataTest, givenSymbolTablePatchTokenThenLinkerInputIsCreated) { SPatchFunctionTableInfo token; token.Token = PATCH_TOKEN_PROGRAM_SYMBOL_TABLE; diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 432292c5c4..47bcab332c 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1341,7 +1341,6 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); - EXPECT_NE(nullptr, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization); ASSERT_NE(nullptr, pProgram->getConstantSurface(pClDevice->getRootDeviceIndex())); uint32_t expected_values[] = {0xabcd5432u, 0xaabb5533u}; @@ -1377,7 +1376,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident auto crossThreadData = pKernel->getCrossThreadData(rootDeviceIndex); uint32_t *constBuffGpuAddr = reinterpret_cast(pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddressToPatch()); - uintptr_t *pDst = reinterpret_cast(crossThreadData + pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset); + uintptr_t *pDst = reinterpret_cast(crossThreadData + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless); EXPECT_EQ(*pDst, reinterpret_cast(constBuffGpuAddr)); @@ -1406,7 +1405,6 @@ TEST_F(PatchTokenTests, WhenBuildingProgramThenGwsIsSet) { auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); - ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[0]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[1]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[2]); @@ -1425,14 +1423,12 @@ TEST_F(PatchTokenTests, WhenBuildingProgramThenLwsIsSet) { auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); - ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[0]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[1]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[2]); pKernelInfo = pProgram->getKernelInfo("test_get_local_size", rootDeviceIndex); - ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[0]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[1]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[2]); @@ -2072,14 +2068,14 @@ TEST_F(ProgramTests, GivenZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfaces KernelInfo *infoBlock = new KernelInfo; - SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface; - privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock; - privateSurfaceBlock->DataParamSize = 8; - privateSurfaceBlock->Size = 8; - privateSurfaceBlock->SurfaceStateHeapOffset = 0; - privateSurfaceBlock->Token = 0; - privateSurfaceBlock->PerThreadPrivateMemorySize = 0; - infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; + SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {}; + privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock; + privateSurfaceBlock.DataParamSize = 8; + privateSurfaceBlock.Size = 8; + privateSurfaceBlock.SurfaceStateHeapOffset = 0; + privateSurfaceBlock.Token = 0; + privateSurfaceBlock.PerThreadPrivateMemorySize = 0; + populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock); program->blockKernelManager->addBlockKernelInfo(infoBlock); @@ -2087,7 +2083,6 @@ TEST_F(ProgramTests, GivenZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfaces EXPECT_EQ(nullptr, program->getBlockKernelManager()->getPrivateSurface(0)); - delete privateSurfaceBlock; delete program; } @@ -2098,14 +2093,14 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa KernelInfo *infoBlock = new KernelInfo; - SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface; - privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock; - privateSurfaceBlock->DataParamSize = 8; - privateSurfaceBlock->Size = 8; - privateSurfaceBlock->SurfaceStateHeapOffset = 0; - privateSurfaceBlock->Token = 0; - privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; - infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; + SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {}; + privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock; + privateSurfaceBlock.DataParamSize = 8; + privateSurfaceBlock.Size = 8; + privateSurfaceBlock.SurfaceStateHeapOffset = 0; + privateSurfaceBlock.Token = 0; + privateSurfaceBlock.PerThreadPrivateMemorySize = 1000; + populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock); program->blockKernelManager->addBlockKernelInfo(infoBlock); @@ -2113,7 +2108,6 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa EXPECT_NE(nullptr, program->getBlockKernelManager()->getPrivateSurface(0)); - delete privateSurfaceBlock; delete program; } @@ -2124,14 +2118,14 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa KernelInfo *infoBlock = new KernelInfo; - SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface; - privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock; - privateSurfaceBlock->DataParamSize = 8; - privateSurfaceBlock->Size = 8; - privateSurfaceBlock->SurfaceStateHeapOffset = 0; - privateSurfaceBlock->Token = 0; - privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; - infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; + SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {}; + privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock; + privateSurfaceBlock.DataParamSize = 8; + privateSurfaceBlock.Size = 8; + privateSurfaceBlock.SurfaceStateHeapOffset = 0; + privateSurfaceBlock.Token = 0; + privateSurfaceBlock.PerThreadPrivateMemorySize = 1000; + populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock); program->blockKernelManager->addBlockKernelInfo(infoBlock); @@ -2147,7 +2141,6 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa EXPECT_EQ(privateSurface, privateSurface2); - delete privateSurfaceBlock; delete program; } @@ -2158,14 +2151,14 @@ TEST_F(ProgramTests, givenProgramWithBlockKernelsWhenfreeBlockResourcesisCalledT KernelInfo *infoBlock = new KernelInfo; - SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface; - privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock; - privateSurfaceBlock->DataParamSize = 8; - privateSurfaceBlock->Size = 8; - privateSurfaceBlock->SurfaceStateHeapOffset = 0; - privateSurfaceBlock->Token = 0; - privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; - infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; + SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {}; + privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock; + privateSurfaceBlock.DataParamSize = 8; + privateSurfaceBlock.Size = 8; + privateSurfaceBlock.SurfaceStateHeapOffset = 0; + privateSurfaceBlock.Token = 0; + privateSurfaceBlock.PerThreadPrivateMemorySize = 1000; + populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock); program->blockKernelManager->addBlockKernelInfo(infoBlock); @@ -2176,7 +2169,6 @@ TEST_F(ProgramTests, givenProgramWithBlockKernelsWhenfreeBlockResourcesisCalledT program->freeBlockResources(); - delete privateSurfaceBlock; delete program; } @@ -2964,17 +2956,16 @@ using ProgramMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(ProgramMultiRootDeviceTests, WhenPrivateSurfaceIsCreatedThenItHasCorrectRootDeviceIndex) { auto program = std::make_unique(context.get(), false, toClDeviceVector(*device1)); - - auto privateSurfaceBlock = std::make_unique(); - privateSurfaceBlock->DataParamOffset = 0; - privateSurfaceBlock->DataParamSize = 8; - privateSurfaceBlock->Size = 8; - privateSurfaceBlock->SurfaceStateHeapOffset = 0; - privateSurfaceBlock->Token = 0; - privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; - auto infoBlock = std::make_unique(); - infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock.get(); + + SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {}; + privateSurfaceBlock.DataParamOffset = 0; + privateSurfaceBlock.DataParamSize = 8; + privateSurfaceBlock.Size = 8; + privateSurfaceBlock.SurfaceStateHeapOffset = 0; + privateSurfaceBlock.Token = 0; + privateSurfaceBlock.PerThreadPrivateMemorySize = 1000; + populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock); program->blockKernelManager->addBlockKernelInfo(infoBlock.release()); program->allocateBlockPrivateSurfaces(*device1); diff --git a/opencl/test/unit_test/program/program_with_kernel_debug_tests.cpp b/opencl/test/unit_test/program/program_with_kernel_debug_tests.cpp index f6a09b2841..fecb016695 100644 --- a/opencl/test/unit_test/program/program_with_kernel_debug_tests.cpp +++ b/opencl/test/unit_test/program/program_with_kernel_debug_tests.cpp @@ -257,7 +257,7 @@ TEST_F(ProgramWithKernelDebuggingTest, givenProgramWithKernelDebugEnabledWhenBui EXPECT_EQ(CL_SUCCESS, retVal); auto kernelInfo = pProgram->getKernelInfo("CopyBuffer", pDevice->getRootDeviceIndex()); - EXPECT_NE(0u, kernelInfo->patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize); + EXPECT_NE(0u, kernelInfo->kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize); } TEST_F(ProgramWithKernelDebuggingTest, givenKernelDebugEnabledWhenProgramIsBuiltThenDebugDataIsStored) { diff --git a/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp b/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp index 7562739df2..89f28639a8 100644 --- a/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp +++ b/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp @@ -23,6 +23,9 @@ #include using namespace NEO; +namespace NEO { +void populateKernelDescriptor(KernelDescriptor &dst, const SPatchDataParameterStream &token); +} class MockSchedulerKernel : public SchedulerKernel { public: @@ -31,14 +34,14 @@ class MockSchedulerKernel : public SchedulerKernel { static MockSchedulerKernel *create(Program &program, KernelInfo *&info) { info = new KernelInfo; - SPatchDataParameterStream dataParametrStream; - dataParametrStream.DataParameterStreamSize = 8; - dataParametrStream.Size = 8; + SPatchDataParameterStream dataParameterStream; + dataParameterStream.DataParameterStreamSize = 8; + dataParameterStream.Size = 8; + populateKernelDescriptor(info->kernelDescriptor, dataParameterStream); info->kernelDescriptor.kernelAttributes.simdSize = 32; info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false; - info->patchInfo.dataParameterStream = &dataParametrStream; KernelArgInfo bufferArg; bufferArg.isBuffer = true; @@ -114,8 +117,8 @@ TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenCurbeSizeIsCorrect) { SPatchDataParameterStream dataParameterStream; dataParameterStream.DataParameterStreamSize = crossTrheadDataSize; + populateKernelDescriptor(info.kernelDescriptor, dataParameterStream); - info.patchInfo.dataParameterStream = &dataParameterStream; info.heapInfo.DynamicStateHeapSize = dshSize; KernelInfoContainer kernelInfos; @@ -279,8 +282,6 @@ TEST(SchedulerKernelTest, GivenNullKernelInfoWhenGettingCurbeSizeThenSizeIsCorre MockProgram program(toClDeviceVector(*device)); KernelInfo info; - info.patchInfo.dataParameterStream = nullptr; - KernelInfoContainer kernelInfos; kernelInfos.push_back(&info); MockSchedulerKernel kernel(&program, kernelInfos); diff --git a/shared/source/device_binary_format/patchtokens_decoder.h b/shared/source/device_binary_format/patchtokens_decoder.h index 373ead75fd..d147da2db2 100644 --- a/shared/source/device_binary_format/patchtokens_decoder.h +++ b/shared/source/device_binary_format/patchtokens_decoder.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -209,12 +209,9 @@ inline const uint8_t *getInlineData(const SPatchString *ptr) { return ptrOffset(reinterpret_cast(ptr), sizeof(SPatchString)); } -inline uint64_t getPerHwThreadPrivateSurfaceSize(const SPatchAllocateStatelessPrivateSurface *ptr, uint32_t simdSize) { - if (nullptr == ptr) { - return 0; - } - uint32_t multiplier = ptr->IsSimtThread ? simdSize : 1U; - return static_cast(ptr->PerThreadPrivateMemorySize) * multiplier; +inline uint64_t getPerHwThreadPrivateSurfaceSize(const SPatchAllocateStatelessPrivateSurface &ptr, uint32_t simdSize) { + uint32_t multiplier = ptr.IsSimtThread ? simdSize : 1U; + return static_cast(ptr.PerThreadPrivateMemorySize) * multiplier; } const KernelArgAttributesFromPatchtokens getInlineData(const SPatchKernelArgumentInfo *ptr); diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index e985c620a7..3d504ac5dc 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -70,6 +70,7 @@ struct KernelDescriptor final { uint8_t gpuPointerSize = 0; uint8_t simdSize = 8; uint8_t numLocalIdChannels = 3; + uint8_t localId[3] = {0U, 0U, 0U}; bool supportsBuffersBiggerThan4Gb() const { return Stateless == bufferAddressingMode; diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp index ce331c4580..c4a9567944 100644 --- a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp @@ -93,6 +93,9 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchInterfaceDescri void populateKernelDescriptor(KernelDescriptor &dst, const SPatchThreadPayload &token) { dst.kernelAttributes.flags.perThreadDataHeaderIsPresent = (0U != token.HeaderPresent); dst.kernelAttributes.numLocalIdChannels = token.LocalIDXPresent + token.LocalIDYPresent + token.LocalIDZPresent; + dst.kernelAttributes.localId[0] = token.LocalIDXPresent; + dst.kernelAttributes.localId[1] = token.LocalIDYPresent; + dst.kernelAttributes.localId[2] = token.LocalIDZPresent; dst.kernelAttributes.flags.usesFlattenedLocalIds = (0U != token.LocalIDFlattenedPresent); dst.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = (0U != token.UnusedPerThreadConstantPresent); dst.kernelAttributes.flags.passInlineData = (0 != token.PassInlineData); @@ -154,8 +157,7 @@ void populatePointerKernelArg(ArgDescPointer &dst, const TokenT &src, KernelDesc void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrivateSurface &token) { dst.kernelAttributes.flags.usesPrivateMemory = true; - dst.kernelAttributes.perHwThreadPrivateMemorySize = token.PerThreadPrivateMemorySize; - dst.kernelAttributes.perHwThreadPrivateMemorySize = static_cast(PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(&token, dst.kernelAttributes.simdSize)); + dst.kernelAttributes.perHwThreadPrivateMemorySize = static_cast(PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(token, dst.kernelAttributes.simdSize)); populatePointerKernelArg(dst.payloadMappings.implicitArgs.privateMemoryAddress, token, dst.kernelAttributes.bufferAddressingMode); } @@ -197,7 +199,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchString &token) } template -inline void populateKernelDescriptorIfNotNull(KernelDescriptor &dst, const TokenT *token, ArgsT &&... args) { +inline void populateKernelDescriptorIfNotNull(KernelDescriptor &dst, const TokenT *token, ArgsT &&...args) { if (token != nullptr) { populateKernelDescriptor(dst, *token, std::forward(args)...); } diff --git a/shared/source/program/program_info.cpp b/shared/source/program/program_info.cpp index 6c88855811..9fd64de8b5 100644 --- a/shared/source/program/program_info.cpp +++ b/shared/source/program/program_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -21,10 +21,7 @@ ProgramInfo::~ProgramInfo() { size_t getMaxInlineSlmNeeded(const ProgramInfo &programInfo) { uint32_t ret = 0U; for (const auto &kernelInfo : programInfo.kernelInfos) { - if (nullptr == kernelInfo->patchInfo.localsurface) { - continue; - } - ret = std::max(ret, kernelInfo->patchInfo.localsurface->TotalInlineLocalMemorySize); + ret = std::max(ret, kernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize); } return ret; } diff --git a/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp b/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp index 1ec0f8d1b9..743fffb2be 100644 --- a/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp +++ b/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -271,7 +271,7 @@ TEST(KernelDescriptorFromPatchtokens, GivenImplicitArgsThenSetsProperPartsOfDesc kernelTokens.tokens.allocateStatelessPrivateSurface = &privateSurface; NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesPrivateMemory); - EXPECT_EQ(NEO::PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(&privateSurface, kernelDescriptor.kernelAttributes.simdSize), kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize); + EXPECT_EQ(NEO::PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(privateSurface, kernelDescriptor.kernelAttributes.simdSize), kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize); EXPECT_EQ(privateSurface.DataParamOffset, kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.stateless); EXPECT_EQ(privateSurface.DataParamSize, kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.pointerSize); EXPECT_EQ(privateSurface.SurfaceStateHeapOffset, kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful); diff --git a/shared/test/unit_test/program/program_info_tests.cpp b/shared/test/unit_test/program/program_info_tests.cpp index c840fc6fd9..79bbfcff80 100644 --- a/shared/test/unit_test/program/program_info_tests.cpp +++ b/shared/test/unit_test/program/program_info_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,6 +8,7 @@ #include "shared/source/program/program_info.h" #include "opencl/source/program/kernel_info.h" +#include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -40,9 +41,9 @@ TEST(GetMaxInlineSlmNeeded, GivenProgramWithKernelsThenReturnMaxOfInlineSlmNeede slmTokens[2].TotalInlineLocalMemorySize = 32; NEO::ProgramInfo programInfo; programInfo.kernelInfos = {new NEO::KernelInfo(), new NEO::KernelInfo(), new NEO::KernelInfo()}; - programInfo.kernelInfos[0]->patchInfo.localsurface = &slmTokens[0]; - programInfo.kernelInfos[1]->patchInfo.localsurface = &slmTokens[1]; - programInfo.kernelInfos[2]->patchInfo.localsurface = &slmTokens[2]; + populateKernelDescriptor(programInfo.kernelInfos[0]->kernelDescriptor, slmTokens[0]); + populateKernelDescriptor(programInfo.kernelInfos[1]->kernelDescriptor, slmTokens[1]); + populateKernelDescriptor(programInfo.kernelInfos[2]->kernelDescriptor, slmTokens[2]); EXPECT_EQ(64U, NEO::getMaxInlineSlmNeeded(programInfo)); }