From aa950a4a96cf2423eb2be58e6c79888b71e77a0b Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Fri, 1 Dec 2023 14:30:28 +0000 Subject: [PATCH] feature: Add heapless mode programming in l0 1/n Related-To: NEO-7621 Signed-off-by: Kamil Kopryk --- level_zero/core/source/cmdlist/cmdlist.h | 1 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 9 +- .../cmdlist/cmdlist_hw_skl_to_tgllp.inl | 5 +- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 5 +- .../test_cmdlist_append_launch_kernel_1.cpp | 3 +- .../test_cmdlist_append_launch_kernel_3.cpp | 4 +- ...ardware_commands_helper_xehp_and_later.inl | 7 +- .../command_container/command_encoder.h | 36 +++-- .../command_container/command_encoder.inl | 6 + .../command_encoder_bdw_and_later.inl | 9 +- .../command_encoder_xehp_and_later.inl | 102 +++++++++----- shared/source/gen11/command_encoder_gen11.cpp | 3 + .../gen12lp/command_encoder_gen12lp.cpp | 3 + shared/source/gen8/command_encoder_gen8.cpp | 3 + shared/source/gen9/command_encoder_gen9.cpp | 3 + .../gen11/hw_cmds_generated_gen11.inl | 3 + .../gen12lp/hw_cmds_generated_gen12lp.inl | 4 +- .../generated/gen8/hw_cmds_generated_gen8.inl | 3 + .../generated/gen9/hw_cmds_generated_gen9.inl | 3 + .../hw_cmds_generated_xe_hpc_core.inl | 3 + .../hw_cmds_generated_xe_hpg_core.inl | 3 + .../command_encoder_xe_hpc_core.cpp | 3 + .../command_encoder_xe_hpg_core.cpp | 10 +- .../command_encoder_tests.cpp | 6 +- .../encoders/test_encode_dispatch_kernel.cpp | 132 +++++++++++------- ...t_encode_dispatch_kernel_dg2_and_later.cpp | 6 +- ...t_encode_dispatch_kernel_pvc_and_later.cpp | 2 +- ..._encode_dispatch_kernel_xehp_and_later.cpp | 71 +++++----- .../encoders/test_encode_pvc_and_later.cpp | 4 +- .../pvc/test_encode_dispatch_kernel_pvc.cpp | 5 +- .../xe_hpc_core/test_encode_xe_hpc_core.cpp | 13 +- 31 files changed, 295 insertions(+), 175 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 34a96037c8..3ca91c55aa 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -445,6 +445,7 @@ struct CommandList : _ze_command_list_handle_t { bool dispatchCmdListBatchBufferAsPrimary = false; bool copyThroughLockedPtrEnabled = false; bool useOnlyGlobalTimestamps = false; + bool heaplessModeEnabled = false; }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 73e21a8143..0d9cb78856 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -16,6 +16,7 @@ #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/blit_properties.h" +#include "shared/source/helpers/compiler_product_helper_base.inl" #include "shared/source/helpers/definitions/command_encoder_args.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/hw_info.h" @@ -210,6 +211,7 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment(); auto &productHelper = rootDeviceEnvironment.getHelper(); auto &gfxCoreHelper = neoDevice->getGfxCoreHelper(); + auto &compilerProductHelper = neoDevice->getCompilerProductHelper(); auto gmmHelper = rootDeviceEnvironment.getGmmHelper(); this->dcFlushSupport = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, rootDeviceEnvironment); @@ -230,7 +232,7 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, this->cmdListType == CommandListType::TYPE_REGULAR); this->useOnlyGlobalTimestamps = gfxCoreHelper.useOnlyGlobalTimestamps(); this->maxFillPaternSizeForCopyEngine = gfxCoreHelper.getMaxFillPaternSizeForCopyEngine(); - + this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled(); this->requiredStreamState.initSupport(rootDeviceEnvironment); this->finalStreamState.initSupport(rootDeviceEnvironment); @@ -3179,7 +3181,10 @@ void CommandListCoreFamily::programStateBaseAddress(NEO::CommandC false, // useGlobalAtomics this->partitionCount > 1, // multiOsContextCapable isRcs, // isRcs - this->doubleSbaWa}; // doubleSbaWa + this->doubleSbaWa, // doubleSbaWa + this->heaplessModeEnabled // heaplessModeEnabled + }; + NEO::EncodeStateBaseAddress::encode(encodeStateBaseAddressArgs); bool sbaTrackingEnabled = NEO::Debugger::isDebugEnabled(this->internalUsage) && this->device->getL0Debugger(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index e53db1480b..b4fc0acd27 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -212,10 +212,11 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K false, // isKernelUsingSystemAllocation cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs - this->dcFlushSupport // dcFlushEnable + this->dcFlushSupport, // dcFlushEnable + this->heaplessModeEnabled // isHeaplessModeEnabled }; - NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs); + NEO::EncodeDispatchKernel::encodeCommon(commandContainer, dispatchKernelArgs); if (!this->isFlushTaskSubmissionEnabled) { this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index d228cf7b1a..f3d5924989 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -298,7 +298,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList engineGroupType == NEO::EngineGroupType::renderCompute, // isRcs - this->dcFlushSupport // dcFlushEnable + this->dcFlushSupport, // dcFlushEnable + this->heaplessModeEnabled // isHeaplessModeEnabled }; bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation); @@ -313,7 +314,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } } - NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs); + NEO::EncodeDispatchKernel::encodeCommon(commandContainer, dispatchKernelArgs); if (!this->isFlushTaskSubmissionEnabled) { this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index e8999926da..71fa2c869d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -161,6 +161,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelThenBbEndIsAddedAndNewCmdBufferAllocated, IsAtLeastSkl) { using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restorer; debugManager.flags.DispatchCmdlistCmdBufferPrimary.set(0); @@ -207,7 +208,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA false, // isRcs commandList->getDcFlushRequired(true) // dcFlushEnable }; - NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs); + NEO::EncodeDispatchKernel::template encode(commandContainer, dispatchKernelArgs); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, 0u); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 57b12cb2bd..c20e502041 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -616,6 +616,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA DebugManagerStateRestore restorer; NEO::debugManager.flags.EnableFlushTaskSubmission.set(0); using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + createKernel(); ze_result_t returnValue; @@ -657,7 +659,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA false, // isRcs commandList->getDcFlushRequired(true) // dcFlushEnable }; - EXPECT_THROW(NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs), std::exception); + EXPECT_THROW(NEO::EncodeDispatchKernel::template encode(commandContainer, dispatchKernelArgs), std::exception); } HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenReturnErrorInvalidArgument) { diff --git a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl index 5baa869833..fe1f2d2de2 100644 --- a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl @@ -105,12 +105,7 @@ size_t HardwareCommandsHelper::sendCrossThreadData( uint32_t sizeToCopy = sizeCrossThreadData; if (inlineDataProgrammingRequired == true) { - using InlineData = typename GfxFamily::INLINE_DATA; - uint32_t inlineDataSize = sizeof(InlineData); - - if constexpr (heaplessModeEnabled) { - inlineDataSize = 64; - } + constexpr uint32_t inlineDataSize = WalkerType::getInlineDataSize(); sizeToCopy = std::min(inlineDataSize, sizeCrossThreadData); dest = reinterpret_cast(walkerCmd->getInlineDataPointer()); diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 1f840ed657..483357f995 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -65,6 +65,7 @@ struct EncodeDispatchKernelArgs { bool isKernelDispatchedFromImmediateCmdList = false; bool isRcs = false; bool dcFlushEnable = false; + bool isHeaplessModeEnabled = false; }; enum class MiPredicateType : uint32_t { @@ -94,6 +95,9 @@ struct EncodeDispatchKernel { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; + static void encodeCommon(CommandContainer &container, EncodeDispatchKernelArgs &args); + + template static void encode(CommandContainer &container, EncodeDispatchKernelArgs &args); template @@ -103,6 +107,21 @@ struct EncodeDispatchKernel { static void appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); + template + static void encodeThreadData(WalkerType &walkerCmd, + const uint32_t *startWorkGroup, + const uint32_t *numWorkGroups, + const uint32_t *workGroupSizes, + uint32_t simd, + uint32_t localIdDimensions, + uint32_t threadsPerThreadGroup, + uint32_t threadExecutionMask, + bool localIdsGenerationByRuntime, + bool inlineDataProgrammingRequired, + bool isIndirect, + uint32_t requiredWorkGroupOrder, + const RootDeviceEnvironment &rootDeviceEnvironment); + template static void setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); @@ -118,20 +137,6 @@ struct EncodeDispatchKernel { static bool inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc); - static void encodeThreadData(WALKER_TYPE &walkerCmd, - const uint32_t *startWorkGroup, - const uint32_t *numWorkGroups, - const uint32_t *workGroupSizes, - uint32_t simd, - uint32_t localIdDimensions, - uint32_t threadsPerThreadGroup, - uint32_t threadExecutionMask, - bool localIdsGenerationByRuntime, - bool inlineDataProgrammingRequired, - bool isIndirect, - uint32_t requiredWorkGroupOrder, - const RootDeviceEnvironment &rootDeviceEnvironment); - template static void programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); @@ -146,7 +151,8 @@ struct EncodeDispatchKernel { template static void setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); - static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); + template + static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount); static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo); diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index e2704dd628..3fa48b19f4 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -537,6 +537,12 @@ template void EncodeSurfaceState::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) { } +template +inline void EncodeDispatchKernel::encodeCommon(CommandContainer &container, EncodeDispatchKernelArgs &args) { + using WALKER_TYPE = typename Family::WALKER_TYPE; + EncodeDispatchKernel::template encode(container, args); +} + template void *EncodeDispatchKernel::getInterfaceDescriptor(CommandContainer &container, IndirectHeap *childDsh, uint32_t &iddOffset) { diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 2fb19e74e9..abe14abfa2 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -48,6 +48,7 @@ void EncodeDispatchKernel::setGrfInfo(InterfaceDescriptorType *pInterfac } template +template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) { using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; @@ -347,7 +348,8 @@ inline bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(ui } template -void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, +template +void EncodeDispatchKernel::encodeThreadData(WalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, @@ -375,7 +377,7 @@ void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, walkerCmd.setThreadGroupIdStartingResumeZ(static_cast(startWorkGroup[2])); } - walkerCmd.setSimdSize(getSimdConfig(simd)); + walkerCmd.setSimdSize(getSimdConfig(simd)); auto localWorkSize = static_cast(workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]); if (threadsPerThreadGroup == 0) { @@ -603,7 +605,8 @@ template void EncodeDispatchKernel::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) {} template -void EncodeDispatchKernel::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {} +template +void EncodeDispatchKernel::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {} template size_t EncodeDispatchKernel::additionalSizeRequiredDsh(uint32_t iddCount) { diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 2269d60b0c..568a2a27bf 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -44,11 +44,12 @@ void EncodeDispatchKernel::setGrfInfo(InterfaceDescriptorType *pInterfac } template +template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) { - using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; + using SHARED_LOCAL_MEMORY_SIZE = typename WalkerType::InterfaceDescriptorType::SHARED_LOCAL_MEMORY_SIZE; using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS; - using INLINE_DATA = typename Family::INLINE_DATA; + constexpr bool heaplessModeEnabled = Family::template isHeaplessMode(); const HardwareInfo &hwInfo = args.device->getHardwareInfo(); auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment(); @@ -73,7 +74,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis EncodeComputeMode::adjustPipelineSelect(container, kernelDescriptor); } - WALKER_TYPE walkerCmd = Family::cmdInitGpgpuWalker; + WalkerType walkerCmd = Family::template getInitGpuWalker(); auto &idd = walkerCmd.getInterfaceDescriptor(); EncodeDispatchKernel::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, @@ -85,11 +86,20 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis { auto alloc = args.dispatchInterface->getIsaAllocation(); UNRECOVERABLE_IF(nullptr == alloc); - auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation(); - if (!localIdsGenerationByRuntime) { - offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad; + + if constexpr (heaplessModeEnabled) { + auto address = alloc->getGpuAddress() + args.dispatchInterface->getIsaOffsetInParentAllocation(); + if (!localIdsGenerationByRuntime) { + address += kernelDescriptor.entryPoints.skipPerThreadDataLoad; + } + idd.setKernelStartPointer(address); + } else { + auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation(); + if (!localIdsGenerationByRuntime) { + offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad; + } + idd.setKernelStartPointer(offset); } - idd.setKernelStartPointer(offset); } if (args.dispatchInterface->getKernelDescriptor().kernelAttributes.flags.usesAssert && args.device->getL0Debugger() != nullptr) { idd.setSoftwareExceptionEnable(1); @@ -112,32 +122,17 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis idd.setSharedLocalMemorySize(slmSize); auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries; - bool skipSshProgramming = false; + bool sshProgrammingRequired = true; auto &productHelper = args.device->getProductHelper(); if (productHelper.isSkippingStatefulInformationRequired(kernelDescriptor)) { bindingTableStateCount = 0u; - skipSshProgramming = true; + sshProgrammingRequired = false; } - uint32_t bindingTablePointer = 0u; - bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor); - - if (!skipSshProgramming) { - if (!isBindlessKernel) { - container.prepareBindfulSsh(); - if (bindingTableStateCount > 0u) { - auto ssh = args.surfaceStateHeap; - if (ssh == nullptr) { - ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - } - bindingTablePointer = static_cast(EncodeSurfaceState::pushBindingTableAndSurfaceStates( - *ssh, - args.dispatchInterface->getSurfaceStateHeapData(), - args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount, - kernelDescriptor.payloadMappings.bindingTable.tableOffset)); - } - } else { + if (sshProgrammingRequired) { + bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor); + if (isBindlessKernel) { bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr; auto sshHeapSize = args.dispatchInterface->getSurfaceStateHeapDataSize(); @@ -156,15 +151,32 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis memcpy_s(dstSurfaceState, sshHeapSize, args.dispatchInterface->getSurfaceStateHeapData(), sshHeapSize); args.dispatchInterface->patchBindlessOffsetsInCrossThreadData(bindlessSshBaseOffset); } + + } else { + if constexpr (heaplessModeEnabled == false) { + container.prepareBindfulSsh(); + if (bindingTableStateCount > 0u) { + auto ssh = args.surfaceStateHeap; + if (ssh == nullptr) { + ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + } + auto bindingTablePointer = static_cast(EncodeSurfaceState::pushBindingTableAndSurfaceStates( + *ssh, + args.dispatchInterface->getSurfaceStateHeapData(), + args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount, + kernelDescriptor.payloadMappings.bindingTable.tableOffset)); + + idd.setBindingTablePointer(bindingTablePointer); + } + } } } - idd.setBindingTablePointer(bindingTablePointer); PreemptionHelper::programInterfaceDescriptorDataPreemption(&idd, args.preemptionMode); uint32_t samplerCount = 0; - if constexpr (Family::supportsSampler) { + if constexpr (Family::supportsSampler && heaplessModeEnabled == false) { if (args.device->getDeviceInfo().imageSupport) { uint32_t samplerStateOffset = 0; @@ -188,10 +200,12 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } } - EncodeDispatchKernel::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount); + if constexpr (heaplessModeEnabled == false) { + EncodeDispatchKernel::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount); + } uint64_t offsetThreadData = 0u; - const uint32_t inlineDataSize = sizeof(INLINE_DATA); + constexpr uint32_t inlineDataSize = WalkerType::getInlineDataSize(); auto crossThreadData = args.dispatchInterface->getCrossThreadData(); uint32_t inlineDataProgrammingOffset = 0u; @@ -231,6 +245,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis memcpy_s(ptr, sizeCrossThreadData, crossThreadData, sizeCrossThreadData); } + if (args.isIndirect) { auto gpuPtr = heap->getGraphicsAllocation()->getGpuAddress() + static_cast(heap->getUsed() - sizeThreadData - inlineDataProgrammingOffset); uint64_t implicitArgsGpuPtr = 0u; @@ -272,7 +287,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.partitionCount > 1, // multiOsContextCapable args.isRcs, // isRcs container.doubleSbaWaRef(), // doubleSbaWa - false, // heaplessModeEnabled + heaplessModeEnabled, // heaplessModeEnabled }; EncodeStateBaseAddress::encode(encodeStateBaseAddressArgs); container.setDirtyStateForAllHeaps(false); @@ -285,8 +300,17 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); } - walkerCmd.setIndirectDataStartAddress(static_cast(offsetThreadData)); - walkerCmd.setIndirectDataLength(sizeThreadData); + if constexpr (heaplessModeEnabled) { + auto inlineDataPointer = reinterpret_cast(walkerCmd.getInlineDataPointer()); + auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress; + auto heap = container.getIndirectHeap(HeapType::INDIRECT_OBJECT); + auto address = heap->getHeapGpuBase() + offsetThreadData; + std::memcpy(inlineDataPointer + indirectDataPointerAddress.offset, &address, indirectDataPointerAddress.pointerSize); + + } else { + walkerCmd.setIndirectDataStartAddress(static_cast(offsetThreadData)); + walkerCmd.setIndirectDataLength(sizeThreadData); + } EncodeDispatchKernel::encodeThreadData(walkerCmd, nullptr, @@ -377,7 +401,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis hwInfo); } else { args.partitionCount = 1; - auto buffer = listCmdBufferStream->getSpaceForCmd(); + auto buffer = listCmdBufferStream->getSpaceForCmd(); args.outWalkerPtr = buffer; *buffer = walkerCmd; } @@ -478,7 +502,8 @@ bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(uint32_t } template -void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, +template +void EncodeDispatchKernel::encodeThreadData(WalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, @@ -517,7 +542,7 @@ void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, } walkerCmd.setExecutionMask(static_cast(executionMask)); - walkerCmd.setSimdSize(getSimdConfig(simd)); + walkerCmd.setSimdSize(getSimdConfig(simd)); walkerCmd.setMessageSimd(walkerCmd.getSimdSize()); @@ -818,7 +843,8 @@ inline void EncodeStoreMMIO::appendFlags(MI_STORE_REGISTER_MEM *storeReg } template -void EncodeDispatchKernel::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {} +template +void EncodeDispatchKernel::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {} template size_t EncodeDispatchKernel::additionalSizeRequiredDsh(uint32_t iddCount) { diff --git a/shared/source/gen11/command_encoder_gen11.cpp b/shared/source/gen11/command_encoder_gen11.cpp index 672036f6d6..b614f35fa8 100644 --- a/shared/source/gen11/command_encoder_gen11.cpp +++ b/shared/source/gen11/command_encoder_gen11.cpp @@ -79,6 +79,9 @@ template void EncodeDispatchKernel::appendAdditionalIDDFields::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel::setupPostSyncMocs(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); +template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); +template void EncodeDispatchKernel::encodeThreadData(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void EncodeDispatchKernel::adjustWalkOrder(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template struct EncodeStates; template struct EncodeMath; diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index edcc8a03c2..c3e08b424f 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -119,6 +119,9 @@ template void EncodeDispatchKernel::appendAdditionalIDDFields::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel::setupPostSyncMocs(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); +template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); +template void EncodeDispatchKernel::encodeThreadData(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void EncodeDispatchKernel::adjustWalkOrder(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template struct EncodeStates; template struct EncodeMath; diff --git a/shared/source/gen8/command_encoder_gen8.cpp b/shared/source/gen8/command_encoder_gen8.cpp index 4858d010fd..71cfe30f1a 100644 --- a/shared/source/gen8/command_encoder_gen8.cpp +++ b/shared/source/gen8/command_encoder_gen8.cpp @@ -59,6 +59,9 @@ template void EncodeDispatchKernel::appendAdditionalIDDFields::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel::setupPostSyncMocs(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); +template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); +template void EncodeDispatchKernel::encodeThreadData(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void EncodeDispatchKernel::adjustWalkOrder(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template struct EncodeStates; template struct EncodeMath; diff --git a/shared/source/gen9/command_encoder_gen9.cpp b/shared/source/gen9/command_encoder_gen9.cpp index 18d13b7eef..67faa935e7 100644 --- a/shared/source/gen9/command_encoder_gen9.cpp +++ b/shared/source/gen9/command_encoder_gen9.cpp @@ -64,6 +64,9 @@ template void EncodeDispatchKernel::appendAdditionalIDDFields::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel::setupPostSyncMocs(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); +template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); +template void EncodeDispatchKernel::encodeThreadData(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void EncodeDispatchKernel::adjustWalkOrder(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template struct EncodeStates; template struct EncodeMath; diff --git a/shared/source/generated/gen11/hw_cmds_generated_gen11.inl b/shared/source/generated/gen11/hw_cmds_generated_gen11.inl index e4ecea5244..f1bf286452 100644 --- a/shared/source/generated/gen11/hw_cmds_generated_gen11.inl +++ b/shared/source/generated/gen11/hw_cmds_generated_gen11.inl @@ -490,6 +490,9 @@ typedef struct tagGPGPU_WALKER { inline uint32_t getBottomExecutionMask() const { return (TheStructure.Common.BottomExecutionMask); } + static constexpr uint32_t getInlineDataSize() { // patched + return 0u; + } using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched } GPGPU_WALKER; diff --git a/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl b/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl index 112e8bf91e..09b3d1e7df 100644 --- a/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl +++ b/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl @@ -510,7 +510,9 @@ typedef struct tagGPGPU_WALKER { inline uint32_t getBottomExecutionMask() const { return TheStructure.Common.BottomExecutionMask; } - + static constexpr uint32_t getInlineDataSize() { // patched + return 0u; + } using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched } GPGPU_WALKER; diff --git a/shared/source/generated/gen8/hw_cmds_generated_gen8.inl b/shared/source/generated/gen8/hw_cmds_generated_gen8.inl index 1884ec10a5..9b9e72c3f3 100644 --- a/shared/source/generated/gen8/hw_cmds_generated_gen8.inl +++ b/shared/source/generated/gen8/hw_cmds_generated_gen8.inl @@ -476,6 +476,9 @@ typedef struct tagGPGPU_WALKER { inline uint32_t getBottomExecutionMask() const { return (TheStructure.Common.BottomExecutionMask); } + static constexpr uint32_t getInlineDataSize() { // patched + return 0u; + } using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched } GPGPU_WALKER; STATIC_ASSERT(60 == sizeof(GPGPU_WALKER)); diff --git a/shared/source/generated/gen9/hw_cmds_generated_gen9.inl b/shared/source/generated/gen9/hw_cmds_generated_gen9.inl index 216948706c..aa3c7e7388 100644 --- a/shared/source/generated/gen9/hw_cmds_generated_gen9.inl +++ b/shared/source/generated/gen9/hw_cmds_generated_gen9.inl @@ -485,6 +485,9 @@ typedef struct tagGPGPU_WALKER { inline uint32_t getBottomExecutionMask() const { return (TheStructure.Common.BottomExecutionMask); } + static constexpr uint32_t getInlineDataSize() { // patched + return 0u; + } using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched } GPGPU_WALKER; STATIC_ASSERT(60 == sizeof(GPGPU_WALKER)); diff --git a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl index cd023beab3..2ff0637787 100644 --- a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl +++ b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl @@ -5808,6 +5808,9 @@ typedef struct tagCOMPUTE_WALKER { inline uint32_t *getInlineDataPointer() { return reinterpret_cast(&TheStructure.Common.InlineData); } + static constexpr uint32_t getInlineDataSize() { // patched + return 32u; + } using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched } COMPUTE_WALKER; STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER)); diff --git a/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl b/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl index 6930cd8697..9d3b692714 100644 --- a/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl +++ b/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl @@ -5579,6 +5579,9 @@ typedef struct tagCOMPUTE_WALKER { inline uint32_t *getInlineDataPointer() { return reinterpret_cast(&TheStructure.Common.InlineData); } + static constexpr uint32_t getInlineDataSize() { // patched + return 32u; + } using InterfaceDescriptorType = INTERFACE_DESCRIPTOR_DATA; // patched } COMPUTE_WALKER; STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER)); diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index a150d79733..be0224c838 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -378,6 +378,9 @@ template void EncodeDispatchKernel::setGrfInfo::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel::setupPostSyncMocs(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); +template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); +template void EncodeDispatchKernel::encodeThreadData(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void EncodeDispatchKernel::adjustWalkOrder(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template struct EncodeStates; template struct EncodeMath; diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index 69c53fa86a..f549c708ca 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -189,13 +189,14 @@ void EncodeSurfaceState::appendParamsForImageFromBuffer(R_SURFACE_STATE } template <> -void EncodeDispatchKernel::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) { +template +void EncodeDispatchKernel::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) { auto &productHelper = rootDeviceEnvironment.template getHelper(); if (productHelper.isAdjustWalkOrderAvailable(rootDeviceEnvironment.getReleaseHelper())) { if (HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder] == HwWalkOrderHelper::linearWalk) { - walkerCmd.setDispatchWalkOrder(WALKER_TYPE::DISPATCH_WALK_ORDER::LINERAR_WALKER); + walkerCmd.setDispatchWalkOrder(WalkerType::DISPATCH_WALK_ORDER::LINERAR_WALKER); } else if (HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder] == HwWalkOrderHelper::yOrderWalk) { - walkerCmd.setDispatchWalkOrder(WALKER_TYPE::DISPATCH_WALK_ORDER::Y_ORDER_WALKER); + walkerCmd.setDispatchWalkOrder(WalkerType::DISPATCH_WALK_ORDER::Y_ORDER_WALKER); } } } @@ -228,6 +229,9 @@ template void EncodeDispatchKernel::setGrfInfo::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::WALKER_TYPE &walkerCmd); template void EncodeDispatchKernel::setupPostSyncMocs(Family::WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); +template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); +template void EncodeDispatchKernel::encodeThreadData(Family::WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void EncodeDispatchKernel::adjustWalkOrder(Family::WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template struct EncodeStates; template struct EncodeMath; diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index bb60cd5401..bd51359c0f 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -236,15 +236,15 @@ HWTEST2_F(CommandEncoderTests, givenRequiredWorkGroupOrderWhenCallAdjustWalkOrde WALKER_TYPE walkerOnStart{}; uint32_t yOrder = 2u; - EncodeDispatchKernel::adjustWalkOrder(walkerCmd, yOrder, rootDeviceEnvironment); + EncodeDispatchKernel::template adjustWalkOrder(walkerCmd, yOrder, rootDeviceEnvironment); EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change uint32_t linearOrder = 0u; - EncodeDispatchKernel::adjustWalkOrder(walkerCmd, linearOrder, rootDeviceEnvironment); + EncodeDispatchKernel::template adjustWalkOrder(walkerCmd, linearOrder, rootDeviceEnvironment); EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change uint32_t fakeOrder = 5u; - EncodeDispatchKernel::adjustWalkOrder(walkerCmd, fakeOrder, rootDeviceEnvironment); + EncodeDispatchKernel::template adjustWalkOrder(walkerCmd, fakeOrder, rootDeviceEnvironment); EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(WALKER_TYPE))); // no change } diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index b5c057b2b6..d03e13a28b 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -46,6 +46,7 @@ TEST_F(CommandEncodeStatesTest, givenCommandConatinerCreatedWithMaxNumAggregateI } HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWalkerCommandProgrammed) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); bool requiresUncachedMocs = false; @@ -56,17 +57,17 @@ HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWa dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); } - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); - using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto itorPC = find(commands.begin(), commands.end()); ASSERT_NE(itorPC, commands.end()); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; debugManager.flags.ForceComputeWalkerPostSyncFlush.set(1); @@ -75,12 +76,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgr bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); - using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto itor = find(commands.begin(), commands.end()); ASSERT_NE(itor, commands.end()); @@ -91,6 +91,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgr using CommandEncodeStatesUncachedMocsTests = Test; HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndDirtyHeapsThenCorrectMocsIsSet) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; debugManager.flags.ForceL1Caching.set(0u); @@ -100,7 +101,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc bool requiresUncachedMocs = true; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, @@ -118,6 +119,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc } HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndNonDirtyHeapsThenCorrectMocsIsSet) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; debugManager.flags.ForceL1Caching.set(0u); @@ -127,7 +129,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc bool requiresUncachedMocs = true; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, @@ -145,6 +147,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUnc } HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndDirtyHeapsThenSbaIsNotProgrammed) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; debugManager.flags.ForceL1Caching.set(0u); @@ -154,7 +157,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_FALSE(dispatchArgs.requiresUncachedMocs); GenCmdList commands; @@ -173,6 +176,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon } HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsThenSbaIsNotProgrammed) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; debugManager.flags.ForceL1Caching.set(0u); @@ -182,7 +186,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_FALSE(dispatchArgs.requiresUncachedMocs); GenCmdList commands; @@ -196,6 +200,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon } HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNonUncachedMocsAndNonDirtyHeapsAndSlmSizeThenSbaIsNotProgrammed) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; debugManager.flags.ForceL1Caching.set(0u); @@ -209,7 +214,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_FALSE(dispatchArgs.requiresUncachedMocs); GenCmdList commands; @@ -223,6 +228,7 @@ HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithNon } HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhenDispatchKernelThenNextCommandBufferIsAdded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -233,7 +239,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhen bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto cmdBuffersCountAfter = cmdContainer->getCmdBufferAllocations().size(); @@ -241,6 +247,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhen } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThanZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -251,7 +258,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThan bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); auto &gfxcoreHelper = this->getHelper(); @@ -262,13 +269,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThan } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenDispatchingKernelThenSetDenormMode) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); @@ -276,6 +284,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenDispatchingKernelThenSe } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDebuggingEnabledAndAssertInKernelWhenDispatchingKernelThenSwExceptionsAreEnabled) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -286,13 +295,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDebuggingEnabledAndAss dispatchInterface->kernelDescriptor.kernelAttributes.flags.usesAssert = true; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); EXPECT_TRUE(interfaceDescriptorData->getSoftwareExceptionEnable()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -303,7 +313,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroW bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); @@ -313,6 +323,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroW } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWhenDispatchingKernelThenBindingTableOffsetIsCorrect) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t numBindingTable = 1; @@ -337,7 +348,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWh bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); @@ -345,6 +356,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWh } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhenDispatchingKernelThenBindingTableOffsetIsZero) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t numBindingTable = 0; @@ -367,7 +379,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); @@ -375,6 +387,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchingKernelThensamplerStateWasCopied) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t numSamplers = 1; @@ -398,7 +411,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); @@ -412,6 +425,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDispatchingKernelThensamplerStateWasNotCopied) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t numSamplers = 0; @@ -433,7 +447,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto interfaceDescriptorData = static_cast(cmdContainer->getIddBlock()); @@ -447,6 +461,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp } HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKernelThenCorrestMIStoreOffsetsSet) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; uint32_t dims[] = {2, 1, 1}; uint32_t offsets[] = {0x10, 0x20, 0x30}; @@ -459,7 +474,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKerne EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isIndirect = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -473,6 +488,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKerne } HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelThenMiMathEncoded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_MATH = typename FamilyType::MI_MATH; uint32_t dims[] = {2, 1, 1}; uint32_t offsets[] = {0x10, 0x20, 0x30}; @@ -488,7 +504,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelT EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isIndirect = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -498,6 +514,7 @@ HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsSizeWhenDispatchingKernelT } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDebugFlagWhenDispatchingKernelThenValuesAreSetUpCorrectly) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; @@ -530,7 +547,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); @@ -563,7 +580,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); @@ -591,7 +608,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); @@ -612,6 +629,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedWhenDispatchKernelThenFlushNotAdded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -622,7 +640,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -632,6 +650,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); cmdContainer->slmSizeRef() = 1; @@ -641,7 +660,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha bool requiresUncachedMocs = true; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -657,6 +676,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotChangedWhenDispatchKernelThenHeapsAreCleanAndFlushAdded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; uint32_t dims[] = {2, 1, 1}; @@ -668,7 +688,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotCha bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -679,6 +699,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotCha } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatchKernelThenPCIsAddedBeforeSBA) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; @@ -692,7 +713,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatch EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList cmdList; CmdParse::parseCommandBuffer(cmdList, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -720,6 +741,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatch } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChangedWhenDispatchKernelThenFlushAdded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -732,7 +754,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChange bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -743,6 +765,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChange } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenDispatchKernelThenMediaInterfaceDescriptorEncoded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; @@ -757,7 +780,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -769,6 +792,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenDispatchKernelAndDynamicStateHeapDirtyThenStateBaseAddressEncodedAndMediaInterfaceDescriptorEncoded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; @@ -787,7 +811,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -799,6 +823,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeroWhenD } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapIsDirtyThenSamplerStateWasCopiedAndStateBaseAddressEncoded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; @@ -824,7 +849,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapI dshBeforeFlush->getSpace(dshBeforeFlush->getAvailableSpace() - NEO::EncodeDispatchKernel::getSizeRequiredDsh(kernelDescriptor, cmdContainer->getNumIddPerBlock())); auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase(); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -848,6 +873,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenHeapI } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextIDDInBlockWhenHeapIsDirtyThenSamplerStateWasCopiedAndStateBaseAddressEncoded) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; @@ -879,7 +905,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextID auto cpuBaseBeforeFlush = dshBeforeFlush->getCpuBase(); auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -903,6 +929,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneAndNextID } HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalkerThenCommandsToPatchAreNotPresent) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restorer; debugManager.flags.PauseOnEnqueue.set(-1); @@ -913,12 +940,13 @@ HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalke std::list cmdsToPatch; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.additionalCommands = &cmdsToPatch; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_EQ(cmdsToPatch.size(), 0u); } HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalkerThenCommandsToPatchAreFilled) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restorer; debugManager.flags.PauseOnEnqueue.set(-2); @@ -929,7 +957,7 @@ HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalk std::list cmdsToPatch; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.additionalCommands = &cmdsToPatch; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_EQ(cmdsToPatch.size(), 4u); } @@ -939,7 +967,7 @@ using EncodeDispatchKernelTest = Test; HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenSshFromContainerIsUsed, IsAtLeastSkl) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using WALKER = typename FamilyType::WALKER_TYPE; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t numBindingTable = 1; BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; @@ -961,7 +989,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenS if (EncodeDispatchKernel::isDshNeeded(pDevice->getDeviceInfo())) { dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); } - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); @@ -971,7 +999,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenS HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThenThenSshFromContainerIsUsed, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using WALKER = typename FamilyType::WALKER_TYPE; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t numBindingTable = 1; RENDER_SURFACE_STATE state = FamilyType::cmdInitRenderSurfaceState; @@ -990,7 +1018,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed(); @@ -999,13 +1027,14 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsBeforeXeHpCore) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto dispatchInterface = std::make_unique(); dispatchInterface->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT; uint32_t dims[] = {2, 1, 1}; bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); auto idd = static_cast(cmdContainer->getIddBlock()); EXPECT_EQ(idd->getKernelStartPointer(), dispatchInterface->getIsaAllocation()->getGpuAddressToPatch() + dispatchInterface->getIsaOffsetInParentAllocation()); @@ -1025,7 +1054,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenP uint32_t dims[] = {2, 1, 1}; bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -1037,6 +1066,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenP } HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodingKernelThenPrintKernelDispatchParams, IsAtLeastXeHpCore) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto dispatchInterface = std::make_unique(); uint32_t dims[] = {2, 1, 1}; @@ -1046,7 +1076,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodi DebugManagerStateRestore restore; debugManager.flags.PrintKernelDispatchParameters.set(true); testing::internal::CaptureStdout(); // start capturing - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); std::string outputString = testing::internal::GetCapturedStdout(); // stop capturing EXPECT_NE(std::string::npos, outputString.find("kernel")); @@ -1062,6 +1092,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenPrintKernelDispatchParametersWhenEncodi HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatchingKernelThenSurfaceStateOffsetInCrossThreadDataIsNotPatched) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor; uint32_t numBindingTable = 1; BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; @@ -1113,7 +1144,7 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_EQ(pattern, *patchLocation); @@ -1141,7 +1172,7 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching memset(ioh->getCpuBase(), 0, ioh->getMaxAvailableSpace()); dispatchArgs.dispatchInterface = dispatchInterface.get(); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_TRUE(memoryZeroed(ptrOffset(ioh->getCpuBase(), iohOffset), ioh->getMaxAvailableSpace() - iohOffset)); } @@ -1350,7 +1381,7 @@ using BindlessCommandEncodeStatesContainerTest = TestgetIndirectHeap(HeapType::SURFACE_STATE), nullptr); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*commandContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*commandContainer.get(), dispatchArgs); EXPECT_EQ(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); } @@ -1385,7 +1416,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindles HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindfulKernelWhenBindlessModeEnabledThenCmdContainerHasSsh) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - using WALKER = typename FamilyType::WALKER_TYPE; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore dbgRestorer; debugManager.flags.UseBindlessMode.set(1); auto commandContainer = std::make_unique(); @@ -1414,7 +1445,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindfulKernelWhenBindles EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*commandContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*commandContainer.get(), dispatchArgs); EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); } @@ -1423,7 +1454,7 @@ using NgenGeneratorDispatchKernelEncodeTest = Test; HWTEST2_F(NgenGeneratorDispatchKernelEncodeTest, givenBindfulKernelAndIsNotGeneratedByIgcWhenEncodeDispatchKernelThenCmdContainerDoesNotHaveSsh, IsPVC) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; - + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; for (auto isGeneratedByIgc : {false, true}) { auto commandContainer = std::make_unique(); commandContainer->initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false); @@ -1453,7 +1484,7 @@ HWTEST2_F(NgenGeneratorDispatchKernelEncodeTest, givenBindfulKernelAndIsNotGener EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*commandContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*commandContainer.get(), dispatchArgs); if (isGeneratedByIgc) { EXPECT_NE(commandContainer->getIndirectHeap(HeapType::SURFACE_STATE), nullptr); @@ -1548,12 +1579,13 @@ HWTEST_F(CommandEncodeStatesTest, givenKernelInfoOfBindlessKernelWhenGettingRequ } HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWhenIsKernelDispatchedFromImmediateCmdListTrueThenGetHeapWithRequiredSizeAndAlignmentCalled) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); uint32_t dims[] = {1, 1, 1}; bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isKernelDispatchedFromImmediateCmdList = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_NE(0u, cmdContainer->getHeapWithRequiredSizeAndAlignmentCalled); } \ No newline at end of file diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp index 47fd30d64f..9beced76cc 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp @@ -37,7 +37,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeAndPVCA dispatchArgs.eventAddress = eventAddress; dispatchArgs.isTimestampEvent = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -62,7 +62,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeThenMoc dispatchArgs.isTimestampEvent = true; dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -131,7 +131,7 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocatio bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp index 831498ea2a..216606efb9 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp @@ -40,7 +40,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenOverrideSlmTotalSizeDebugVari cmdContainer->reset(); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 0210f7738b..ce38b1fe89 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -48,7 +48,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterTha bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -75,7 +75,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenXeHpAndLaterWhenDispa EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -102,7 +102,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenXeHpDebuggingEnabledA EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -126,7 +126,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSimdSizeWhenDispatchi bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -149,7 +149,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZero bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -183,7 +183,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenOverrideSlmTotalSizeD cmdContainer->reset(); EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -221,7 +221,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenStatelessBufferAndIma bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -258,7 +258,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhe bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -295,7 +295,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhe bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -350,7 +350,7 @@ HWTEST2_F(CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchKernelThensampl dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -382,7 +382,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAllocationWhenDi dispatchArgs.eventAddress = eventAddress; dispatchArgs.isTimestampEvent = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -407,7 +407,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod dispatchArgs.isTimestampEvent = true; dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -425,6 +425,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCleanHeapsWhenDispatchKernelThenFlushNotAdded) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); cmdContainer->slmSizeRef() = 1; @@ -433,7 +434,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCleanHeapsWhenDispatc bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -474,7 +475,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -508,7 +509,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -533,7 +534,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -580,7 +581,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredWhe bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -607,7 +608,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredIsF bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -634,7 +635,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredAnd bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -663,7 +664,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInlineDataRequiredAnd bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -1071,7 +1072,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, dispatchArgs.eventAddress = eventAddress; dispatchArgs.partitionCount = 2; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); size_t usedBuffer = cmdContainer->getCommandStream()->getUsed(); EXPECT_EQ(2u, dispatchArgs.partitionCount); @@ -1104,7 +1105,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, givenCooperativ dispatchArgs.isInternal = isInternal; dispatchArgs.isCooperative = isCooperative; dispatchArgs.partitionCount = 2; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed(); @@ -1146,7 +1147,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isInternal = isInternal; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed(); @@ -1159,7 +1160,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp EXPECT_EQ(16u, baseWalkerCmd->getThreadGroupIdXDimension()); dispatchArgs.partitionCount = 2; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); size_t total = cmdContainer->getCommandStream()->getUsed(); size_t partitionedWalkerSize = total - containerUsedAfterBase; @@ -1210,7 +1211,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp dispatchArgs.partitionCount = 2; dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_EQ(2u, dispatchArgs.partitionCount); size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed(); @@ -1309,7 +1310,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, dispatchArgs.isInternal = isInternal; dispatchArgs.partitionCount = 2; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_EQ(2u, dispatchArgs.partitionCount); size_t partitionedWalkerSize = cmdContainer->getCommandStream()->getUsed(); @@ -1367,7 +1368,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp dispatchArgs.isInternal = isInternal; dispatchArgs.partitionCount = 2; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); size_t internalWalkerSize = cmdContainer->getCommandStream()->getUsed(); @@ -1394,7 +1395,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhen dispatchArgs.eventAddress = eventAddress; dispatchArgs.isTimestampEvent = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -1408,6 +1409,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhen HWTEST2_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDpasRequiredIsNotDefaultThenPipelineSelectCommandAdded, IsWithinXeGfxFamily) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -1420,7 +1422,7 @@ HWTEST2_F(CommandEncodeStatesTest, bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, @@ -1442,7 +1444,7 @@ HWTEST2_F(CommandEncodeStatesTest, HWTEST2_F(CommandEncodeStatesTest, givenDebugVariableWhenEncodeStateIsCalledThenSystolicValueIsOverwritten, IsWithinXeGfxFamily) { DebugManagerStateRestore restorer; - + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -1455,7 +1457,7 @@ HWTEST2_F(CommandEncodeStatesTest, bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, @@ -1477,6 +1479,7 @@ HWTEST2_F(CommandEncodeStatesTest, HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDispatchInterfaceWhenDpasRequiredIsSameAsDefaultThenPipelineSelectCommandNotAdded) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -1487,7 +1490,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, @@ -1513,7 +1516,7 @@ struct CommandEncodeStatesImplicitScalingPrimaryBufferFixture : public CommandEn template void testBodyFindPrimaryBatchBuffer() { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; - + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; constexpr bool expectPrimary = flushTaskUsedForImmediate || usePrimaryBuffer; uint32_t dims[] = {16, 1, 1}; @@ -1525,7 +1528,7 @@ struct CommandEncodeStatesImplicitScalingPrimaryBufferFixture : public CommandEn dispatchArgs.eventAddress = eventAddress; dispatchArgs.partitionCount = 2; - EncodeDispatchKernel::encode(*BaseClass::cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*BaseClass::cmdContainer.get(), dispatchArgs); size_t usedBuffer = BaseClass::cmdContainer->getCommandStream()->getUsed(); EXPECT_EQ(2u, dispatchArgs.partitionCount); diff --git a/shared/test/unit_test/encoders/test_encode_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_pvc_and_later.cpp index b795760ac5..848db404e3 100644 --- a/shared/test/unit_test/encoders/test_encode_pvc_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_pvc_and_later.cpp @@ -296,6 +296,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalRegRe using CommandEncodeStatesXeHpcAndLaterTests = Test; HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits, IsAtLeastXeHpcCore) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; debugManager.flags.ForceComputeWalkerPostSyncFlush.set(1); @@ -304,12 +305,11 @@ HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammin bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); - using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto itor = find(commands.begin(), commands.end()); ASSERT_NE(itor, commands.end()); diff --git a/shared/test/unit_test/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp index 4838365ed4..f4ba6f7e01 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp @@ -63,6 +63,7 @@ PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalIn using EncodeKernelPvcTest = Test; PVCTEST_F(EncodeKernelPvcTest, givenRevisionBAndAboveWhenSpecialModeRequiredThenDontReprogramPipelineSelect) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; bool requiresUncachedMocs = false; auto hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); @@ -92,7 +93,7 @@ PVCTEST_F(EncodeKernelPvcTest, givenRevisionBAndAboveWhenSpecialModeRequiredThen EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.preemptionMode = NEO::PreemptionMode::Initial; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); EXPECT_EQ(testInput.expectedValue, cmdContainer->lastPipelineSelectModeRequiredRef()); } } @@ -195,7 +196,7 @@ PVCTEST_F(EncodeKernelPvcTest, givenDefaultSettingForFenceAsPostSyncOperationInC dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.isHostScopeSignalEvent = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); diff --git a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp index a46cef3a0d..caa005c3b7 100644 --- a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp @@ -319,7 +319,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNoFenceAsPostSyncOperationInCo bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -345,7 +345,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenFenceAsPostSyncOperationInComp bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -378,7 +378,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isKernelUsingSystemAllocation = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer( @@ -414,7 +414,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenEven EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isHostScopeSignalEvent = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer( @@ -450,7 +450,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern dispatchArgs.isKernelUsingSystemAllocation = true; dispatchArgs.isHostScopeSignalEvent = true; - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); @@ -464,6 +464,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern } XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); cmdContainer->slmSizeRef() = 1; @@ -473,7 +474,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndU bool requiresUncachedMocs = true; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs); + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());