From 410fd7d909b176138e9ea7d66436538f16f1879e Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Tue, 13 Sep 2022 12:47:58 +0000 Subject: [PATCH] Correct binding table prefetch Signed-off-by: Kamil Kopryk Related-To: NEO-6075 Binding table entry count was zeroed even when ForceBtpPrefetchMode debug flag was enabled --- .../test_cmdlist_append_launch_kernel_3.cpp | 45 ++++++++++--------- .../helpers/hardware_commands_helper_base.inl | 7 ++- .../command_container/command_encoder.h | 1 + .../command_container/command_encoder.inl | 12 +++-- .../command_encoder_bdw_and_later.inl | 2 +- .../command_encoder_xehp_and_later.inl | 2 +- shared/source/gen11/command_encoder_gen11.cpp | 2 +- .../gen12lp/command_encoder_gen12lp.cpp | 2 +- .../command_encoder_xe_hpc_core.cpp | 3 -- 9 files changed, 42 insertions(+), 34 deletions(-) diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 42315dc39b..6c45d3e445 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -32,33 +32,38 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - createKernel(); + for (auto debugKey : {-1, 0, 1}) { + DebugManagerStateRestore restore; + DebugManager.flags.ForceBtpPrefetchMode.set(debugKey); - ze_group_count_t groupCount{1, 1, 1}; - ze_result_t returnValue; - std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); - CmdListKernelLaunchParams launchParams = {}; - commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + createKernel(); - auto commandStream = commandList->commandContainer.getCommandStream(); + ze_group_count_t groupCount{1, 1, 1}; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); + CmdListKernelLaunchParams launchParams = {}; + commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandStream->getCpuBase(), commandStream->getUsed())); + auto commandStream = commandList->commandContainer.getCommandStream(); - auto itorMIDL = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(itorMIDL, cmdList.end()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandStream->getCpuBase(), commandStream->getUsed())); - auto cmd = genCmdCast(*itorMIDL); - ASSERT_NE(cmd, nullptr); + auto itorMIDL = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(itorMIDL, cmdList.end()); - auto dsh = NEO::ApiSpecificConfig::getBindlessConfiguration() ? device->getNEODevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : commandList->commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); - auto idd = static_cast(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress())); + auto cmd = genCmdCast(*itorMIDL); + ASSERT_NE(cmd, nullptr); - if (NEO::EncodeSurfaceState::doBindingTablePrefetch()) { - uint32_t numArgs = kernel->kernelImmData->getDescriptor().payloadMappings.bindingTable.numEntries; - EXPECT_EQ(numArgs, idd->getBindingTableEntryCount()); - } else { - EXPECT_EQ(0u, idd->getBindingTableEntryCount()); + auto dsh = NEO::ApiSpecificConfig::getBindlessConfiguration() ? device->getNEODevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : commandList->commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); + auto idd = static_cast(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress())); + + if (NEO::EncodeSurfaceState::doBindingTablePrefetch()) { + uint32_t numArgs = kernel->kernelImmData->getDescriptor().payloadMappings.bindingTable.numEntries; + EXPECT_EQ(numArgs, idd->getBindingTableEntryCount()); + } else { + EXPECT_EQ(0u, idd->getBindingTableEntryCount()); + } } } diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index cb36c855e8..2a9e8d05bc 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -251,10 +251,9 @@ size_t HardwareCommandsHelper::sendIndirectState( uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA); - auto bindingTablePrefetchSize = std::min(31u, static_cast(kernel.getNumberOfBindingTableStates())); - - if (!EncodeSurfaceState::doBindingTablePrefetch()) { - bindingTablePrefetchSize = 0; + auto bindingTablePrefetchSize = 0; + if (EncodeSurfaceState::doBindingTablePrefetch()) { + bindingTablePrefetchSize = std::min(31u, static_cast(kernel.getNumberOfBindingTableStates())); } HardwareCommandsHelper::sendInterfaceDescriptorData( diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 4af1d46006..ee828e1c9e 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -304,6 +304,7 @@ struct EncodeSurfaceState { static void getSshAlignedPointer(uintptr_t &ptr, size_t &offset); static bool doBindingTablePrefetch(); + static bool isBindingTablePrefetchPreferred(); static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount, const void *srcKernelSsh, size_t srcKernelSshSize, diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 248213fb6e..6e36b895f8 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -675,12 +675,18 @@ void EncodeIndirectParams::setWorkDimIndirect(CommandContainer &containe } } +template +bool EncodeSurfaceState::doBindingTablePrefetch() { + auto enableBindingTablePrefetech = isBindingTablePrefetchPreferred(); + if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) { + enableBindingTablePrefetech = static_cast(DebugManager.flags.ForceBtpPrefetchMode.get()); + } + return enableBindingTablePrefetech; +} + template void EncodeDispatchKernel::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) { auto enablePrefetch = EncodeSurfaceState::doBindingTablePrefetch(); - if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) { - enablePrefetch = static_cast(DebugManager.flags.ForceBtpPrefetchMode.get()); - } if (enablePrefetch) { interfaceDescriptor.setSamplerCount(static_cast((samplerCount + 3) / 4)); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 2fa31bbe76..359b74dcc3 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -482,7 +482,7 @@ inline void EncodeSurfaceState::encodeExtraBufferParams(EncodeSurface } template -bool EncodeSurfaceState::doBindingTablePrefetch() { +bool EncodeSurfaceState::isBindingTablePrefetchPreferred() { return true; } diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index c8d7e0e8ca..25edb0417d 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -628,7 +628,7 @@ size_t EncodeMiFlushDW::getMiFlushDwWaSize() { } template -bool EncodeSurfaceState::doBindingTablePrefetch() { +bool EncodeSurfaceState::isBindingTablePrefetchPreferred() { return false; } diff --git a/shared/source/gen11/command_encoder_gen11.cpp b/shared/source/gen11/command_encoder_gen11.cpp index 22c81b86c6..ac82e3d17a 100644 --- a/shared/source/gen11/command_encoder_gen11.cpp +++ b/shared/source/gen11/command_encoder_gen11.cpp @@ -21,7 +21,7 @@ using Family = NEO::Gen11Family; namespace NEO { template <> -bool EncodeSurfaceState::doBindingTablePrefetch() { +bool EncodeSurfaceState::isBindingTablePrefetchPreferred() { return false; } diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index b7cc5620d4..4583639eda 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -83,7 +83,7 @@ void EncodeSurfaceState::encodeExtraBufferParams(EncodeSurfaceStateArgs } template <> -bool EncodeSurfaceState::doBindingTablePrefetch() { +bool EncodeSurfaceState::isBindingTablePrefetchPreferred() { return false; } diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index 148e2b9a2d..cf832cce05 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -297,9 +297,6 @@ void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTO template <> void EncodeDispatchKernel::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) { auto enablePrefetch = EncodeSurfaceState::doBindingTablePrefetch(); - if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) { - enablePrefetch = static_cast(DebugManager.flags.ForceBtpPrefetchMode.get()); - } if (enablePrefetch) { interfaceDescriptor.setBindingTableEntryCount(std::min(bindingTableEntryCount, 31u));