diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl
index 1773e5535c..1315a0fa02 100644
--- a/shared/source/command_container/command_encoder_xehp_and_later.inl
+++ b/shared/source/command_container/command_encoder_xehp_and_later.inl
@@ -279,6 +279,11 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
         EncodeDispatchKernel<Family>::adjustTimestampPacket(walkerCmd, hwInfo);
     }
 
+    if (DebugManager.flags.ForceComputeWalkerPostSyncFlush.get() == 1) {
+        postSync.setDataportPipelineFlush(true);
+        EncodeDispatchKernel<Family>::adjustTimestampPacket(walkerCmd, hwInfo);
+    }
+
     walkerCmd.setPredicateEnable(args.isPredicate);
 
     auto threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension();
diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl
index 3d8fb268af..63a862e36a 100644
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -223,6 +223,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, CopyHostPtrOnCpu, -1, "-1: default, 0: disable, 
 DECLARE_DEBUG_VARIABLE(int32_t, ForceZeDeviceCanAccessPerReturnValue, -1, "-1: default, 0: zeDeviceCanAccessPeer always return false 1: zeDeviceCanAccessPeer always return true")
 DECLARE_DEBUG_VARIABLE(int32_t, AdjustThreadGroupDispatchSize, -1, "-1: default, 0: do not adjust thread group dispatch size 1: adjust thread group dispatch size (PVC)")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceNonblockingExecbufferCalls, -1, "-1: default, 0: make execbuffer call blocking, 1: make execbuffer call nonblocking. Supported only in prelim i915 kernels.")
+DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlush, -1, "-1: default, 0: disable 1: Enable all flushing bits in ComputeWalker->PostSync")
 
 /*LOGGING FLAGS*/
 DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config
index 73db64fec6..454d9d457f 100644
--- a/shared/test/common/test_files/igdrcl.config
+++ b/shared/test/common/test_files/igdrcl.config
@@ -486,4 +486,5 @@ ForceZeDeviceCanAccessPerReturnValue = -1
 AdjustThreadGroupDispatchSize = -1
 ForceNonblockingExecbufferCalls = -1
 UseHighAlignmentForHeapExtended = -1
-ForceAutoGrfCompilationMode = -1
\ No newline at end of file
+ForceAutoGrfCompilationMode = -1
+ForceComputeWalkerPostSyncFlush = -1
\ No newline at end of file
diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp
index de5f569b69..bc99e88895 100644
--- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp
+++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp
@@ -58,6 +58,28 @@ HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWa
     ASSERT_NE(itorPC, commands.end());
 }
 
+HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits) {
+    DebugManagerStateRestore restore;
+    DebugManager.flags.ForceComputeWalkerPostSyncFlush.set(1);
+
+    uint32_t dims[] = {2, 1, 1};
+    std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
+    bool requiresUncachedMocs = false;
+    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
+
+    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
+
+    GenCmdList commands;
+    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
+
+    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
+    auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
+    ASSERT_NE(itor, commands.end());
+
+    auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
+    EXPECT_TRUE(walkerCmd->getPostSync().getDataportPipelineFlush());
+}
+
 using CommandEncodeStatesUncachedMocsTests = Test<CommandEncodeStatesFixture>;
 
 HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndDirtyHeapsThenCorrectMocsIsSet) {
diff --git a/shared/test/unit_test/encoders/test_encode_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_pvc_and_later.cpp
index 9416bae87f..149e1595bd 100644
--- a/shared/test/unit_test/encoders/test_encode_pvc_and_later.cpp
+++ b/shared/test/unit_test/encoders/test_encode_pvc_and_later.cpp
@@ -6,7 +6,11 @@
  */
 
 #include "shared/source/command_container/command_encoder.h"
+#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
+#include "shared/test/common/helpers/debug_manager_state_restore.h"
 #include "shared/test/common/test_macros/hw_test.h"
+#include "shared/test/unit_test/fixtures/command_container_fixture.h"
+#include "shared/test/unit_test/mocks/mock_dispatch_kernel_encoder_interface.h"
 
 using namespace NEO;
 
@@ -196,4 +200,29 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalRegRe
             validateBaseProgramming<FamilyType>(buffer, compareOperation, startAddress, indirect, compareReg1, compareReg2);
         }
     }
-}
\ No newline at end of file
+}
+
+using CommandEncodeStatesXeHpcAndLaterTests = Test<CommandEncodeStatesFixture>;
+
+HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits, IsAtLeastXeHpcCore) {
+    DebugManagerStateRestore restore;
+    DebugManager.flags.ForceComputeWalkerPostSyncFlush.set(1);
+
+    uint32_t dims[] = {2, 1, 1};
+    std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
+    bool requiresUncachedMocs = false;
+    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
+
+    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
+
+    GenCmdList commands;
+    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
+
+    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
+    auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
+    ASSERT_NE(itor, commands.end());
+
+    auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
+    EXPECT_TRUE(walkerCmd->getPostSync().getDataportPipelineFlush());
+    EXPECT_TRUE(walkerCmd->getPostSync().getDataportSubsliceCacheFlush());
 }
\ No newline at end of file