From d2546d406272b8b4463b73c2988f752acd392d3c Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Fri, 26 Jul 2024 12:59:09 +0000 Subject: [PATCH] refactor: add forceComputeWalkerPostSyncFlushWithWrite function Signed-off-by: Kamil Kopryk --- .../command_container/command_encoder.h | 3 ++ .../command_encoder_bdw_and_later.inl | 5 +++ .../command_encoder_enablers.inl | 1 + .../command_encoder_xehp_and_later.inl | 18 +++++++++++ .../debug_settings/debug_variables_base.inl | 1 + shared/test/common/test_files/igdrcl.config | 3 +- ..._encode_dispatch_kernel_xehp_and_later.cpp | 32 +++++++++++++++++++ 7 files changed, 62 insertions(+), 1 deletion(-) diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 7615b350bb..0a81e3eed8 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -210,6 +210,9 @@ struct EncodeDispatchKernel { static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args); static void *getImplicitArgsAddress(EncodeDispatchKernelArgs &args, const KernelDescriptor &kernelDescriptor); static size_t getScratchPtrOffsetOfImplicitArgs(); + + template + static void forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd); }; template diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 6947dbb454..d615a9892c 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -638,6 +638,11 @@ inline size_t EncodeDispatchKernel::getInlineDataOffset(EncodeDispatchKe return 0; } +template +template +void EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd) { +} + template size_t EncodeStates::getSshHeapSize() { return 64 * MemoryConstants::kiloByte; diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl index a7c38ce3d7..045f0aba18 100644 --- a/shared/source/command_container/command_encoder_enablers.inl +++ b/shared/source/command_container/command_encoder_enablers.inl @@ -28,6 +28,7 @@ template void NEO::EncodeDispatchKernel::programInlineDataHeapless::encodeEuSchedulingPolicy(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy); template uint64_t NEO::EncodeDispatchKernel::getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args); template void NEO::EncodeDispatchKernel::patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired); +template void NEO::EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite(Family::DefaultWalkerType &walkerCmd); template struct NEO::EncodeStates; template struct NEO::EncodeMath; diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index f23fa9a745..0141c65d39 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -354,6 +354,8 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis EncodeDispatchKernel::setupPostSyncForInOrderExec(walkerCmd, args); } else if (args.eventAddress) { EncodeDispatchKernel::setupPostSyncForRegularEvent(walkerCmd, args); + } else { + EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite(walkerCmd); } if (debugManager.flags.ForceComputeWalkerPostSyncFlush.get() == 1) { @@ -938,6 +940,22 @@ inline size_t EncodeDispatchKernel::getInlineDataOffset(EncodeDispatchKe return offsetof(DefaultWalkerType, TheStructure.Common.InlineData); } +template +template +void EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd) { + using PostSyncType = typename WalkerType::PostSyncType; + using OperationType = typename PostSyncType::OPERATION; + + if (debugManager.flags.ForceComputeWalkerPostSyncFlushWithWrite.get() != -1) { + auto &postSync = walkerCmd.getPostSync(); + postSync.setDataportPipelineFlush(true); + postSync.setDataportSubsliceCacheFlush(true); + postSync.setDestinationAddress(static_cast(debugManager.flags.ForceComputeWalkerPostSyncFlushWithWrite.get())); + postSync.setOperation(OperationType::OPERATION_WRITE_IMMEDIATE_DATA); + postSync.setImmediateData(0u); + } +} + template size_t EncodeStates::getSshHeapSize() { return 2 * MemoryConstants::megaByte; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 78d330d049..0c98af7502 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -291,6 +291,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceSipClass, -1, "-1: default, otherwise based DECLARE_DEBUG_VARIABLE(int32_t, ForceScratchAndMTPBufferSizeMode, -1, "-1: default, 0: Full, 1: Min. BMG+: Reduce required memory for Scrach and MTP buffers on CCS context") DECLARE_DEBUG_VARIABLE(int32_t, CFEStackIDControl, -1, "Set Stack ID Control in CFE_STATE on Xe2+, -1 - do not set") DECLARE_DEBUG_VARIABLE(int32_t, StandaloneInOrderTimestampAllocationEnabled, -1, "-1: default, 0: disabled, 1: enabled. If enabled, use internal allocations, instead of Event pool for timestamps") +DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlushWithWrite, -1, "-1: ignore. >=0: Force PostSync cache flush and override postSync immediate write address to given value") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index b08ae0e5a4..e15bbe972c 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -619,6 +619,7 @@ OverrideNumHighPriorityContexts = -1 ForceScratchAndMTPBufferSizeMode = -1 ForcePostSyncL1Flush = -1 AllowNotZeroForCompressedOnWddm = -1 -ForceGmmSystemMemoryBufferForAllocations = 0 +ForceGmmSystemMemoryBufferForAllocations = 0 StandaloneInOrderTimestampAllocationEnabled = -1 +ForceComputeWalkerPostSyncFlushWithWrite = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 9ca193c9dc..8509b0e5f4 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -1616,3 +1616,35 @@ HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenRequestingExtraP expectedConsumedSize = alignUp(expectedConsumedSize, NEO::EncodeDispatchKernel::getDefaultIOHAlignment()); EXPECT_EQ(expectedConsumedSize, heap->getUsed()); } + +HWTEST2_F(CommandEncodeStatesTest, givenForceComputeWalkerPostSyncFlushWithWriteWhenEncodeIsCalledThenPostSyncIsProgrammedCorrectly, IsAtLeastXeHpCore) { + + using DefaultWalkerType = typename FamilyType::DefaultWalkerType; + using PostSyncType = typename DefaultWalkerType::PostSyncType; + using OPERATION = typename PostSyncType::OPERATION; + + DebugManagerStateRestore restore; + debugManager.flags.ForceComputeWalkerPostSyncFlushWithWrite.set(0); + + uint32_t dims[] = {1, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); + + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); + auto it = find(commands.begin(), commands.end()); + ASSERT_NE(it, commands.end()); + + auto walker = genCmdCast(*it); + auto &postSync = walker->getPostSync(); + EXPECT_TRUE(postSync.getDataportPipelineFlush()); + EXPECT_TRUE(postSync.getDataportSubsliceCacheFlush()); + + uint64_t expectedAddress = 0u; + EXPECT_EQ(expectedAddress, postSync.getDestinationAddress()); + EXPECT_EQ(OPERATION::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + uint64_t expectedData = 0u; + EXPECT_EQ(expectedData, postSync.getImmediateData()); +} \ No newline at end of file