From 88c5872682ebb1b77154da9faf043d2a639dbeee Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Wed, 14 Feb 2024 17:15:49 +0000 Subject: [PATCH] feature: debug flag to flush tlb before copy Related-To: HSD-18036669673 Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 7 +++++++ .../command_stream_receiver_hw_base.inl | 19 ++++++++++++++++++- .../debug_settings/debug_variables_base.inl | 1 + shared/test/common/test_files/igdrcl.config | 1 + 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index c478b821cd..9270471e18 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1467,6 +1467,13 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, srcAllocationStruct.alignedAllocationPtr, srcAllocationStruct.alloc, srcAllocationStruct.offset, size); } else { + if (NEO::debugManager.flags.FlushTlbBeforeCopy.get() == 1) { + NEO::PipeControlArgs args; + args.tlbInvalidation = true; + + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + } + if (ret == ZE_RESULT_SUCCESS && leftSize) { Builtin copyKernel = BuiltinTypeHelper::adjustBuiltinType(isStateless, isHeapless); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index f43973a8e7..6d27151ebf 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -188,10 +188,11 @@ CompletionStamp CommandStreamReceiverHw::flushBcsTask(LinearStream &c uint64_t taskStartAddress = commandStreamTask.getGpuBase() + commandStreamTaskStart; + NEO::EncodeDummyBlitWaArgs waArgs{false, const_cast(&(this->peekRootDeviceEnvironment()))}; + if (dispatchBcsFlags.flushTaskCount) { uint64_t postSyncAddress = getTagAllocation()->getGpuAddress(); TaskCountType postSyncData = peekTaskCount() + 1; - NEO::EncodeDummyBlitWaArgs waArgs{false, const_cast(&(this->peekRootDeviceEnvironment()))}; NEO::MiFlushArgs args{waArgs}; args.commandWithPostSync = true; args.notifyEnable = isUsedNotifyEnableForPostSync(); @@ -204,6 +205,14 @@ CompletionStamp CommandStreamReceiverHw::flushBcsTask(LinearStream &c programHardwareContext(commandStreamCSR); + if (debugManager.flags.FlushTlbBeforeCopy.get() == 1) { + MiFlushArgs tlbFlushArgs{waArgs}; + tlbFlushArgs.commandWithPostSync = true; + tlbFlushArgs.tlbFlush = true; + + EncodeMiFlushDW::programWithWa(commandStream, this->globalFenceAllocation->getGpuAddress(), 0, tlbFlushArgs); + } + if (globalFenceAllocation) { makeResident(*globalFenceAllocation); } @@ -1192,6 +1201,14 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert BlitCommandsHelper::encodeProfilingStartMmios(commandStream, *blitProperties.outputTimestampPacket); } + if (debugManager.flags.FlushTlbBeforeCopy.get() == 1) { + MiFlushArgs tlbFlushArgs{waArgs}; + tlbFlushArgs.commandWithPostSync = true; + tlbFlushArgs.tlbFlush = true; + + EncodeMiFlushDW::programWithWa(commandStream, this->globalFenceAllocation->getGpuAddress(), 0, tlbFlushArgs); + } + BlitCommandsHelper::dispatchBlitCommands(blitProperties, commandStream, waArgs); auto dummyAllocation = rootDeviceEnvironment->getDummyAllocation(); if (dummyAllocation) { diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 813e2a7c2e..6fbcde67c2 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -273,6 +273,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, InOrderDuplicatedCounterStorageEnabled, -1, "-1: DECLARE_DEBUG_VARIABLE(int32_t, SetProcessPowerThrottlingState, -1, "-1: default, 0: Disabled, 1: ECO, 2: HIGH. If set, will override process power throttling state on os context init. Windows only.") DECLARE_DEBUG_VARIABLE(int32_t, SetThreadPriority, -1, "-1: default, 0: Disabled, 1: Enabled. If set, will set thread priority to above normal on os context init. Windows only.") DECLARE_DEBUG_VARIABLE(int32_t, OverrideCpuCaching, -1, "-1: default, 1: DRM_XE_GEM_CPU_CACHING_WB, 2: DRM_XE_GEM_CPU_CACHING_WC") +DECLARE_DEBUG_VARIABLE(int32_t, FlushTlbBeforeCopy, -1, "-1: default, 0: Dont flush, 1: flush TLB as part of MI_FLUSH_DW/PIPE_CONTROL command before copy operation") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 865b7295b2..9b085cd35f 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -585,4 +585,5 @@ SetThreadPriority = -1 ExperimentalEnableHostAllocationCache = -1 OverridePatIndexForUncachedTypes = -1 OverridePatIndexForCachedTypes = -1 +FlushTlbBeforeCopy = -1 # Please don't edit below this line