fix: Add debug toggle to disable L3 flush for host USM

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
2025-12-30 09:58:55 +08:00 · 2025-05-06 16:49:26 +00:00
parent 8d1e83208b
commit ba85f7417d
4 changed files with 15 additions and 2 deletions
--- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl
+++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl
@@ -362,6 +362,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K

    auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType);

+    auto isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation;
+    auto isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation;
+    if (NEO::debugManager.flags.DisableFlushL3ForHostUsm.get() && isFlushL3ForHostUsmRequired) {
+        isFlushL3ForExternalAllocationRequired = true;
+        isFlushL3ForHostUsmRequired = false;
+    }
    NEO::EncodeKernelArgsExt dispatchKernelArgsExt = {};

    NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
@@ -390,8 +396,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
            .isUsingSystemAllocation = isKernelUsingSystemAllocation,
            .dcFlushEnable = this->dcFlushSupport,
            .interruptEvent = interruptEvent,
-            .isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation,
-            .isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation,
+            .isFlushL3ForExternalAllocationRequired = isFlushL3ForExternalAllocationRequired,
+            .isFlushL3ForHostUsmRequired = isFlushL3ForHostUsmRequired,
        },
        .preemptionMode = kernelPreemptionMode,
        .requiredPartitionDim = launchParams.requiredPartitionDim,
--- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl
+++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl
@@ -107,6 +107,11 @@ inline void HardwareInterface<GfxFamily>::programWalker(
            bool flushL3AfterPostSyncForHostUsm = kernelSystemAllocation;
            bool flushL3AfterPostSyncForExternalAllocation = kernel.isUsingSharedObjArgs();

+            if (debugManager.flags.DisableFlushL3ForHostUsm.get() && flushL3AfterPostSyncForHostUsm) {
+                flushL3AfterPostSyncForHostUsm = false;
+                flushL3AfterPostSyncForExternalAllocation = true;
+            }
+
            GpgpuWalkerHelper<GfxFamily>::template setupTimestampPacketFlushL3<WalkerType>(&walkerCmd, productHelper, flushL3AfterPostSyncForHostUsm, flushL3AfterPostSyncForExternalAllocation);
        }
    }
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -627,6 +627,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPrefetch, false, "Enables prefetchi
 DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingDevMemPrefetch, false, "Enables prefetching of Device Memory chunks")
 DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPreferredLocationHint, false, "Enables preferred location advise on chunks")
 DECLARE_DEBUG_VARIABLE(bool, EnableCompatibilityMode, true, "Enables compatibility mode for platforms which can use precompiled base platform configuration")
+DECLARE_DEBUG_VARIABLE(bool, DisableFlushL3ForHostUsm, false, "Disables L3 flush for host usm")
 DECLARE_DEBUG_VARIABLE(int32_t, EnableBOChunking, -1, "Enables use of chunking of BOs in the KMD, mask: -1 = default, 0 = no chunking, 1 = shared allocations only, 2 = multi-tile device allocations only, 3 = shared and multi-tile device allocations .")
 DECLARE_DEBUG_VARIABLE(int32_t, DestroyAllocationsViaGmm, -1, "Use DeAllocate2 wrapper instead of raw GDI destroy allocations")
 DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use")
--- a/shared/test/common/test_files/igdrcl.config
+++ b/shared/test/common/test_files/igdrcl.config
@@ -545,6 +545,7 @@ EnableBOChunkingDevMemPrefetch = 0
 EnableBOChunkingPreferredLocationHint = 0
 DestroyAllocationsViaGmm = -1
 EnableCompatibilityMode = 1
+DisableFlushL3ForHostUsm = 0
 NumberOfBOChunks = 2
 SetBOChunkingSize = -1
 EnableBOChunking = -1