fix: Add debug toggle to disable L3 flush for host USM

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2025-05-06 16:49:26 +00:00
committed by Compute-Runtime-Automation
parent 8d1e83208b
commit ba85f7417d
4 changed files with 15 additions and 2 deletions

View File

@@ -362,6 +362,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType);
auto isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation;
auto isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation;
if (NEO::debugManager.flags.DisableFlushL3ForHostUsm.get() && isFlushL3ForHostUsmRequired) {
isFlushL3ForExternalAllocationRequired = true;
isFlushL3ForHostUsmRequired = false;
}
NEO::EncodeKernelArgsExt dispatchKernelArgsExt = {};
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
@@ -390,8 +396,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
.isUsingSystemAllocation = isKernelUsingSystemAllocation,
.dcFlushEnable = this->dcFlushSupport,
.interruptEvent = interruptEvent,
.isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation,
.isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation,
.isFlushL3ForExternalAllocationRequired = isFlushL3ForExternalAllocationRequired,
.isFlushL3ForHostUsmRequired = isFlushL3ForHostUsmRequired,
},
.preemptionMode = kernelPreemptionMode,
.requiredPartitionDim = launchParams.requiredPartitionDim,

View File

@@ -107,6 +107,11 @@ inline void HardwareInterface<GfxFamily>::programWalker(
bool flushL3AfterPostSyncForHostUsm = kernelSystemAllocation;
bool flushL3AfterPostSyncForExternalAllocation = kernel.isUsingSharedObjArgs();
if (debugManager.flags.DisableFlushL3ForHostUsm.get() && flushL3AfterPostSyncForHostUsm) {
flushL3AfterPostSyncForHostUsm = false;
flushL3AfterPostSyncForExternalAllocation = true;
}
GpgpuWalkerHelper<GfxFamily>::template setupTimestampPacketFlushL3<WalkerType>(&walkerCmd, productHelper, flushL3AfterPostSyncForHostUsm, flushL3AfterPostSyncForExternalAllocation);
}
}

View File

@@ -627,6 +627,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPrefetch, false, "Enables prefetchi
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingDevMemPrefetch, false, "Enables prefetching of Device Memory chunks")
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPreferredLocationHint, false, "Enables preferred location advise on chunks")
DECLARE_DEBUG_VARIABLE(bool, EnableCompatibilityMode, true, "Enables compatibility mode for platforms which can use precompiled base platform configuration")
// Debug toggle, off by default. Where it is read (appendLaunchKernelWithParams and
// HardwareInterface::programWalker), a dispatch that would request the host-USM L3 flush
// has that request cleared and the external-allocation L3 flush requested in its place.
DECLARE_DEBUG_VARIABLE(bool, DisableFlushL3ForHostUsm, false, "Disables L3 flush for host usm")
DECLARE_DEBUG_VARIABLE(int32_t, EnableBOChunking, -1, "Enables use of chunking of BOs in the KMD, mask: -1 = default, 0 = no chunking, 1 = shared allocations only, 2 = multi-tile device allocations only, 3 = shared and multi-tile device allocations .")
DECLARE_DEBUG_VARIABLE(int32_t, DestroyAllocationsViaGmm, -1, "Use DeAllocate2 wrapper instead of raw GDI destroy allocations")
DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use")

View File

@@ -545,6 +545,7 @@ EnableBOChunkingDevMemPrefetch = 0
EnableBOChunkingPreferredLocationHint = 0
DestroyAllocationsViaGmm = -1
EnableCompatibilityMode = 1
DisableFlushL3ForHostUsm = 0
NumberOfBOChunks = 2
SetBOChunkingSize = -1
EnableBOChunking = -1