diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index eca5110721..64928bd3bb 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -362,6 +362,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType); + auto isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation; + auto isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation; + if (NEO::debugManager.flags.DisableFlushL3ForHostUsm.get() && isFlushL3ForHostUsmRequired) { + isFlushL3ForExternalAllocationRequired = true; + isFlushL3ForHostUsmRequired = false; + } NEO::EncodeKernelArgsExt dispatchKernelArgsExt = {}; NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ @@ -390,8 +396,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K .isUsingSystemAllocation = isKernelUsingSystemAllocation, .dcFlushEnable = this->dcFlushSupport, .interruptEvent = interruptEvent, - .isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation, - .isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation, + .isFlushL3ForExternalAllocationRequired = isFlushL3ForExternalAllocationRequired, + .isFlushL3ForHostUsmRequired = isFlushL3ForHostUsmRequired, }, .preemptionMode = kernelPreemptionMode, .requiredPartitionDim = launchParams.requiredPartitionDim, diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 3e656c7fd3..8fa989d4d4 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -107,6 +107,11 @@ inline void HardwareInterface::programWalker( bool flushL3AfterPostSyncForHostUsm = kernelSystemAllocation; bool flushL3AfterPostSyncForExternalAllocation = kernel.isUsingSharedObjArgs(); + if (debugManager.flags.DisableFlushL3ForHostUsm.get() && flushL3AfterPostSyncForHostUsm) { + flushL3AfterPostSyncForHostUsm = false; + flushL3AfterPostSyncForExternalAllocation = true; + } + GpgpuWalkerHelper::template setupTimestampPacketFlushL3(&walkerCmd, productHelper, flushL3AfterPostSyncForHostUsm, flushL3AfterPostSyncForExternalAllocation); } } diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index aa61a2772a..517c09b7ad 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -627,6 +627,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPrefetch, false, "Enables prefetchi DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingDevMemPrefetch, false, "Enables prefetching of Device Memory chunks") DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPreferredLocationHint, false, "Enables preferred location advise on chunks") DECLARE_DEBUG_VARIABLE(bool, EnableCompatibilityMode, true, "Enables compatibility mode for platforms which can use precompiled base platform configuration") +DECLARE_DEBUG_VARIABLE(bool, DisableFlushL3ForHostUsm, false, "Disables L3 flush for host usm") DECLARE_DEBUG_VARIABLE(int32_t, EnableBOChunking, -1, "Enables use of chunking of BOs in the KMD, mask: -1 = default, 0 = no chunking, 1 = shared allocations only, 2 = multi-tile device allocations only, 3 = shared and multi-tile device allocations .") DECLARE_DEBUG_VARIABLE(int32_t, DestroyAllocationsViaGmm, -1, "Use DeAllocate2 wrapper instead of raw GDI destroy allocations") DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 92c0343f1a..02a58ee0a8 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -545,6 +545,7 @@ EnableBOChunkingDevMemPrefetch = 0 EnableBOChunkingPreferredLocationHint = 0 DestroyAllocationsViaGmm = -1 EnableCompatibilityMode = 1 +DisableFlushL3ForHostUsm = 0 NumberOfBOChunks = 2 SetBOChunkingSize = -1 EnableBOChunking = -1