diff --git a/level_zero/core/source/helpers/api_specific_config_l0.cpp b/level_zero/core/source/helpers/api_specific_config_l0.cpp index 11a8004ae1..0df2f42647 100644 --- a/level_zero/core/source/helpers/api_specific_config_l0.cpp +++ b/level_zero/core/source/helpers/api_specific_config_l0.cpp @@ -110,4 +110,8 @@ bool ApiSpecificConfig::isGlobalStatelessEnabled(const RootDeviceEnvironment &ro return l0GfxCoreHelper.getHeapAddressModel(rootDeviceEnvironment) == HeapAddressModel::globalStateless; } +bool ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless() { + return false; +} + } // namespace NEO diff --git a/level_zero/core/test/unit_tests/sources/helper/api_specific_config_l0_tests.cpp b/level_zero/core/test/unit_tests/sources/helper/api_specific_config_l0_tests.cpp index 007a1d4e69..0435102d98 100644 --- a/level_zero/core/test/unit_tests/sources/helper/api_specific_config_l0_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/helper/api_specific_config_l0_tests.cpp @@ -88,6 +88,10 @@ TEST(ImplicitScalingApiTests, givenLevelZeroApiUsedThenSupportEnabled) { EXPECT_TRUE(ImplicitScaling::apiSupport); } +TEST(ApiSpecificConfigL0Tests, WhenCheckingIsUpdateTagFromWaitEnabledForHeaplessThenFalseIsReturned) { + EXPECT_FALSE(ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless()); +} + TEST(ApiSpecificConfigL0Tests, WhenGettingCompilerCacheFileExtensionThenReturnProperFileExtensionString) { EXPECT_EQ(0, strcmp(".l0_cache", ApiSpecificConfig::compilerCacheFileExtension().c_str())); } diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index c7c0fc5121..cf73b542de 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -1334,8 +1334,14 @@ bool CommandQueue::isWaitForTimestampsEnabled() const { auto &productHelper = getDevice().getProductHelper(); auto enabled = CommandQueue::isTimestampWaitEnabled(); - enabled &= productHelper.isTimestampWaitSupportedForQueues(false); - enabled &= !productHelper.isDcFlushAllowed(); + enabled &= productHelper.isTimestampWaitSupportedForQueues(this->heaplessModeEnabled); + + if (productHelper.isL3FlushAfterPostSyncRequired(this->heaplessModeEnabled)) { + enabled &= true; + } else { + enabled &= !productHelper.isDcFlushAllowed(); + } + enabled &= !getDevice().getRootDeviceEnvironment().isWddmOnLinux(); enabled &= !this->isOOQEnabled(); // TSP for OOQ dispatch is optional. We need to wait for task count. diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index da270ab153..56d0d48f67 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -69,6 +69,13 @@ class GpgpuWalkerHelper { TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); + template + static void setupTimestampPacketFlushL3( + WalkerType *walkerCmd, + const ProductHelper &productHelper, + bool flushL3AfterPostSyncForHostUsm, + bool flushL3AfterPostSyncForExternalAllocation); + static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd); private: diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 791832fa73..d50596ff59 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -160,4 +160,11 @@ size_t EnqueueOperation::getSizeRequiredCSNonKernel(bool reserveProfi return size; } +template +template +void GpgpuWalkerHelper::setupTimestampPacketFlushL3(WalkerType *walkerCmd, + const ProductHelper &productHelper, + bool flushL3AfterPostSyncForHostUsm, + bool flushL3AfterPostSyncForExternalAllocation) { +} } // namespace NEO diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 8bef16a864..8bf0042f9b 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -84,19 +84,35 @@ inline void HardwareInterface::programWalker( auto &device = commandQueue.getDevice(); auto &rootDeviceEnvironment = device.getRootDeviceEnvironment(); + bool kernelSystemAllocation = false; + if (kernel.isBuiltIn) { + kernelSystemAllocation = kernel.getDestinationAllocationInSystemMemory(); + } else { + kernelSystemAllocation = kernel.isAnyKernelArgumentUsingSystemMemory(); + } + TagNodeBase *timestampPacketNode = nullptr; if (walkerArgs.currentTimestampPacketNodes && (walkerArgs.currentTimestampPacketNodes->peekNodes().size() > walkerArgs.currentDispatchIndex)) { timestampPacketNode = walkerArgs.currentTimestampPacketNodes->peekNodes()[walkerArgs.currentDispatchIndex]; } + constexpr bool heaplessModeEnabled = GfxFamily::template isHeaplessMode(); + if (timestampPacketNode) { + GpgpuWalkerHelper::template setupTimestampPacket(&commandStream, &walkerCmd, timestampPacketNode, rootDeviceEnvironment); + + if constexpr (heaplessModeEnabled) { + auto &productHelper = rootDeviceEnvironment.getHelper(); + bool flushL3AfterPostSyncForHostUsm = kernelSystemAllocation; + bool flushL3AfterPostSyncForExternalAllocation = kernel.isUsingSharedObjArgs(); + + GpgpuWalkerHelper::template setupTimestampPacketFlushL3(&walkerCmd, productHelper, flushL3AfterPostSyncForHostUsm, flushL3AfterPostSyncForExternalAllocation); + } } auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); - constexpr bool heaplessModeEnabled = GfxFamily::template isHeaplessMode(); - if constexpr (heaplessModeEnabled == false) { if (auto kernelAllocation = kernelInfo.getGraphicsAllocation()) { EncodeMemoryPrefetch::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.kernelHeapSize, 0, rootDeviceEnvironment); @@ -135,13 +151,6 @@ inline void HardwareInterface::programWalker( scratchAddress, device); - bool kernelSystemAllocation = false; - if (kernel.isBuiltIn) { - kernelSystemAllocation = kernel.getDestinationAllocationInSystemMemory(); - } else { - kernelSystemAllocation = kernel.isAnyKernelArgumentUsingSystemMemory(); - } - EncodeWalkerArgs encodeWalkerArgs{ .kernelExecutionType = kernel.getExecutionType(), .requiredDispatchWalkOrder = kernelAttributes.dispatchWalkOrder, diff --git a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp index cec0f785ef..f934a28815 100644 --- a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp +++ b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp @@ -297,6 +297,7 @@ template void HardwareInterface::dispatchKernelCommands::allocateWalkerSpace(LinearStream &commandStream, const Kernel &kernel); template class GpgpuWalkerHelper; +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/source/helpers/api_specific_config_ocl.cpp b/opencl/source/helpers/api_specific_config_ocl.cpp index f8b868a1f5..f88742c036 100644 --- a/opencl/source/helpers/api_specific_config_ocl.cpp +++ b/opencl/source/helpers/api_specific_config_ocl.cpp @@ -89,4 +89,8 @@ bool ApiSpecificConfig::isGlobalStatelessEnabled(const RootDeviceEnvironment &ro return false; } +bool ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless() { + return true; +} + } // namespace NEO diff --git a/opencl/source/xe2_hpg_core/gpgpu_walker_xe2_hpg_core.cpp b/opencl/source/xe2_hpg_core/gpgpu_walker_xe2_hpg_core.cpp index d34f13c550..f8311ce0bc 100644 --- a/opencl/source/xe2_hpg_core/gpgpu_walker_xe2_hpg_core.cpp +++ b/opencl/source/xe2_hpg_core/gpgpu_walker_xe2_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,6 +15,8 @@ namespace NEO { using Family = Xe2HpgCoreFamily; template class GpgpuWalkerHelper; + +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/source/xe3_core/gpgpu_walker_xe3_core.cpp b/opencl/source/xe3_core/gpgpu_walker_xe3_core.cpp index 34908bfa92..d576a319f9 100644 --- a/opencl/source/xe3_core/gpgpu_walker_xe3_core.cpp +++ b/opencl/source/xe3_core/gpgpu_walker_xe3_core.cpp @@ -16,6 +16,8 @@ namespace NEO { using Family = Xe3CoreFamily; template class GpgpuWalkerHelper; + +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp b/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp index 0dc4ed7619..42e235492c 100644 --- a/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp +++ b/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -23,6 +23,8 @@ void GpgpuWalkerHelper::setSystolicModeEnable(Family::COMPUTE_WALKER *wa } template class GpgpuWalkerHelper; + +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp b/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp index 94736a70e9..1333b8a45f 100644 --- a/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp +++ b/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -23,6 +23,8 @@ void GpgpuWalkerHelper::setSystolicModeEnable(Family::COMPUTE_WALKER *wa } template class GpgpuWalkerHelper; + +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 7ec9ac884e..29ae81fefe 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -170,7 +170,18 @@ TEST(CommandQueue, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabledThenRe { debugManager.flags.EnableTimestampWaitForQueues.set(-1); const auto &productHelper = mockDevice->getProductHelper(); - EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), productHelper.isTimestampWaitSupportedForQueues(false) && !productHelper.isDcFlushAllowed()); + const auto &compilerProductHelper = mockDevice->getCompilerProductHelper(); + bool heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(); + + auto enabled = productHelper.isTimestampWaitSupportedForQueues(heaplessEnabled); + + if (productHelper.isL3FlushAfterPostSyncRequired(heaplessEnabled)) { + enabled &= true; + } else { + enabled &= !productHelper.isDcFlushAllowed(); + } + + EXPECT_EQ(enabled, cmdQ.isWaitForTimestampsEnabled()); } { diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 1cebaa8915..932dbf124d 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -2015,7 +2015,7 @@ TEST(EventTimestampTest, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabled { debugManager.flags.EnableTimestampWaitForEvents.set(-1); const auto &productHelper = mockDevice->getRootDeviceEnvironment().getHelper(); - EXPECT_EQ(event.isWaitForTimestampsEnabled(), productHelper.isTimestampWaitSupportedForEvents()); + EXPECT_EQ(event.isWaitForTimestampsEnabled(), productHelper.isTimestampWaitSupportedForEvents() && cmdQ.isTimestampWaitEnabled()); } { diff --git a/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp b/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp index bd316369cb..b7d80b1681 100644 --- a/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp +++ b/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -80,4 +80,8 @@ TEST(ApiSpecificConfigOclTests, WhenCheckingIfCompilerCacheIsEnabledByDefaultThe EXPECT_EQ(1u, ApiSpecificConfig::compilerCacheDefaultEnabled()); } +TEST(ApiSpecificConfigOclTests, WhenCheckingIsUpdateTagFromWaitEnabledForHeaplessThenTrueIsReturned) { + EXPECT_TRUE(ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless()); +} + } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index bc94a5d76f..be7d871f50 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -24,6 +24,7 @@ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/page_table_mngr.h" +#include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/blit_properties.h" #include "shared/source/helpers/compiler_product_helper.h" @@ -1300,8 +1301,15 @@ inline SubmissionStatus CommandStreamReceiverHw::flushHandler(BatchBu template inline bool CommandStreamReceiverHw::isUpdateTagFromWaitEnabled() { auto &gfxCoreHelper = getGfxCoreHelper(); + auto &productHelper = this->peekRootDeviceEnvironment().template getHelper(); + auto enabled = gfxCoreHelper.isUpdateTaskCountFromWaitSupported(); - enabled &= this->isAnyDirectSubmissionEnabled(); + + if (productHelper.isL3FlushAfterPostSyncRequired(this->heaplessModeEnabled) && ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless()) { + enabled &= true; + } else { + enabled &= this->isAnyDirectSubmissionEnabled(); + } switch (debugManager.flags.UpdateTaskCountFromWait.get()) { case 0: diff --git a/shared/source/helpers/api_specific_config.h b/shared/source/helpers/api_specific_config.h index 08197e9427..24475b91bd 100644 --- a/shared/source/helpers/api_specific_config.h +++ b/shared/source/helpers/api_specific_config.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -44,5 +44,6 @@ struct ApiSpecificConfig { static std::string compilerCacheLocation(); static std::string compilerCacheFileExtension(); static int64_t compilerCacheDefaultEnabled(); + static bool isUpdateTagFromWaitEnabledForHeapless(); }; } // namespace NEO diff --git a/shared/test/unit_test/api_specific_config_ult.cpp b/shared/test/unit_test/api_specific_config_ult.cpp index 4cb90056b6..927c4f2ed9 100644 --- a/shared/test/unit_test/api_specific_config_ult.cpp +++ b/shared/test/unit_test/api_specific_config_ult.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -113,6 +113,10 @@ std::string ApiSpecificConfig::compilerCacheFileExtension() { return ".cl_cache"; } +bool ApiSpecificConfig::isUpdateTagFromWaitEnabledForHeapless() { + return true; +} + int64_t ApiSpecificConfig::compilerCacheDefaultEnabled() { return 1l; } diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index a896dbb0fa..f199e51581 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -935,6 +935,9 @@ HWTEST_F(CommandStreamReceiverTest, givenCsrWhenUllsDisabledAndStopDirectSubmiss } HWTEST_F(CommandStreamReceiverTest, givenNoDirectSubmissionWhenCheckTaskCountFromWaitEnabledThenReturnsFalse) { + DebugManagerStateRestore restorer; + NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(0); + auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_FALSE(csr.isUpdateTagFromWaitEnabled()); } @@ -971,6 +974,10 @@ HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckTaskCou } HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckIfEnabledThenCanBeEnabledOnlyWithDirectSubmission) { + + DebugManagerStateRestore restorer; + NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(0); + auto &csr = pDevice->getUltCommandStreamReceiver(); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); @@ -989,6 +996,8 @@ HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitInMultiRootDevic DebugManagerStateRestore restorer; debugManager.flags.CreateMultipleRootDevices.set(2); + NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(0); + TearDown(); SetUp(); auto &csr = pDevice->getUltCommandStreamReceiver();