diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 65401962f5..80594293a9 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -370,7 +370,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K auto isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation; auto isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation; - if (NEO::debugManager.flags.DisableFlushL3ForHostUsm.get() && isFlushL3ForHostUsmRequired) { + + if (NEO::debugManager.flags.RedirectFlushL3HostUsmToExternal.get() && isFlushL3ForHostUsmRequired) { isFlushL3ForExternalAllocationRequired = true; isFlushL3ForHostUsmRequired = false; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index 54ed8a952a..913ccbef5d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -650,7 +650,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; DebugManagerStateRestore restorer; - NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(0); + NEO::debugManager.flags.EnableL3FlushAfterPostSync.set(0); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp index 8a6cbcfeaf..4848390ed9 100644 --- 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp @@ -45,7 +45,7 @@ struct AppendMemoryCopyMultiPacketEventFixture : public DeviceFixture { void setUp() { debugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync); debugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); if constexpr (multiTile == 1) { debugManager.flags.CreateMultipleSubDevices.set(2); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp index f0e6af6b92..03085f3972 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp @@ -45,7 +45,7 @@ struct AppendFillMultiPacketEventFixture : public AppendFillFixture { void setUp() { debugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync); debugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); if constexpr (multiTile == 1) { debugManager.flags.CreateMultipleSubDevices.set(2); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp index c770f3feb7..72149fcc4a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp @@ -1937,7 +1937,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, 
givenNonInOrderCmdListWhenPass HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenCmdsChainingFromAppendCopyWhenDispatchingKernelThenProgramSemaphoreOnce) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; DebugManagerStateRestore restorer; - NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(1); + NEO::debugManager.flags.EnableL3FlushAfterPostSync.set(1); auto immCmdList = createImmCmdList(); bool heaplessEnabled = immCmdList->isHeaplessModeEnabled(); @@ -1991,7 +1991,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenCmdsChainingFromAppendCop using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; DebugManagerStateRestore restorer; - NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(1); + NEO::debugManager.flags.EnableL3FlushAfterPostSync.set(1); auto immCmdList = createImmCmdList(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index f8905736b5..8561bd6a51 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -2302,7 +2302,7 @@ HWTEST_F(StandaloneInOrderTimestampAllocationTests, givenDebugFlagSetToZeroWhenA HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenNonWalkerCounterSignalingWhenPassedNonProfilingEventThenNotAssignAllocation, IsAtLeastXeHpCore) { DebugManagerStateRestore restorer; - NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(0); + NEO::debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto eventPool = createEvents(1, false); auto eventHandle = events[0]->toHandle(); diff --git a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp index 1d43892cd3..1d487e6e26 100644 --- a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp +++ 
b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp @@ -1046,7 +1046,7 @@ struct L0GfxCoreHelperMultiPacketEventFixture { void setUp() { debugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync); debugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); } void tearDown() { diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index d7889c60db..2228907424 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -107,7 +107,7 @@ inline void HardwareInterface::programWalker( bool flushL3AfterPostSyncForHostUsm = kernelSystemAllocation || kernel.isAnyKernelArgumentUsingZeroCopyMemory(); bool flushL3AfterPostSyncForExternalAllocation = kernel.isUsingSharedObjArgs(); - if (debugManager.flags.DisableFlushL3ForHostUsm.get() && flushL3AfterPostSyncForHostUsm) { + if (debugManager.flags.RedirectFlushL3HostUsmToExternal.get() && flushL3AfterPostSyncForHostUsm) { flushL3AfterPostSyncForHostUsm = false; flushL3AfterPostSyncForExternalAllocation = true; } diff --git a/opencl/test/unit_test/api/cl_create_command_queue_tests.inl b/opencl/test/unit_test/api/cl_create_command_queue_tests.inl index 36f0d0e0a4..05aa79e2a9 100644 --- a/opencl/test/unit_test/api/cl_create_command_queue_tests.inl +++ b/opencl/test/unit_test/api/cl_create_command_queue_tests.inl @@ -71,7 +71,7 @@ TEST_F(ClCreateCommandQueueTest, givenOoqParametersWhenQueueIsCreatedThenQueueIs HWTEST_F(ClCreateCommandQueueTest, givenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToBatchingMode) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + 
debugManager.flags.EnableL3FlushAfterPostSync.set(0); using BaseType = typename CommandQueue::BaseType; cl_int retVal = CL_SUCCESS; @@ -123,7 +123,7 @@ HWTEST_F(ClCreateCommandQueueTest, givenForcedDispatchModeAndOoqParametersWhenQu HWTEST_F(ClCreateCommandQueueTest, givenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToNTo1SubmissionModel) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); using BaseType = typename CommandQueue::BaseType; cl_int retVal = CL_SUCCESS; diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 550b77d1dd..7a51fd0846 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -561,7 +561,7 @@ HWTEST_F(DispatchFlagsTests, givenMockKernelWhenSettingAdditionalKernelExecInfoT HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenEnqueuedHandlerThenProgramPipeControl) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 9872bd3300..2d5fa4562d 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -1071,7 +1071,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTwoEnqueueProgrammedWithinS HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesSubmissionsAreFlushed) { DebugManagerStateRestore restorer{}; - 
debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto *mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); @@ -1095,7 +1095,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenFi HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenThressEnqueueKernelsAreCalledThenBatchesSubmissionsAreFlushed) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto *mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); @@ -1119,7 +1119,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenTh HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenWaitForEventsIsCalledThenBatchedSubmissionsAreFlushed) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto *mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); mockCsr->useNewResourceImplicitFlush = false; @@ -1147,7 +1147,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenWa HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenCommandIsFlushedThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto *mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); @@ -1205,7 +1205,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenNo 
HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenCommandWithEventIsFollowedByCommandWithoutEventThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto *mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); @@ -1296,7 +1296,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenWa HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenFinishIsCalledWithUnflushedTaskCountThenBatchedSubmissionsAreFlushed) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto *mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); @@ -1322,7 +1322,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenFi HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenOutOfOrderCommandQueueWhenEnqueueKernelIsMadeThenPipeControlPositionIsRecorded) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto ooq = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); @@ -1348,7 +1348,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenOutOfOrderCommandQueueW HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelIsMadeThenPipeControlPositionIsRecorded) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); const cl_queue_properties props[] = {0}; auto 
inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); @@ -1373,7 +1373,7 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelIsMadeThenP HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenInOrderCommandQueueWhenEnqueueKernelThatHasSharedObjectsAsArgIsMadeThenPipeControlPositionIsRecorded) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); @@ -1445,7 +1445,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenInOrderCommandQueueWhen HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenInOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeAndCommandStreamReceiverIsInNTo1ModeThenPipeControlPositionIsRecorded) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); const cl_queue_properties props[] = {0}; @@ -1477,7 +1477,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenInOrderCommandQueueWhen HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenOutOfOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsRecorded) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto *mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp index ed75535ebe..88a7cbb6c1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp @@ -216,7 
+216,7 @@ TEST_F(EventTests, GivenNoEventsWhenEnqueuingKernelThenTaskLevelIsIncremented) { TEST_F(EventTests, WhenEnqueuingMarkerThenPassedEventHasTheSameLevelAsPreviousCommand) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp index 4593f54a2b..0bafef94ec 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp @@ -38,7 +38,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, IOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnque HWTEST_F(IOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeControlIsInsertedBetweenWalkers) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; diff --git a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp index 94c7147aed..329cd87bd0 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp @@ -722,7 +722,7 @@ HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWr using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); MockDefaultContext context{true}; diff --git 
a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index 6838df7d9b..8aa5c7c951 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -537,7 +537,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandStreamReceiverFlushTaskTests, givenNothing HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrInBatchingModeWhenFlushTaskIsCalledThenFlushedTaskCountIsNotModifed) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); @@ -688,7 +688,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTask HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledGivenNumberOfTimesThenFlushIsCalled) { DebugManagerStateRestore restorer; debugManager.flags.PerformImplicitFlushEveryEnqueueCount.set(2); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); @@ -723,7 +723,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrInBatchingModeWhenWaitForTaskCountIsCalledWithTaskCountThatWasNotYetFlushedThenBatchedCommandBuffersAreSubmitted) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); @@ 
-864,7 +864,7 @@ struct MockedMemoryManager : public OsAgnosticMemoryManager { HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTotalResourceUsedExhaustsTheBudgetThenDoImplicitFlush) { DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index 510c7f0830..bc7610a14b 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -620,7 +620,7 @@ HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstru HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferAndForceTlbFlushAfterCopyThenCommandBufferIsConstructedProperlyAndTlbFlushDetected) { DebugManagerStateRestore restorer; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); debugManager.flags.ForceTlbFlushWithTaskCountAfterCopy.set(1); using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto bcsOsContext = std::unique_ptr(OsContext::create(nullptr, pDevice->getRootDeviceIndex(), 0, @@ -677,7 +677,7 @@ HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferAndForceTlbFlushAfterCopyT HWTEST_F(BcsTests, givenProfilingDisabledWhenBlitBufferAndForceTlbFlushAfterCopyThenCommandBufferIsConstructedProperlyAndTlbFlushDetected) { DebugManagerStateRestore restorer; debugManager.flags.ForceTlbFlushWithTaskCountAfterCopy.set(1); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto bcsOsContext = std::unique_ptr(OsContext::create(nullptr, 
pDevice->getRootDeviceIndex(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::regular}, pDevice->getDeviceBitfield()))); diff --git a/opencl/test/unit_test/command_stream/create_command_stream_receiver_tests.cpp b/opencl/test/unit_test/command_stream/create_command_stream_receiver_tests.cpp index 3f7c6c15d1..b5a9b50c35 100644 --- a/opencl/test/unit_test/command_stream/create_command_stream_receiver_tests.cpp +++ b/opencl/test/unit_test/command_stream/create_command_stream_receiver_tests.cpp @@ -22,7 +22,7 @@ struct CreateCommandStreamReceiverTest : public ::testing::TestWithParamgetProductHelper(); auto copyDefaultEngineType = productHelper.getDefaultCopyEngine(); auto mockCmdQ = static_cast *>(commandQueue.get()); diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 4ea38f5f0f..4a1dcfbfcb 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -1108,7 +1108,7 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; DebugManagerStateRestore restorer{}; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); pCmdQ->setPerfCountersEnabled(); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index a22abdc174..c1745616c9 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -421,7 +421,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseHighAlignmentForHeapExtended, -1, "-1: defaul DECLARE_DEBUG_VARIABLE(int32_t, DispatchCmdlistCmdBufferPrimary, -1, "-1: default, 0: dispatch command buffers as secondary, 1: dispatch command buffers as primary and chain") DECLARE_DEBUG_VARIABLE(int32_t, 
UseImmediateFlushTask, -1, "-1: default, 0: use regular flush task, 1: use immediate flush task") DECLARE_DEBUG_VARIABLE(int32_t, SkipDcFlushOnBarrierWithoutEvents, -1, "-1: default (enabled), 0: disabled, 1: enabled") -DECLARE_DEBUG_VARIABLE(int32_t, ForceL3FlushAfterPostSync, -1, "-1: default, 0: disabled, 1: enabled. If enabled flush L3 after post sync operation") +DECLARE_DEBUG_VARIABLE(int32_t, EnableL3FlushAfterPostSync, -1, "-1: default, 0: disabled, 1: enabled. If enabled flush L3 after post sync operation") DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceUsmAllocationPool, -1, "-1: default (enabled, 2MB), 0: disabled, >=1: enabled, size in MB") DECLARE_DEBUG_VARIABLE(int32_t, EnableHostUsmAllocationPool, -1, "-1: default (enabled, 2MB), 0: disabled, >=1: enabled, size in MB") DECLARE_DEBUG_VARIABLE(int32_t, UseLocalPreferredForCacheableBuffers, -1, "Use localPreferred for cacheable buffers") @@ -620,7 +620,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPrefetch, false, "Enables prefetchi DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingDevMemPrefetch, false, "Enables prefetching of Device Memory chunks") DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPreferredLocationHint, false, "Enables preferred location advise on chunks") DECLARE_DEBUG_VARIABLE(bool, EnableCompatibilityMode, true, "Enables compatibility mode for platforms which can use precompiled base platform configuration") -DECLARE_DEBUG_VARIABLE(bool, DisableFlushL3ForHostUsm, false, "Disables L3 flush for host usm") +DECLARE_DEBUG_VARIABLE(bool, RedirectFlushL3HostUsmToExternal, true, "If L3 flush for host usm is needed it will be rerouted to follow the external allocation flush logic") DECLARE_DEBUG_VARIABLE(int32_t, EnableBOChunking, -1, "Enables use of chunking of BOs in the KMD, mask: -1 = default, 0 = no chunking, 1 = shared allocations only, 2 = multi-tile device allocations only, 3 = shared and multi-tile device allocations .") DECLARE_DEBUG_VARIABLE(int32_t, DestroyAllocationsViaGmm, -1, "Use 
DeAllocate2 wrapper instead of raw GDI destroy allocations") DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use") diff --git a/shared/test/common/os_interface/linux/drm_command_stream_fixture.h b/shared/test/common/os_interface/linux/drm_command_stream_fixture.h index fee5251e84..2615fd1bc5 100644 --- a/shared/test/common/os_interface/linux/drm_command_stream_fixture.h +++ b/shared/test/common/os_interface/linux/drm_command_stream_fixture.h @@ -39,7 +39,7 @@ class DrmCommandStreamTest : public ::testing::Test { template void setUpT() { // make sure this is disabled, we don't want to test this now - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); debugManager.flags.EnableForcePin.set(false); mock = new DrmMock(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); @@ -129,7 +129,7 @@ class DrmCommandStreamEnhancedTemplate : public ::testing::Test { this->dbgState = std::make_unique(); // make sure this is disabled, we don't want to test this now debugManager.flags.EnableForcePin.set(false); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); mock = DrmType::create(*executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]).release(); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique(); diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 378e989753..4f910b0799 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -538,7 +538,7 @@ EnableBOChunkingDevMemPrefetch = 0 EnableBOChunkingPreferredLocationHint = 0 DestroyAllocationsViaGmm = -1 EnableCompatibilityMode = 1 -DisableFlushL3ForHostUsm = 0 +RedirectFlushL3HostUsmToExternal = 1 NumberOfBOChunks = 2 SetBOChunkingSize = -1 EnableBOChunking = -1 @@ -595,7 +595,7 @@ ExperimentalEnableHostAllocationCache = -1 
OverridePatIndexForUncachedTypes = -1 OverridePatIndexForCachedTypes = -1 FlushTlbBeforeCopy = -1 -ForceL3FlushAfterPostSync = -1 +EnableL3FlushAfterPostSync = -1 EnableUserFenceUponUnbind = -1 EnableWaitOnUserFenceAfterBindAndUnbind = -1 UseGemCreateExtInAllocateMemoryByKMD = -1 diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 41d2090d97..c231d08068 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -938,7 +938,7 @@ HWTEST_F(CommandStreamReceiverTest, givenCsrWhenUllsDisabledAndStopDirectSubmiss HWTEST_F(CommandStreamReceiverTest, givenNoDirectSubmissionWhenCheckTaskCountFromWaitEnabledThenReturnsFalse) { DebugManagerStateRestore restorer; - NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(0); + NEO::debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_FALSE(csr.isUpdateTagFromWaitEnabled()); @@ -978,7 +978,7 @@ HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckTaskCou HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckIfEnabledThenCanBeEnabledOnlyWithDirectSubmission) { DebugManagerStateRestore restorer; - NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(0); + NEO::debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto &csr = pDevice->getUltCommandStreamReceiver(); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); @@ -998,7 +998,7 @@ HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitInMultiRootDevic DebugManagerStateRestore restorer; debugManager.flags.CreateMultipleRootDevices.set(2); - NEO::debugManager.flags.ForceL3FlushAfterPostSync.set(0); + NEO::debugManager.flags.EnableL3FlushAfterPostSync.set(0); TearDown(); SetUp(); diff --git a/shared/test/unit_test/os_interface/linux/device_command_stream_tests.cpp 
b/shared/test/unit_test/os_interface/linux/device_command_stream_tests.cpp index fef2c38b0d..9dbe2c6596 100644 --- a/shared/test/unit_test/os_interface/linux/device_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/device_command_stream_tests.cpp @@ -31,7 +31,7 @@ using namespace NEO; struct DeviceCommandStreamLeaksTest : ::testing::Test { void SetUp() override { - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); HardwareInfo *hwInfo = nullptr; executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); executionEnvironment->incRefInternal(); @@ -48,7 +48,7 @@ struct DeviceCommandStreamLeaksTest : ::testing::Test { HWTEST_F(DeviceCommandStreamLeaksTest, WhenCreatingDeviceCsrThenValidPointerIsReturned) { DebugManagerStateRestore restorer; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); DrmMockSuccess mockDrm(mockFd, *executionEnvironment->rootDeviceEnvironments[0]); EXPECT_NE(nullptr, ptr); @@ -69,7 +69,7 @@ HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultDrmCsrWithAubDumWhenItIsCreat HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultDrmCsrWhenOsInterfaceIsNullptrThenValidateDrm) { DebugManagerStateRestore restorer; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); EXPECT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->osInterface); @@ -99,7 +99,7 @@ HWTEST_F(DeviceCommandStreamLeaksTest, givenEnabledGemCloseWorkerWhenCsrIsCreate ultHwConfig.useGemCloseWorker = true; DebugManagerStateRestore restorer; debugManager.flags.EnableGemCloseWorker.set(1u); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + 
debugManager.flags.EnableL3FlushAfterPostSync.set(0); executionEnvironment->memoryManager = DrmMemoryManager::create(*executionEnvironment); @@ -117,7 +117,7 @@ HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultGemCloseWorkerWhenCsrIsCreate VariableBackup backup(&ultHwConfig); ultHwConfig.useGemCloseWorker = true; DebugManagerStateRestore restorer; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); executionEnvironment->memoryManager = DrmMemoryManager::create(*executionEnvironment); std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_mm_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_mm_tests.cpp index 7d4e1ee972..dddc5d6a53 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_mm_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_mm_tests.cpp @@ -40,7 +40,7 @@ struct DrmCommandStreamMemExecTest : public DrmCommandStreamEnhancedTemplate backup(&apiTypeForUlts, ApiSpecificConfig::L0); MockDrmCsr csr(executionEnvironment, 0, 1); EXPECT_EQ(DispatchMode::immediateDispatch, csr.dispatchMode); @@ -88,7 +88,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenNoTagAddressWhenGettingCompletionA HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenExecBufferErrorWhenFlushInternalThenProperErrorIsReturned) { DebugManagerStateRestore restorer; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); mock->execBufferResult = -1; mock->baseErrno = false; @@ -1347,7 +1347,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, DebugManagerStateRestore restorer; debugManager.flags.EnableUserFenceForCompletionWait.set(0); debugManager.flags.OverrideNotifyEnableForTagUpdatePostSync.set(1); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); 
mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp index b0f254910f..455e70dba5 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp @@ -409,7 +409,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenCheckFlagsWhenFlushingThenSucceeds HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenCheckDrmFreeWhenFlushingThenSucceeds) { DebugManagerStateRestore restorer; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); mock->returnHandle = 17; auto &cs = csr->getCS(); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp index 9183ea1bcb..56c276d674 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp @@ -375,7 +375,7 @@ class DrmCommandStreamForceTileTest : public ::testing::Test { }; template void setUpT() { - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); mock = new DrmMock(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); @@ -482,7 +482,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenPrintIndicesEnabledWhenFlushThenPr struct DrmImplicitScalingCommandStreamTest : ::testing::Test { void SetUp() override { - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); @@ -537,7 +537,7 @@ struct 
DrmImplicitScalingCommandStreamTest : ::testing::Test { HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, givenTwoTilesWhenFlushIsCalledThenExecIsExecutedOnEveryTile) { DebugManagerStateRestore restorer; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); auto csr = createCsr(); @@ -638,7 +638,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, whenForceExecu DebugManagerStateRestore restorer; debugManager.flags.ForceExecutionTile.set(1); debugManager.flags.EnableWalkerPartition.set(0); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); struct MockCsr : DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; @@ -680,7 +680,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, whenForceExecu HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, givenDisabledImplicitScalingWhenFlushingThenUseOnlyOneContext) { DebugManagerStateRestore debugRestore{}; debugManager.flags.EnableWalkerPartition.set(0); - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); struct MockCsr : DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; @@ -720,7 +720,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, givenDisabledI HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, givenMultiTileCsrWhenFlushThenVmHandleIdEqualsTileId) { DebugManagerStateRestore restorer; - debugManager.flags.ForceL3FlushAfterPostSync.set(0); + debugManager.flags.EnableL3FlushAfterPostSync.set(0); struct MockCsr : DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; int exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId, uint32_t index) override {