diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 3392e706b3..30ce100052 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -286,7 +286,12 @@ bool CommandListCoreFamilyImmediate::waitForEventsFromHost() { template bool CommandListCoreFamilyImmediate::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { - return (this->csr->directSubmissionRelaxedOrderingEnabled() && numWaitEvents > 0); + uint32_t minimalNumberOfClients = 2; + if (NEO::DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.get() != -1) { + minimalNumberOfClients = static_cast(NEO::DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.get()); + } + + return (this->csr->directSubmissionRelaxedOrderingEnabled() && numWaitEvents > 0 && this->csr->getNumClients() >= minimalNumberOfClients); } template diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 77dc127bef..35f2fedbbc 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -141,6 +141,7 @@ struct WhiteBox> using BaseClass::getHostPtrAlloc; using BaseClass::immediateCmdListHeapSharing; using BaseClass::isFlushTaskSubmissionEnabled; + using BaseClass::isRelaxedOrderingDispatchAllowed; using BaseClass::partitionCount; using BaseClass::pipeControlMultiKernelEventSync; using BaseClass::pipelineSelectStateTracking; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 39b95bcabd..6d55b26bf4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1112,6 +1112,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh auto directSubmission = new MockDirectSubmissionHw>(*ultCsr); ultCsr->directSubmission.reset(directSubmission); + ultCsr->registerClient(); auto verifyFlags = [&ultCsr](ze_result_t result, bool dispatchFlag, bool bbFlag) { EXPECT_EQ(ZE_RESULT_SUCCESS, result); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index e0666ae55f..c5ffbcd45e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -9,6 +9,8 @@ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" +#include "shared/test/common/mocks/mock_direct_submission_hw.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/hw_test.h" @@ -1320,6 +1322,42 @@ HWTEST2_F(CommandListCreate, whenGettingCommandsToPatchThenCorrectValuesAreRetur EXPECT_EQ(&commandList->commandsToPatch, &commandList->getCommandsToPatch()); } +HWTEST2_F(CommandListCreate, givenNumClientsWhenAskingIfRelaxedOrderingEnabledThenReturnCorrectValue, IsAtLeastXeHpcCore) { + DebugManagerStateRestore restore; + DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1); + + auto commandList = std::make_unique>>(); + commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + + auto ultCsr = static_cast *>(commandList->csr); + ultCsr->registerClient(); + ultCsr->recordFlusheBatchBuffer = true; + + auto directSubmission = new NEO::MockDirectSubmissionHw>(*ultCsr); + ultCsr->directSubmission.reset(directSubmission); + + EXPECT_EQ(1u, ultCsr->getNumClients()); + EXPECT_FALSE(commandList->isRelaxedOrderingDispatchAllowed(1)); + + ultCsr->registerClient(); + + EXPECT_EQ(2u, ultCsr->getNumClients()); + EXPECT_TRUE(commandList->isRelaxedOrderingDispatchAllowed(1)); + + DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.set(4); + + EXPECT_EQ(2u, ultCsr->getNumClients()); + EXPECT_FALSE(commandList->isRelaxedOrderingDispatchAllowed(1)); + + ultCsr->registerClient(); + EXPECT_EQ(3u, ultCsr->getNumClients()); + EXPECT_FALSE(commandList->isRelaxedOrderingDispatchAllowed(1)); + + ultCsr->registerClient(); + EXPECT_EQ(4u, ultCsr->getNumClients()); + EXPECT_TRUE(commandList->isRelaxedOrderingDispatchAllowed(1)); +} + HWTEST2_F(CommandListCreate, givenNonEmptyCommandsToPatchWhenClearCommandsToPatchIsCalledThenCommandsAreCorrectlyCleared, IsAtLeastSkl) { using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE; diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp index ea02ae9d0e..175ed21af5 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp @@ -476,6 +476,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC context->allocHostMem(&hostDesc, size, alignment, &dstPtr); auto ultCsr = static_cast *>(commandList0->csr); ultCsr->recordFlusheBatchBuffer = true; + ultCsr->registerClient(); auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); @@ -534,6 +535,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC context->allocHostMem(&hostDesc, size, alignment, &dstPtr); auto ultCsr = static_cast *>(commandList0->csr); ultCsr->recordFlusheBatchBuffer = true; + ultCsr->registerClient(); auto directSubmission = new MockDirectSubmissionHw>(*ultCsr); ultCsr->directSubmission.reset(directSubmission); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 29e08404c2..906efbffc0 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -339,6 +339,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: defa DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: default, 0 - disable, 1 - enable. If enabled, tasks sent to direct submission ring may be dispatched out of order") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -1, "-1: default, >0: Max gpu queue size. If limit is reached, scheduler wont consume new work") +DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.") DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers") /*FEATURE FLAGS*/ diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 982efa50ce..ee7677d74a 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -497,4 +497,5 @@ DirectSubmissionRelaxedOrderingQueueSizeLimit = -1 ExperimentalForceCopyThroughLock = -1 NumberOfRegularContextsPerEngine = -1 EnableMultipleRegularContextForBcs = -1 -AppendAubStreamContextFlags = -1 \ No newline at end of file +AppendAubStreamContextFlags = -1 +DirectSubmissionRelaxedOrderingMinNumberOfClients = -1 \ No newline at end of file