Enable RelaxedOrdering mode only if there are at least 2 clients

Related-To: NEO-7458

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-01-31 13:05:23 +00:00 committed by Compute-Runtime-Automation
parent 34b8f08fc6
commit 1b488224fb
7 changed files with 51 additions and 2 deletions

View File

@ -286,7 +286,12 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::waitForEventsFromHost() {
// Tells whether a dispatch from this immediate command list may use the
// RelaxedOrdering mode.
//
// Returns true only when all of the following hold:
//  - the CSR reports directSubmissionRelaxedOrderingEnabled(),
//  - numWaitEvents is greater than zero,
//  - the CSR has at least the minimal number of clients. The minimum is 2
//    unless the DirectSubmissionRelaxedOrderingMinNumberOfClients debug flag
//    is set to a value other than -1, in which case the flag value is used.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const {
    uint32_t minimalNumberOfClients = 2;

    // Read the debug flag once so the check and the assignment use the same value.
    const auto minNumberOfClientsOverride = NEO::DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.get();
    if (minNumberOfClientsOverride != -1) {
        minimalNumberOfClients = static_cast<uint32_t>(minNumberOfClientsOverride);
    }

    return (this->csr->directSubmissionRelaxedOrderingEnabled() && numWaitEvents > 0 && this->csr->getNumClients() >= minimalNumberOfClients);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -141,6 +141,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::getHostPtrAlloc;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isRelaxedOrderingDispatchAllowed;
using BaseClass::partitionCount;
using BaseClass::pipeControlMultiKernelEventSync;
using BaseClass::pipelineSelectStateTracking;

View File

@ -1112,6 +1112,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
ultCsr->directSubmission.reset(directSubmission);
ultCsr->registerClient();
auto verifyFlags = [&ultCsr](ze_result_t result, bool dispatchFlag, bool bbFlag) {
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

View File

@ -9,6 +9,8 @@
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/test_macros/hw_test.h"
@ -1320,6 +1322,42 @@ HWTEST2_F(CommandListCreate, whenGettingCommandsToPatchThenCorrectValuesAreRetur
EXPECT_EQ(&commandList->commandsToPatch, &commandList->getCommandsToPatch());
}
// Verifies that isRelaxedOrderingDispatchAllowed() follows the number of
// clients registered in the CSR: first against the default minimum, then
// against a minimum of 4 set via DirectSubmissionRelaxedOrderingMinNumberOfClients.
HWTEST2_F(CommandListCreate, givenNumClientsWhenAskingIfRelaxedOrderingEnabledThenReturnCorrectValue, IsAtLeastXeHpcCore) {
    DebugManagerStateRestore restore;
    DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);

    // Every query below uses a single wait event.
    constexpr uint32_t waitEventsCount = 1;

    auto immediateCmdList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    immediateCmdList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;

    auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(immediateCmdList->csr);
    ultCsr->registerClient();
    ultCsr->recordFlusheBatchBuffer = true;
    ultCsr->directSubmission.reset(new NEO::MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr));

    // Default minimum: one client is not enough, two are.
    EXPECT_EQ(1u, ultCsr->getNumClients());
    EXPECT_FALSE(immediateCmdList->isRelaxedOrderingDispatchAllowed(waitEventsCount));

    ultCsr->registerClient();
    EXPECT_EQ(2u, ultCsr->getNumClients());
    EXPECT_TRUE(immediateCmdList->isRelaxedOrderingDispatchAllowed(waitEventsCount));

    // Minimum raised to 4 through the debug flag: two and three clients are rejected.
    DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.set(4);
    EXPECT_EQ(2u, ultCsr->getNumClients());
    EXPECT_FALSE(immediateCmdList->isRelaxedOrderingDispatchAllowed(waitEventsCount));

    ultCsr->registerClient();
    EXPECT_EQ(3u, ultCsr->getNumClients());
    EXPECT_FALSE(immediateCmdList->isRelaxedOrderingDispatchAllowed(waitEventsCount));

    // The fourth client reaches the configured minimum.
    ultCsr->registerClient();
    EXPECT_EQ(4u, ultCsr->getNumClients());
    EXPECT_TRUE(immediateCmdList->isRelaxedOrderingDispatchAllowed(waitEventsCount));
}
HWTEST2_F(CommandListCreate, givenNonEmptyCommandsToPatchWhenClearCommandsToPatchIsCalledThenCommandsAreCorrectlyCleared, IsAtLeastSkl) {
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;

View File

@ -476,6 +476,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList0->csr);
ultCsr->recordFlusheBatchBuffer = true;
ultCsr->registerClient();
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
@ -534,6 +535,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList0->csr);
ultCsr->recordFlusheBatchBuffer = true;
ultCsr->registerClient();
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
ultCsr->directSubmission.reset(directSubmission);

View File

@ -339,6 +339,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: defa
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: default, 0 - disable, 1 - enable. If enabled, tasks sent to direct submission ring may be dispatched out of order")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -1, "-1: default, >0: Max gpu queue size. If limit is reached, scheduler wont consume new work")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
/*FEATURE FLAGS*/

View File

@ -497,4 +497,5 @@ DirectSubmissionRelaxedOrderingQueueSizeLimit = -1
ExperimentalForceCopyThroughLock = -1
NumberOfRegularContextsPerEngine = -1
EnableMultipleRegularContextForBcs = -1
AppendAubStreamContextFlags = -1
AppendAubStreamContextFlags = -1
DirectSubmissionRelaxedOrderingMinNumberOfClients = -1