performance: improve relaxed ordering task count tracking

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-12-18 10:10:45 +00:00
committed by Compute-Runtime-Automation
parent f357ada604
commit e8cfb38db4
8 changed files with 46 additions and 16 deletions

View File

@@ -1448,16 +1448,13 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAll
auto queueTaskCount = getCmdQImmediate(copyOffload)->getTaskCount();
auto csrTaskCount = csr->peekTaskCount();
if ((this->device->getNEODevice()->isInitDeviceWithFirstSubmissionSupported(csr->getType()) || this->heaplessStateInitEnabled) && csr->peekTaskCount() == 1) {
DEBUG_BREAK_IF(queueTaskCount != 0);
queueTaskCount = 1;
}
bool skipTaskCountCheck = (csrTaskCount - queueTaskCount == 1) && csr->isLatestFlushIsTaskCountUpdateOnly();
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get() != -1) {
relaxedOrderingCounterThreshold = static_cast<uint32_t>(NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get());
}
if (queueTaskCount == csrTaskCount) {
if (queueTaskCount == csrTaskCount || skipTaskCountCheck) {
relaxedOrderingCounter++;
} else {
// Submission from another queue. Reset counter and keep relaxed ordering allowed

View File

@@ -127,7 +127,7 @@ NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::Res
NEO::BatchBuffer batchBuffer(this->startingCmdBuffer->getGraphicsAllocation(), offset, 0, 0, nullptr, false,
NEO::getThrottleFromPowerSavingUint(csr->getUmdPowerHintValue()), NEO::QueueSliceCount::defaultSliceCount,
this->startingCmdBuffer->getUsed(), this->startingCmdBuffer, endingCmdPtr, csr->getNumClients(), true, false, true);
this->startingCmdBuffer->getUsed(), this->startingCmdBuffer, endingCmdPtr, csr->getNumClients(), true, false, true, false);
batchBuffer.disableFlatRingBuffer = true;
if (this->startingCmdBuffer != &this->commandStream) {

View File

@@ -2060,6 +2060,33 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingEnabledWhenSignalEventCalledT
verifyFlags(false, true); // relaxed ordering disabled == stalling semaphore
}
HWTEST2_F(InOrderCmdListTests, givenCounterHeuristicForRelaxedOrderingEnabledWhenSmallTaskIsFlushedThenIncrementCounter, IsAtLeastXeHpcCore) {
    // Verifies that relaxedOrderingCounter keeps incrementing across a CSR
    // flushTagUpdate() that makes the CSR task count differ from the queue's.
    debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);

    // Attach a mock direct submission to the default engine's CSR.
    auto csr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
    csr->directSubmission.reset(new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*csr));

    auto cmdList = createImmCmdList<gfxCoreFamily>();
    auto immQueue = cmdList->getCmdQImmediate(false);

    // Every step of the scenario appends the same kernel launch.
    const auto launchKernel = [&]() {
        cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    };

    // Starting point: nothing submitted, counter untouched.
    EXPECT_EQ(0u, immQueue->getTaskCount());
    EXPECT_EQ(0u, cmdList->relaxedOrderingCounter);

    // Each append is expected to bump the counter by one.
    launchKernel();
    EXPECT_EQ(1u, cmdList->relaxedOrderingCounter);

    launchKernel();
    EXPECT_EQ(2u, cmdList->relaxedOrderingCounter);

    // After the tag update the CSR and queue task counts must no longer match.
    csr->flushTagUpdate();
    EXPECT_NE(csr->taskCount, immQueue->getTaskCount());

    // The counter is still expected to increment (not reset), and the task
    // counts are expected to match again after this append.
    launchKernel();
    EXPECT_EQ(3u, cmdList->relaxedOrderingCounter);
    EXPECT_EQ(csr->taskCount, immQueue->getTaskCount());
}
HWTEST2_F(InOrderCmdListTests, givenCounterHeuristicForRelaxedOrderingEnabledWhenAppendingThenEnableRelaxedOrderingCorrectly, IsAtLeastXeHpcCore) {
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);