performance: optimize RelaxedOrdering in-order Barrier programming

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-09-25 14:51:28 +00:00 committed by Compute-Runtime-Automation
parent c115eeb108
commit 529aa60563
2 changed files with 72 additions and 1 deletions

View File

@ -472,6 +472,8 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = ZE_RESULT_SUCCESS;
bool isStallingOperation = true;
if (isInOrderExecutionEnabled()) {
if (isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) {
if (hSignalEvent) {
@ -482,6 +484,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
}
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
isStallingOperation = hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch);
}
checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize);
@ -491,7 +494,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
this->dependenciesPresent = true;
return flushImmediate(ret, true, !relaxedOrderingDispatch, relaxedOrderingDispatch, false, hSignalEvent);
return flushImmediate(ret, true, isStallingOperation, relaxedOrderingDispatch, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -1646,6 +1646,74 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingBarrierThenAllo
EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);
}
// Checks the flags recorded by the CSR for two barriers appended to immediate command lists:
//  1. a barrier with one wait event and no signal event on a regular command list
//     (kept from the original as the "Initialize NP state" step), and
//  2. a barrier with a signal event and no wait events on an in-order command list, which
//     must be reported as neither stalling nor carrying relaxed-ordering dependencies.
// Which set of recorded flags is inspected depends on platformSupportsImmediateComputeFlushTask().
HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingBarrierWithFlushAndWithoutDependenciesThenDontMarkAsStalling, IsAtLeastXeHpcCore) {
    bool useImmediateFlushTask = getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask();

    // The restore object is declared before the flag is modified.
    // NOTE(review): presumably it reverts debug flags when the test scope ends - confirm.
    DebugManagerStateRestore restore;
    DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);

    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    auto commandList0 = zeUniquePtr(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    // Fatal check: commandList0 is dereferenced below, so a failed creation must stop the test here.
    ASSERT_NE(nullptr, commandList0);
    auto commandList = zeUniquePtr(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    // Only the second command list is switched to in-order execution.
    auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
    whiteBoxCmdList->enableInOrderExecution();

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    // Fatal checks: the pool is dereferenced on the next statement.
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, eventPool);

    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
    // Take ownership of the created event so it is released when the test ends.
    std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));

    auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->csr);
    ultCsr->recordFlusheBatchBuffer = true;

    // Ownership of the mock is handed to the CSR through reset().
    auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
    ultCsr->directSubmission.reset(directSubmission);

    // Only the addresses of these locals are used, as client identifiers.
    // NOTE(review): presumably two registered clients are what allows relaxed ordering - confirm.
    int client1, client2;
    ultCsr->registerClient(&client1);
    ultCsr->registerClient(&client2);

    // Initialize NP state
    commandList0->appendBarrier(nullptr, 1, &event, false);

    if (useImmediateFlushTask) {
        EXPECT_FALSE(ultCsr->recordedImmediateDispatchFlags.hasRelaxedOrderingDependencies);
        EXPECT_TRUE(ultCsr->recordedImmediateDispatchFlags.hasStallingCmds);
    } else {
        EXPECT_TRUE(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies);
        EXPECT_FALSE(ultCsr->recordedDispatchFlags.hasStallingCmds);
    }

    EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
    EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);

    // Both clients are unregistered before the barrier under test is appended.
    ultCsr->unregisterClient(&client1);
    ultCsr->unregisterClient(&client2);

    // Barrier under test: in-order command list, signal event only, no wait events.
    commandList->appendBarrier(event, 0, nullptr, false);

    if (useImmediateFlushTask) {
        EXPECT_FALSE(ultCsr->recordedImmediateDispatchFlags.hasRelaxedOrderingDependencies);
        EXPECT_FALSE(ultCsr->recordedImmediateDispatchFlags.hasStallingCmds);
    } else {
        EXPECT_FALSE(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies);
        EXPECT_FALSE(ultCsr->recordedDispatchFlags.hasStallingCmds);
    }

    EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
    EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);
}
HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrderingThenProgramConditionalBbStart, IsAtLeastXeHpcCore) {
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;