performance: optimize RelaxedOrdering in-order Barrier programming
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
c115eeb108
commit
529aa60563
|
@ -472,6 +472,8 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
|||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
ze_result_t ret = ZE_RESULT_SUCCESS;
|
||||
|
||||
bool isStallingOperation = true;
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
if (isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) {
|
||||
if (hSignalEvent) {
|
||||
|
@ -482,6 +484,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
|
|||
}
|
||||
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
isStallingOperation = hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch);
|
||||
}
|
||||
|
||||
checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize);
|
||||
|
@ -491,7 +494,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
|
|||
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
this->dependenciesPresent = true;
|
||||
return flushImmediate(ret, true, !relaxedOrderingDispatch, relaxedOrderingDispatch, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, isStallingOperation, relaxedOrderingDispatch, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
|
|
@ -1646,6 +1646,74 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingBarrierThenAllo
|
|||
EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);
|
||||
}
|
||||
|
||||
// Verifies how an immediate command list flags its barrier submissions:
//  1. A barrier with one wait event appended to a command list that never had
//     in-order execution enabled is expected to be flushed as a stalling
//     submission without relaxed-ordering dependencies.
//  2. A barrier that only signals an event (no wait events) appended to an
//     in-order command list is expected to be flushed as neither stalling nor
//     relaxed-ordering dependent.
// The flags are checked both in the recorded dispatch flags (immediate or
// regular flush task, depending on platform support) and in the latest
// flushed batch buffer.
HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingBarrierWithFlushAndWithoutDependenciesThenDontMarkAsStalling, IsAtLeastXeHpcCore) {
    // Selects which set of recorded dispatch flags is inspected below.
    bool useImmediateFlushTask = getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask();

    DebugManagerStateRestore restore;
    DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);

    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    auto commandList0 = zeUniquePtr(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList0);
    auto commandList = zeUniquePtr(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);
    // Only the second command list runs in in-order mode.
    auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
    whiteBoxCmdList->enableInOrderExecution();

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
    std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));

    auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->csr);
    ultCsr->recordFlusheBatchBuffer = true;

    // Ownership of the mock is handed over to the CSR via reset().
    auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
    ultCsr->directSubmission.reset(directSubmission);
    // Only the addresses of these objects are used, as client identifiers.
    int client1 = 0;
    int client2 = 0;
    ultCsr->registerClient(&client1);
    ultCsr->registerClient(&client2);

    // Initialize NP state
    commandList0->appendBarrier(nullptr, 1, &event, false);

    // Both flush-task paths describe the same submission, so they must agree
    // with each other and with the flushed batch buffer checked right after.
    if (useImmediateFlushTask) {
        EXPECT_FALSE(ultCsr->recordedImmediateDispatchFlags.hasRelaxedOrderingDependencies);
        EXPECT_TRUE(ultCsr->recordedImmediateDispatchFlags.hasStallingCmds);
    } else {
        EXPECT_FALSE(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies);
        EXPECT_TRUE(ultCsr->recordedDispatchFlags.hasStallingCmds);
    }
    EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
    EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);

    ultCsr->unregisterClient(&client1);
    ultCsr->unregisterClient(&client2);

    // In-order command list, signal event only, no wait events.
    commandList->appendBarrier(event, 0, nullptr, false);

    if (useImmediateFlushTask) {
        EXPECT_FALSE(ultCsr->recordedImmediateDispatchFlags.hasRelaxedOrderingDependencies);
        EXPECT_FALSE(ultCsr->recordedImmediateDispatchFlags.hasStallingCmds);
    } else {
        EXPECT_FALSE(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies);
        EXPECT_FALSE(ultCsr->recordedDispatchFlags.hasStallingCmds);
    }
    EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
    EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrderingThenProgramConditionalBbStart, IsAtLeastXeHpcCore) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
|
||||
|
|
Loading…
Reference in New Issue