feature: Experimental support of immediate cmd list in-order execution [6/n]

Related-To: LOCI-4332

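- Signal in-order completion (write to the in-order dependency counter allocation) from the appendWaitOnEvents API call
- Signal in-order completion from the appendBarrier call
- Handle residency of the in-order sync (dependency counter) allocation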

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-05-10 17:13:49 +00:00
committed by Compute-Runtime-Automation
parent 5c988e8a76
commit 41478c5972
18 changed files with 136 additions and 52 deletions

View File

@@ -65,7 +65,7 @@ ze_result_t zeCommandListAppendWaitOnEvents(
ze_command_list_handle_t hCommandList,
uint32_t numEvents,
ze_event_handle_t *phEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, false, true);
return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, false, true, true);
}
ze_result_t zeEventHostSignal(

View File

@@ -139,7 +139,7 @@ struct CommandList : _ze_command_list_handle_t {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) = 0;
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) = 0;
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,

View File

@@ -167,7 +167,7 @@ struct CommandListCoreFamily : CommandListImp {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, bool relaxedOrderingAllowed);
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;

View File

@@ -2033,7 +2033,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
if (numWaitEvents > 0) {
if (phWaitEvents) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, false);
} else {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
@@ -2095,9 +2095,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) {
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
signalInOrderCompletion &= this->inOrderExecutionEnabled;
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@@ -2166,6 +2168,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
}
if (signalInOrderCompletion) {
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(*commandContainer.getCommandStream(), this->inOrderDependencyCounterAllocation->getGpuAddress(),
this->inOrderDependencyCounter + 1, 0, false, false);
}
makeResidentDummyAllocation();
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@@ -2266,7 +2273,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
if (numWaitEvents > 0) {
if (phWaitEvents) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true, false);
} else {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}

View File

@@ -92,7 +92,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
NEO::GraphicsAllocation *srcAllocation,
size_t size, bool flushHost) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;

View File

@@ -543,7 +543,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
bool allSignaled = true;
for (auto i = 0u; i < numEvents; i++) {
allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted());
@@ -555,7 +555,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
checkAvailableSpace(numEvents, false);
checkWaitEventsState(numEvents, phWaitEvents);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, signalInOrderCompletion);
this->dependenciesPresent = true;
return flushImmediate(ret, true, true, false, nullptr);
}
@@ -691,6 +691,8 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
if (inputRet == ZE_RESULT_SUCCESS) {
this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
if (this->isFlushTaskSubmissionEnabled) {
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
} else {

View File

@@ -385,7 +385,17 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
appendMultiTileBarrier(*neoDevice);
} else {
NEO::PipeControlArgs args = createBarrierFlags();
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
NEO::PostSyncMode postSyncMode = NEO::PostSyncMode::NoWrite;
uint64_t gpuWriteAddress = 0;
uint64_t writeValue = 0;
if (this->inOrderExecutionEnabled) {
postSyncMode = NEO::PostSyncMode::ImmediateData;
gpuWriteAddress = this->inOrderDependencyCounterAllocation->getGpuAddress();
writeValue = this->inOrderDependencyCounter + 1;
}
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), postSyncMode, gpuWriteAddress, writeValue, args);
}
}

View File

@@ -379,7 +379,7 @@ struct MockCommandList : public CommandList {
ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t numEvents,
ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies));
ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion));
ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS,
(uint64_t * dstptr,

View File

@@ -1052,7 +1052,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true), true, true);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true, false), true, true);
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, 0, nullptr), true, true);
@@ -1186,7 +1186,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true), false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true, false), false, false);
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
false, false);
@@ -1402,7 +1402,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
ASSERT_NE(nullptr, eventObject->csrs[0]);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendBarrier(nullptr, 1, &event);
@@ -1465,7 +1465,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnExecutingCommandListsWhenCreatingImme
whiteBoxCmdList->csr = &mockCommandStreamReceiver;
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
whiteBoxCmdList->csr = oldCsr;
@@ -1543,7 +1543,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
whiteBoxCmdList->csr = &mockCommandStreamReceiver;
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
whiteBoxCmdList->csr = oldCsr;
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = oldCsr;
@@ -1594,7 +1594,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
auto oldCommandQueue = whiteBoxCmdList->cmdQImmediate;
whiteBoxCmdList->cmdQImmediate = &mockCommandQueue;
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
whiteBoxCmdList->cmdQImmediate = oldCommandQueue;
}
@@ -1644,7 +1644,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
auto oldCommandQueue = whiteBoxCmdList->cmdQImmediate;
whiteBoxCmdList->cmdQImmediate = &mockCommandQueue;
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
whiteBoxCmdList->cmdQImmediate = oldCommandQueue;
}
@@ -1688,7 +1688,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
ASSERT_NE(nullptr, eventObject->csrs[0]);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendBarrier(nullptr, 1, &event);
@@ -1783,7 +1783,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangAndEnabledFlushTaskSubmissionFlagWhenCre
whiteBoxCmdList->csr = &mockCommandStreamReceiver;
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
whiteBoxCmdList->csr = oldCsr;

View File

@@ -596,7 +596,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue));
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
context->destroy();
@@ -761,7 +761,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, true);
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(whiteBoxCmdList->csr->getNextBarrierCount(), 2u);
@@ -803,7 +803,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, false);
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, false, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(whiteBoxCmdList->csr->getNextBarrierCount(), 1u);
}

View File

@@ -612,7 +612,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith
event.signalScope = 0;
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
auto eventHandle = event.toHandle();
commandList->appendWaitOnEvents(1, &eventHandle, false, true);
commandList->appendWaitOnEvents(1, &eventHandle, false, true, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -636,7 +636,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen
event.signalScope = 0;
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
auto eventHandle = event.toHandle();
commandList->appendWaitOnEvents(1, &eventHandle, false, true);
commandList->appendWaitOnEvents(1, &eventHandle, false, true, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -667,7 +667,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP
event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
ze_event_handle_t events[] = {&event, &event2};
commandList->appendWaitOnEvents(2, events, false, true);
commandList->appendWaitOnEvents(2, events, false, true, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -720,7 +720,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
ze_event_handle_t events[] = {&event, &event2};
size_t startOffset = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, false, true);
commandList->appendWaitOnEvents(2, events, false, true, false);
size_t endOffset = commandContainer.getCommandStream()->getUsed();
size_t usedBufferSize = (endOffset - startOffset);
@@ -767,7 +767,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
ze_event_handle_t events[] = {&event, &event2};
size_t startOffset = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, false, true);
commandList->appendWaitOnEvents(2, events, false, true, false);
size_t endOffset = commandContainer.getCommandStream()->getUsed();
size_t usedBufferSize = (endOffset - startOffset);
@@ -808,7 +808,7 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly
ze_event_handle_t events[] = {&event, &event2};
auto used = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, false, true);
commandList->appendWaitOnEvents(2, events, false, true, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -990,7 +990,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe
ze_event_handle_t events[] = {&event, &event2};
auto used = commandContainer.getCommandStream()->getUsed();
commandList->appendWaitOnEvents(2, events, false, true);
commandList->appendWaitOnEvents(2, events, false, true, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@@ -1023,7 +1023,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateComm
event2.waitScope = 0;
ze_event_handle_t events[] = {&event, &event2};
auto ret = commandList->appendWaitOnEvents(2, events, false, true);
auto ret = commandList->appendWaitOnEvents(2, events, false, true, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
}

View File

@@ -2488,7 +2488,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnE
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto eventHandle = event->toHandle();
cmdList.appendWaitOnEvents(1, &eventHandle, false, true);
cmdList.appendWaitOnEvents(1, &eventHandle, false, true, false);
EXPECT_TRUE(cmdList.dependenciesPresent);

View File

@@ -636,7 +636,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendWaitOnE
eventPool->createEvent(&eventDesc, &hEvent);
auto result = commandList->appendWaitOnEvents(1, &hEvent, false, true);
auto result = commandList->appendWaitOnEvents(1, &hEvent, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = cmdStream->getUsed();

View File

@@ -861,16 +861,16 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependen
EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter);
auto itorAlloc = std::find(immCmdList->getCmdContainer().getResidencyContainer().begin(),
immCmdList->getCmdContainer().getResidencyContainer().end(),
immCmdList->inOrderDependencyCounterAllocation);
EXPECT_NE(itorAlloc, immCmdList->getCmdContainer().getResidencyContainer().end());
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
ultCsr->storeMakeResidentAllocations = true;
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]);
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]);
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsThenConfigureRegistersFirst, IsAtLeastSkl) {
@@ -964,6 +964,71 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
}
// Checks that zeCommandListAppendWaitOnEvents, issued on an immediate command list created by
// createImmCmdList, programs an MI_STORE_DATA_IMM targeting the in-order dependency counter
// allocation of that command list.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEventsThenSignalSyncAllocation, IsAtLeastXeHpCore) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    // Keep the pool object alive for the whole test; only events[0] is used directly.
    auto eventPoolHolder = createEvents(1);
    auto waitEventHandle = events[0]->toHandle();

    // First append: a kernel launch that is handed the event handle.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, waitEventHandle, 0, nullptr, launchParams, false);

    // Remember where the stream ends so that only the commands added by the wait call are parsed.
    auto streamOffsetBeforeWait = cmdStream->getUsed();

    zeCommandListAppendWaitOnEvents(immCmdList->toHandle(), 1, &waitEventHandle);

    auto parseStart = ptrOffset(cmdStream->getCpuBase(), streamOffsetBeforeWait);
    auto parseSize = cmdStream->getUsed() - streamOffsetBeforeWait;

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, parseStart, parseSize));

    auto storeDataItor = find<MI_STORE_DATA_IMM *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(parsedCommands.end(), storeDataItor);

    auto storeDataCmd = genCmdCast<MI_STORE_DATA_IMM *>(*storeDataItor);
    auto expectedGpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();

    EXPECT_EQ(expectedGpuAddress, storeDataCmd->getAddress());
    EXPECT_EQ(0u, storeDataCmd->getStoreQword());
    // NOTE(review): 2 presumably equals the counter after the first launch (1) plus one - confirm.
    EXPECT_EQ(2u, storeDataCmd->getDataDword0());
}
// Checks that appendBarrier, issued on an immediate command list created by createImmCmdList,
// programs a PIPE_CONTROL whose post-sync operation writes immediate data to the in-order
// dependency counter allocation of that command list.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenSignalSyncAllocation, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    // First append: a kernel launch without a signal event or wait events.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    // Remember where the stream ends so that only the commands added by the barrier are parsed.
    auto streamOffsetBeforeBarrier = cmdStream->getUsed();

    immCmdList->appendBarrier(nullptr, 0, nullptr);

    auto parseStart = ptrOffset(cmdStream->getCpuBase(), streamOffsetBeforeBarrier);
    auto parseSize = cmdStream->getUsed() - streamOffsetBeforeBarrier;

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, parseStart, parseSize));

    auto pipeControlItor = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(parsedCommands.end(), pipeControlItor);

    auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);

    // Split the 64-bit GPU address into the two 32-bit halves the command is compared against.
    auto counterGpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
    auto expectedAddressLow = static_cast<uint32_t>(counterGpuAddress & 0xFFFFFFFFULL);
    auto expectedAddressHigh = static_cast<uint32_t>(counterGpuAddress >> 32);

    EXPECT_EQ(expectedAddressLow, pipeControlCmd->getAddress());
    EXPECT_EQ(expectedAddressHigh, pipeControlCmd->getAddressHigh());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation());
    // NOTE(review): 2 presumably equals the counter after the first launch (1) plus one - confirm.
    EXPECT_EQ(2u, pipeControlCmd->getImmediateData());
}
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
template <typename FamilyType>
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) {

View File

@@ -32,7 +32,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventThenSemaphoreWait
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -275,7 +275,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
ze_event_handle_t handles[2] = {event->toHandle(), event->toHandle()};
auto result = commandList->appendWaitOnEvents(2, handles, false, true);
auto result = commandList->appendWaitOnEvents(2, handles, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -306,7 +306,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphicsAllocationIsAddedToResidencyContainer) {
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto &residencyContainer = commandList->getCmdContainer().getResidencyContainer();
@@ -333,7 +333,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_event_handle_t hEventHandle = event->toHandle();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -378,7 +378,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
event->setPacketsInUse(3u);
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
@@ -441,7 +441,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
ASSERT_EQ(9u, event->getPacketsInUse());
ze_event_handle_t hEventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
@@ -547,7 +547,7 @@ HWTEST_F(CommandListAppendWaitOnSecondaryBatchBufferEvent, givenCommandBufferIsE
ze_event_handle_t hEventHandle = event->toHandle();
auto oldCommandBuffer = commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation();
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -609,7 +609,7 @@ HWTEST2_F(MultTileCommandListAppendWaitOnEvent,
ze_event_handle_t eventHandle = event->toHandle();
auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
auto result = commandList->appendWaitOnEvents(1, &eventHandle, false, true);
auto result = commandList->appendWaitOnEvents(1, &eventHandle, false, true, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
@@ -648,7 +648,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
ze_event_handle_t eventHandle = event->toHandle();
EXPECT_FALSE(cmdList.dependenciesPresent);
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true));
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true, false));
EXPECT_TRUE(cmdList.dependenciesPresent);
}
@@ -662,7 +662,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
ze_event_handle_t eventHandle = event->toHandle();
EXPECT_FALSE(cmdList.dependenciesPresent);
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true));
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true, false));
EXPECT_FALSE(cmdList.dependenciesPresent);
}
@@ -743,7 +743,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
auto eventHandle = event->toHandle();
commandListImmediate->appendWaitOnEvents(1, &eventHandle, false, true);
commandListImmediate->appendWaitOnEvents(1, &eventHandle, false, true, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}

View File

@@ -1067,7 +1067,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
size_t sizeBefore = cmdStream->getUsed();
auto eventHandle = event->toHandle();
result = commandList->appendWaitOnEvents(1, &eventHandle, false, true);
result = commandList->appendWaitOnEvents(1, &eventHandle, false, true, false);
size_t sizeAfter = cmdStream->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

View File

@@ -277,7 +277,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat
ASSERT_NE(nullptr, eventObject->csrs[0]);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
returnValue = commandList->appendBarrier(nullptr, 1, &event);
@@ -341,7 +341,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia
ASSERT_NE(nullptr, eventObject->csrs[0]);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
returnValue = commandList->appendBarrier(nullptr, 1, &event);

View File

@@ -796,7 +796,7 @@ ze_result_t OaMetricQueryImp::writeMetricQuery(CommandList &commandList, ze_even
commandList.getCmdContainer().addToResidencyContainer(pool.pAllocation);
// Wait for events before executing query.
commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true);
commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true, false);
if (metricQueriesSize) {