mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
feature: Experimental support of immediate cmd list in-order execution [6/n]
Related-To: LOCI-4332
- Signal appendWaitOnEvents API call
- Signal appendBarrier call
- Handle sync allocation residency
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5c988e8a76
commit
41478c5972
@@ -65,7 +65,7 @@ ze_result_t zeCommandListAppendWaitOnEvents(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
uint32_t numEvents,
|
||||
ze_event_handle_t *phEvents) {
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, false, true);
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, false, true, true);
|
||||
}
|
||||
|
||||
ze_result_t zeEventHostSignal(
|
||||
|
||||
@@ -139,7 +139,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
|
||||
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
|
||||
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
|
||||
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) = 0;
|
||||
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) = 0;
|
||||
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,
|
||||
|
||||
@@ -167,7 +167,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
||||
void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, bool relaxedOrderingAllowed);
|
||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
@@ -2033,7 +2033,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
if (phWaitEvents) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, false);
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -2095,9 +2095,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) {
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
signalInOrderCompletion &= this->inOrderExecutionEnabled;
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
uint32_t callId = 0;
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2166,6 +2168,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
|
||||
}
|
||||
|
||||
if (signalInOrderCompletion) {
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(*commandContainer.getCommandStream(), this->inOrderDependencyCounterAllocation->getGpuAddress(),
|
||||
this->inOrderDependencyCounter + 1, 0, false, false);
|
||||
}
|
||||
|
||||
makeResidentDummyAllocation();
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
@@ -2266,7 +2273,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
|
||||
if (numWaitEvents > 0) {
|
||||
if (phWaitEvents) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true, false);
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
@@ -92,7 +92,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
NEO::GraphicsAllocation *srcAllocation,
|
||||
size_t size, bool flushHost) override;
|
||||
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
||||
|
||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
@@ -543,7 +543,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) {
|
||||
bool allSignaled = true;
|
||||
for (auto i = 0u; i < numEvents; i++) {
|
||||
allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted());
|
||||
@@ -555,7 +555,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
|
||||
checkAvailableSpace(numEvents, false);
|
||||
checkWaitEventsState(numEvents, phWaitEvents);
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies);
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, signalInOrderCompletion);
|
||||
this->dependenciesPresent = true;
|
||||
return flushImmediate(ret, true, true, false, nullptr);
|
||||
}
|
||||
@@ -691,6 +691,8 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
|
||||
if (inputRet == ZE_RESULT_SUCCESS) {
|
||||
this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
|
||||
} else {
|
||||
|
||||
@@ -385,7 +385,17 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
appendMultiTileBarrier(*neoDevice);
|
||||
} else {
|
||||
NEO::PipeControlArgs args = createBarrierFlags();
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
NEO::PostSyncMode postSyncMode = NEO::PostSyncMode::NoWrite;
|
||||
uint64_t gpuWriteAddress = 0;
|
||||
uint64_t writeValue = 0;
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
postSyncMode = NEO::PostSyncMode::ImmediateData;
|
||||
gpuWriteAddress = this->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
writeValue = this->inOrderDependencyCounter + 1;
|
||||
}
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), postSyncMode, gpuWriteAddress, writeValue, args);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -379,7 +379,7 @@ struct MockCommandList : public CommandList {
|
||||
|
||||
ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uint32_t numEvents,
|
||||
ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies));
|
||||
ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion));
|
||||
|
||||
ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uint64_t * dstptr,
|
||||
|
||||
@@ -1052,7 +1052,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
|
||||
|
||||
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false);
|
||||
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true), true, true);
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true, false), true, true);
|
||||
|
||||
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, 0, nullptr), true, true);
|
||||
|
||||
@@ -1186,7 +1186,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
|
||||
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
|
||||
false, false);
|
||||
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true), false, false);
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true, false), false, false);
|
||||
|
||||
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
|
||||
false, false);
|
||||
@@ -1402,7 +1402,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
|
||||
ASSERT_NE(nullptr, eventObject->csrs[0]);
|
||||
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
returnValue = commandList->appendBarrier(nullptr, 1, &event);
|
||||
@@ -1465,7 +1465,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnExecutingCommandListsWhenCreatingImme
|
||||
whiteBoxCmdList->csr = &mockCommandStreamReceiver;
|
||||
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
|
||||
|
||||
whiteBoxCmdList->csr = oldCsr;
|
||||
@@ -1543,7 +1543,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
|
||||
whiteBoxCmdList->csr = &mockCommandStreamReceiver;
|
||||
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
|
||||
whiteBoxCmdList->csr = oldCsr;
|
||||
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = oldCsr;
|
||||
@@ -1594,7 +1594,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
|
||||
auto oldCommandQueue = whiteBoxCmdList->cmdQImmediate;
|
||||
whiteBoxCmdList->cmdQImmediate = &mockCommandQueue;
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
|
||||
whiteBoxCmdList->cmdQImmediate = oldCommandQueue;
|
||||
}
|
||||
@@ -1644,7 +1644,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom
|
||||
auto oldCommandQueue = whiteBoxCmdList->cmdQImmediate;
|
||||
whiteBoxCmdList->cmdQImmediate = &mockCommandQueue;
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
|
||||
whiteBoxCmdList->cmdQImmediate = oldCommandQueue;
|
||||
}
|
||||
@@ -1688,7 +1688,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen
|
||||
ASSERT_NE(nullptr, eventObject->csrs[0]);
|
||||
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
returnValue = commandList->appendBarrier(nullptr, 1, &event);
|
||||
@@ -1783,7 +1783,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangAndEnabledFlushTaskSubmissionFlagWhenCre
|
||||
whiteBoxCmdList->csr = &mockCommandStreamReceiver;
|
||||
static_cast<WhiteBox<::L0::CommandQueue> *>(whiteBoxCmdList->cmdQImmediate)->csr = &mockCommandStreamReceiver;
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);
|
||||
|
||||
whiteBoxCmdList->csr = oldCsr;
|
||||
|
||||
@@ -596,7 +596,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue));
|
||||
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
context->destroy();
|
||||
@@ -761,7 +761,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
|
||||
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto eventHandle = event->toHandle();
|
||||
|
||||
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, true);
|
||||
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(whiteBoxCmdList->csr->getNextBarrierCount(), 2u);
|
||||
|
||||
@@ -803,7 +803,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEvents
|
||||
auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto eventHandle = event->toHandle();
|
||||
|
||||
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, false);
|
||||
result = commandList->appendWaitOnEvents(1u, &eventHandle, false, false, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(whiteBoxCmdList->csr->getNextBarrierCount(), 1u);
|
||||
}
|
||||
|
||||
@@ -612,7 +612,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith
|
||||
event.signalScope = 0;
|
||||
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
auto eventHandle = event.toHandle();
|
||||
commandList->appendWaitOnEvents(1, &eventHandle, false, true);
|
||||
commandList->appendWaitOnEvents(1, &eventHandle, false, true, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -636,7 +636,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen
|
||||
event.signalScope = 0;
|
||||
event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
auto eventHandle = event.toHandle();
|
||||
commandList->appendWaitOnEvents(1, &eventHandle, false, true);
|
||||
commandList->appendWaitOnEvents(1, &eventHandle, false, true, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -667,7 +667,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP
|
||||
event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
commandList->appendWaitOnEvents(2, events, false, true);
|
||||
commandList->appendWaitOnEvents(2, events, false, true, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -720,7 +720,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
size_t startOffset = commandContainer.getCommandStream()->getUsed();
|
||||
commandList->appendWaitOnEvents(2, events, false, true);
|
||||
commandList->appendWaitOnEvents(2, events, false, true, false);
|
||||
size_t endOffset = commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
size_t usedBufferSize = (endOffset - startOffset);
|
||||
@@ -767,7 +767,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
size_t startOffset = commandContainer.getCommandStream()->getUsed();
|
||||
commandList->appendWaitOnEvents(2, events, false, true);
|
||||
commandList->appendWaitOnEvents(2, events, false, true, false);
|
||||
size_t endOffset = commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
size_t usedBufferSize = (endOffset - startOffset);
|
||||
@@ -808,7 +808,7 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
auto used = commandContainer.getCommandStream()->getUsed();
|
||||
commandList->appendWaitOnEvents(2, events, false, true);
|
||||
commandList->appendWaitOnEvents(2, events, false, true, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -990,7 +990,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
auto used = commandContainer.getCommandStream()->getUsed();
|
||||
commandList->appendWaitOnEvents(2, events, false, true);
|
||||
commandList->appendWaitOnEvents(2, events, false, true, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -1023,7 +1023,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateComm
|
||||
event2.waitScope = 0;
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
auto ret = commandList->appendWaitOnEvents(2, events, false, true);
|
||||
auto ret = commandList->appendWaitOnEvents(2, events, false, true, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
}
|
||||
|
||||
|
||||
@@ -2488,7 +2488,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnE
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
auto eventHandle = event->toHandle();
|
||||
cmdList.appendWaitOnEvents(1, &eventHandle, false, true);
|
||||
cmdList.appendWaitOnEvents(1, &eventHandle, false, true, false);
|
||||
|
||||
EXPECT_TRUE(cmdList.dependenciesPresent);
|
||||
|
||||
|
||||
@@ -636,7 +636,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendWaitOnE
|
||||
|
||||
eventPool->createEvent(&eventDesc, &hEvent);
|
||||
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEvent, false, true);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEvent, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = cmdStream->getUsed();
|
||||
|
||||
@@ -861,16 +861,16 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependen
|
||||
|
||||
EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter);
|
||||
|
||||
auto itorAlloc = std::find(immCmdList->getCmdContainer().getResidencyContainer().begin(),
|
||||
immCmdList->getCmdContainer().getResidencyContainer().end(),
|
||||
immCmdList->inOrderDependencyCounterAllocation);
|
||||
EXPECT_NE(itorAlloc, immCmdList->getCmdContainer().getResidencyContainer().end());
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
ultCsr->storeMakeResidentAllocations = true;
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
|
||||
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]);
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter);
|
||||
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]);
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsThenConfigureRegistersFirst, IsAtLeastSkl) {
|
||||
@@ -964,6 +964,71 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
|
||||
}
|
||||
|
||||
// Verifies that, for an in-order immediate command list, the public
// zeCommandListAppendWaitOnEvents entry point programs an MI_STORE_DATA_IMM
// that writes the next counter value into the in-order sync allocation.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEventsThenSignalSyncAllocation, IsAtLeastXeHpCore) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    auto stream = immCmdList->getCmdContainer().getCommandStream();

    // Keep the pool alive for the whole test; the event is taken from the fixture's list.
    auto eventPool = createEvents(1);
    auto waitEvent = events[0]->toHandle();

    // One kernel launch first, so the wait below is the second in-order operation.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, waitEvent, 0, nullptr, launchParams, false);

    // Only commands emitted by the wait-on-events call are parsed.
    auto startOffset = stream->getUsed();

    zeCommandListAppendWaitOnEvents(immCmdList->toHandle(), 1, &waitEvent);

    auto parseBegin = ptrOffset(stream->getCpuBase(), startOffset);
    auto parseSize = stream->getUsed() - startOffset;

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, parseBegin, parseSize));

    auto storeItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), storeItor);

    auto storeCmd = genCmdCast<MI_STORE_DATA_IMM *>(*storeItor);
    auto syncAllocAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();

    // Dword-sized store of the value 2 to the counter allocation.
    EXPECT_EQ(syncAllocAddress, storeCmd->getAddress());
    EXPECT_EQ(0u, storeCmd->getStoreQword());
    EXPECT_EQ(2u, storeCmd->getDataDword0());
}
|
||||
|
||||
// Verifies that, for an in-order immediate command list, appendBarrier emits a
// PIPE_CONTROL whose post-sync immediate write targets the in-order sync
// allocation with the next counter value.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenSignalSyncAllocation, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    auto stream = immCmdList->getCmdContainer().getCommandStream();

    // One kernel launch first, so the barrier below is the second in-order operation.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    // Only commands emitted by the barrier call are parsed.
    auto startOffset = stream->getUsed();

    immCmdList->appendBarrier(nullptr, 0, nullptr);

    auto parseBegin = ptrOffset(stream->getCpuBase(), startOffset);
    auto parseSize = stream->getUsed() - startOffset;

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, parseBegin, parseSize));

    auto barrierItor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), barrierItor);

    auto barrierCmd = genCmdCast<PIPE_CONTROL *>(*barrierItor);

    // The command exposes the 64-bit GPU address as two 32-bit halves.
    auto syncAllocAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
    auto expectedAddressLow = static_cast<uint32_t>(syncAllocAddress & 0x0000FFFFFFFFULL);
    auto expectedAddressHigh = static_cast<uint32_t>(syncAllocAddress >> 32);

    EXPECT_EQ(expectedAddressLow, barrierCmd->getAddress());
    EXPECT_EQ(expectedAddressHigh, barrierCmd->getAddressHigh());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, barrierCmd->getPostSyncOperation());
    EXPECT_EQ(2u, barrierCmd->getImmediateData());
}
|
||||
|
||||
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
|
||||
template <typename FamilyType>
|
||||
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) {
|
||||
|
||||
@@ -32,7 +32,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventThenSemaphoreWait
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -275,7 +275,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
|
||||
|
||||
ze_event_handle_t handles[2] = {event->toHandle(), event->toHandle()};
|
||||
|
||||
auto result = commandList->appendWaitOnEvents(2, handles, false, true);
|
||||
auto result = commandList->appendWaitOnEvents(2, handles, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -306,7 +306,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe
|
||||
|
||||
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphicsAllocationIsAddedToResidencyContainer) {
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto &residencyContainer = commandList->getCmdContainer().getResidencyContainer();
|
||||
@@ -333,7 +333,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe
|
||||
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -378,7 +378,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
|
||||
|
||||
event->setPacketsInUse(3u);
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
@@ -441,7 +441,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest
|
||||
ASSERT_EQ(9u, event->getPacketsInUse());
|
||||
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
|
||||
result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
@@ -547,7 +547,7 @@ HWTEST_F(CommandListAppendWaitOnSecondaryBatchBufferEvent, givenCommandBufferIsE
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
|
||||
auto oldCommandBuffer = commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation();
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true);
|
||||
auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -609,7 +609,7 @@ HWTEST2_F(MultTileCommandListAppendWaitOnEvent,
|
||||
ze_event_handle_t eventHandle = event->toHandle();
|
||||
|
||||
auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
auto result = commandList->appendWaitOnEvents(1, &eventHandle, false, true);
|
||||
auto result = commandList->appendWaitOnEvents(1, &eventHandle, false, true, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
@@ -648,7 +648,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
|
||||
ze_event_handle_t eventHandle = event->toHandle();
|
||||
|
||||
EXPECT_FALSE(cmdList.dependenciesPresent);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true, false));
|
||||
EXPECT_TRUE(cmdList.dependenciesPresent);
|
||||
}
|
||||
|
||||
@@ -662,7 +662,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO
|
||||
ze_event_handle_t eventHandle = event->toHandle();
|
||||
|
||||
EXPECT_FALSE(cmdList.dependenciesPresent);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true, false));
|
||||
EXPECT_FALSE(cmdList.dependenciesPresent);
|
||||
}
|
||||
|
||||
@@ -743,7 +743,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
|
||||
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
auto eventHandle = event->toHandle();
|
||||
commandListImmediate->appendWaitOnEvents(1, &eventHandle, false, true);
|
||||
commandListImmediate->appendWaitOnEvents(1, &eventHandle, false, true, false);
|
||||
|
||||
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
|
||||
}
|
||||
|
||||
@@ -1067,7 +1067,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
|
||||
size_t sizeBefore = cmdStream->getUsed();
|
||||
auto eventHandle = event->toHandle();
|
||||
result = commandList->appendWaitOnEvents(1, &eventHandle, false, true);
|
||||
result = commandList->appendWaitOnEvents(1, &eventHandle, false, true, false);
|
||||
size_t sizeAfter = cmdStream->getUsed();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
|
||||
@@ -277,7 +277,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat
|
||||
ASSERT_NE(nullptr, eventObject->csrs[0]);
|
||||
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
|
||||
|
||||
returnValue = commandList->appendBarrier(nullptr, 1, &event);
|
||||
@@ -341,7 +341,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia
|
||||
ASSERT_NE(nullptr, eventObject->csrs[0]);
|
||||
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]);
|
||||
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true);
|
||||
returnValue = commandList->appendWaitOnEvents(1, &event, false, true, false);
|
||||
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
|
||||
|
||||
returnValue = commandList->appendBarrier(nullptr, 1, &event);
|
||||
|
||||
@@ -796,7 +796,7 @@ ze_result_t OaMetricQueryImp::writeMetricQuery(CommandList &commandList, ze_even
|
||||
commandList.getCmdContainer().addToResidencyContainer(pool.pAllocation);
|
||||
|
||||
// Wait for events before executing query.
|
||||
commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true);
|
||||
commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true, false);
|
||||
|
||||
if (metricQueriesSize) {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user