fix: ensure regular event completion in in-order mode 2

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski 2024-12-16 08:39:03 +00:00 committed by Compute-Runtime-Automation
parent 085988c5e9
commit 9c2e5f5568
4 changed files with 44 additions and 16 deletions

View File

@ -195,7 +195,7 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
this->commandContainer.addToResidencyContainer(inOrderExecInfo->getHostCounterAllocation());
if (signalEvent) {
if (signalEvent->isCounterBased() || nonWalkerInOrderCmdsChaining) {
if (signalEvent->isCounterBased() || nonWalkerInOrderCmdsChaining || isImmediateType()) {
signalEvent->updateInOrderExecState(inOrderExecInfo, inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset());
} else {
signalEvent->unsetInOrderExecInfo();

View File

@ -78,7 +78,7 @@ struct EventImp : public Event {
const uint32_t count, const ze_kernel_timestamp_result_t *pKernelTimestampsBuffer);
void copyDataToEventAlloc(void *dstHostAddr, uint64_t dstGpuVa, size_t copySize, const void *copyData);
void copyTbxData(uint64_t dstGpuVa, size_t copySize);
void synchronizeCounterBasedTimestampCompletionWithTimeout();
void synchronizeTimestampCompletionWithTimeout();
};
} // namespace L0

View File

@ -762,7 +762,7 @@ void EventImp<TagSizeT>::resetDeviceCompletionData(bool resetAllPackets) {
}
template <typename TagSizeT>
void EventImp<TagSizeT>::synchronizeCounterBasedTimestampCompletionWithTimeout() {
void EventImp<TagSizeT>::synchronizeTimestampCompletionWithTimeout() {
std::chrono::high_resolution_clock::time_point startTime = std::chrono::high_resolution_clock::now();
constexpr uint64_t timeoutMs = 1000 * 5; // 5s
uint64_t timeDiff = 0;
@ -787,8 +787,8 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
assignKernelEventCompletionData(getHostAddress());
calculateProfilingData();
if (isCounterBased() && (contextEndTS == Event::STATE_CLEARED || contextEndTS == 0)) {
synchronizeCounterBasedTimestampCompletionWithTimeout();
if (contextEndTS == Event::STATE_CLEARED || contextEndTS == 0) {
synchronizeTimestampCompletionWithTimeout();
}
auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) {

View File

@ -460,7 +460,7 @@ HWTEST2_F(InOrderCmdListTests, givenCounterBasedTimestampEventWhenQueryingTimest
uint64_t notReadyData = Event::STATE_CLEARED;
void assignKernelEventCompletionData(void *address) override {
auto completionAddress = reinterpret_cast<uint64_t *>(getCompletionFieldHostAddress());
auto completionAddress = reinterpret_cast<uint64_t *>(ptrOffset(getHostAddress(), NEO::TimestampPackets<uint64_t, 1>::getContextEndOffset()));
assignKernelEventCompletionDataCalled++;
if (assignKernelEventCompletionDataCalled <= assignKernelEventCompletionDataFailCounter) {
*completionAddress = notReadyData;
@ -477,6 +477,7 @@ HWTEST2_F(InOrderCmdListTests, givenCounterBasedTimestampEventWhenQueryingTimest
auto eventPool = createEvents<FamilyType>(1, true);
auto event1 = std::make_unique<MyMockEvent>(eventPool.get(), device);
auto event2 = std::make_unique<MyMockEvent>(eventPool.get(), device);
auto event3 = std::make_unique<MyMockEvent>(eventPool.get(), device);
event1->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE);
event1->assignKernelEventCompletionDataFailCounter = 2;
@ -487,26 +488,37 @@ HWTEST2_F(InOrderCmdListTests, givenCounterBasedTimestampEventWhenQueryingTimest
event2->setUsingContextEndOffset(true);
event2->notReadyData = 0;
event3->disableImplicitCounterBasedMode();
event3->assignKernelEventCompletionDataFailCounter = 2;
event3->setUsingContextEndOffset(true);
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, event1->toHandle(), 0, nullptr, launchParams, false);
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, event2->toHandle(), 0, nullptr, launchParams, false);
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, event3->toHandle(), 0, nullptr, launchParams, false);
*reinterpret_cast<uint64_t *>(event1->getCompletionFieldHostAddress()) = Event::STATE_CLEARED;
*reinterpret_cast<uint64_t *>(event2->getCompletionFieldHostAddress()) = 0;
*reinterpret_cast<uint64_t *>(event3->getCompletionFieldHostAddress()) = 0;
event1->getInOrderExecInfo()->setLastWaitedCounterValue(2);
event2->getInOrderExecInfo()->setLastWaitedCounterValue(2);
event3->getInOrderExecInfo()->setLastWaitedCounterValue(3);
EXPECT_EQ(ZE_RESULT_SUCCESS, event1->queryStatus());
EXPECT_EQ(ZE_RESULT_SUCCESS, event2->queryStatus());
EXPECT_EQ(ZE_RESULT_SUCCESS, event3->queryStatus());
ze_kernel_timestamp_result_t kernelTimestamps = {};
EXPECT_EQ(0u, event1->assignKernelEventCompletionDataCalled);
EXPECT_EQ(0u, event2->assignKernelEventCompletionDataCalled);
EXPECT_EQ(0u, event3->assignKernelEventCompletionDataCalled);
event1->queryKernelTimestamp(&kernelTimestamps);
event2->queryKernelTimestamp(&kernelTimestamps);
event3->queryKernelTimestamp(&kernelTimestamps);
EXPECT_EQ(event1->assignKernelEventCompletionDataFailCounter + 1, event1->assignKernelEventCompletionDataCalled);
EXPECT_EQ(event2->assignKernelEventCompletionDataFailCounter + 1, event2->assignKernelEventCompletionDataCalled);
EXPECT_EQ(event3->assignKernelEventCompletionDataFailCounter + 1, event3->assignKernelEventCompletionDataCalled);
}
HWTEST2_F(InOrderCmdListTests, givenInterruptableEventsWhenExecutingOnDifferentCsrThenAssignItToEventOnExecute, IsAtLeastXeHpCore) {
@ -787,16 +799,16 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventWhenAppendEventResetCalledThenRe
}
HWTEST2_F(InOrderCmdListTests, givenRegularEventWithTemporaryInOrderDataAssignmentWhenCallingSynchronizeOrResetThenUnset, MatchAny) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto cmdList = createRegularCmdList<gfxCoreFamily>(false);
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
auto hostAddress = static_cast<uint64_t *>(cmdList->inOrderExecInfo->getDeviceCounterAllocation()->getUnderlyingBuffer());
auto eventPool = createEvents<FamilyType>(1, true);
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
auto nonWalkerSignallingSupported = immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get());
auto nonWalkerSignallingSupported = cmdList->isInOrderNonWalkerSignalingRequired(events[0].get());
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(nonWalkerSignallingSupported, events[0]->inOrderExecInfo.get() != nullptr);
@ -812,7 +824,7 @@ HWTEST2_F(InOrderCmdListTests, givenRegularEventWithTemporaryInOrderDataAssignme
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
EXPECT_EQ(events[0]->inOrderExecInfo.get(), nullptr);
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(nonWalkerSignallingSupported, events[0]->inOrderExecInfo.get() != nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->reset());
@ -822,16 +834,16 @@ HWTEST2_F(InOrderCmdListTests, givenRegularEventWithTemporaryInOrderDataAssignme
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenSetInOrderParamsOnlyWhenChainingIsRequired, MatchAny) {
uint32_t counterOffset = 64;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset);
auto cmdList = createRegularCmdList<gfxCoreFamily>(false);
cmdList->inOrderExecInfo->setAllocationOffset(counterOffset);
auto eventPool = createEvents<FamilyType>(1, false);
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_FALSE(events[0]->isCounterBased());
if (immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get())) {
if (cmdList->isInOrderNonWalkerSignalingRequired(events[0].get())) {
EXPECT_EQ(events[0]->inOrderExecSignalValue, 1u);
EXPECT_NE(events[0]->inOrderExecInfo.get(), nullptr);
EXPECT_EQ(events[0]->inOrderAllocationOffset, counterOffset);
@ -841,7 +853,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenSetInOrde
EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);
}
auto copyImmCmdList = createCopyOnlyImmCmdList<gfxCoreFamily>();
auto copyImmCmdList = createRegularCmdList<gfxCoreFamily>(true);
uint32_t copyData = 0;
void *deviceAlloc = nullptr;
@ -859,6 +871,19 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenSetInOrde
context->freeMem(deviceAlloc);
}
// Checks that an event which is NOT counter-based still receives in-order execution data
// when it is signaled from a command list obtained via createImmCmdList.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventAndImmediateCmdListThenSetInOrderParams, MatchAny) {
auto cmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(1, false);
// Switch the event out of counter-based mode so it is handled as a regular event.
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
EXPECT_FALSE(events[0]->isCounterBased());
// After the single append the event is expected to carry a signal value of 1 and a non-null in-order exec info.
EXPECT_EQ(events[0]->inOrderExecSignalValue, 1u);
EXPECT_NE(events[0]->inOrderExecInfo.get(), nullptr);
}
HWTEST2_F(InOrderCmdListTests, givenRegularEventWithInOrderExecInfoWhenReusedOnRegularCmdListThenUnsetInOrderData, MatchAny) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();
@ -866,6 +891,9 @@ HWTEST2_F(InOrderCmdListTests, givenRegularEventWithInOrderExecInfoWhenReusedOnR
events[0]->makeCounterBasedImplicitlyDisabled(eventPool->getAllocation());
auto nonWalkerSignallingSupported = immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get());
if (!nonWalkerSignallingSupported) {
GTEST_SKIP();
}
EXPECT_TRUE(immCmdList->isInOrderExecutionEnabled());