performance: optimize counter-based waiting schemes

- store the latest counter value that was successfully waited on.
- do not wait on values that have already completed.
- disable the mechanism when the counter overflows.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2024-04-25 15:08:55 +00:00
committed by Compute-Runtime-Automation
parent 205f8d2ffd
commit 4aa7c6c99e
4 changed files with 88 additions and 11 deletions

View File

@@ -223,21 +223,24 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
return ZE_RESULT_NOT_READY;
}
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset);
auto waitValue = getInOrderExecSignalValueWithSubmissionCounter();
bool signaled = true;
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
signaled = false;
break;
if (!inOrderExecInfo->isCounterAlreadyDone(waitValue)) {
bool signaled = true;
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset);
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
signaled = false;
break;
}
hostAddress = ptrOffset(hostAddress, sizeof(uint64_t));
}
hostAddress = ptrOffset(hostAddress, sizeof(uint64_t));
}
if (!signaled) {
return ZE_RESULT_NOT_READY;
if (!signaled) {
return ZE_RESULT_NOT_READY;
}
inOrderExecInfo->setLastWaitedCounterValue(waitValue);
}
handleSuccessfulHostSynchronization();

View File

@@ -301,6 +301,41 @@ HWTEST2_F(InOrderCmdListTests, givenCmdListsWhenDispatchingThenUseInternalTaskCo
}
}
// Checks the "last waited counter value" bookkeeping exposed by the in-order exec info
// (isCounterAlreadyDone / setLastWaitedCounterValue) as it is driven by host
// synchronization on counter-based events.
HWTEST2_F(InOrderCmdListTests, givenCounterBasedEventsWhenHostWaitsAreCalledThenLatestWaitIsRecorded, IsAtLeastSkl) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto eventPool = createEvents<FamilyType>(2, false);
// Two launches on the same immediate command list, each signaling its own event.
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[1]->toHandle(), 0, nullptr, launchParams, false);
auto inOrderExecInfo = events[1]->getInOrderExecInfo();
// Write 2 directly into the host-visible counter storage so the wait below succeeds
// (presumably this mimics both launches having completed -- TODO confirm against the signal values).
*inOrderExecInfo->getBaseHostAddress() = 2u;
auto status = events[1]->hostSynchronize(-1);
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
auto counterValue = events[1]->inOrderExecSignalValue;
// After the successful wait on events[1], its signal value and the signal value of
// events[0] are reported as done, while the next counter value is not.
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
// Setting a lower counter value is ignored: the three queries below must give the same answers as above.
inOrderExecInfo->setLastWaitedCounterValue(counterValue - 1);
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
// Synchronizing on events[0] afterwards must still succeed and must not change
// the answers for counterValue and counterValue + 1.
status = events[0]->hostSynchronize(-1);
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
// Setting an allocation offset disables the mechanism: no value is reported as done
// any more, neither 0 nor the previously recorded counter value.
inOrderExecInfo->setAllocationOffset(4u);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(0u));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
}
HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallWaitUserFence, IsAtLeastXeHpCore) {
NEO::debugManager.flags.WaitForUserFenceOnEventHostSynchronize.set(1);