mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
performance: optimize counter based waiting schemes
- store latest waited counter value.
- do not wait on values that are already completed.
- disable mechanism when counter overflows.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
205f8d2ffd
commit
4aa7c6c99e
@@ -223,21 +223,24 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset);
|
||||
auto waitValue = getInOrderExecSignalValueWithSubmissionCounter();
|
||||
bool signaled = true;
|
||||
|
||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
|
||||
signaled = false;
|
||||
break;
|
||||
if (!inOrderExecInfo->isCounterAlreadyDone(waitValue)) {
|
||||
bool signaled = true;
|
||||
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset);
|
||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
|
||||
signaled = false;
|
||||
break;
|
||||
}
|
||||
|
||||
hostAddress = ptrOffset(hostAddress, sizeof(uint64_t));
|
||||
}
|
||||
|
||||
hostAddress = ptrOffset(hostAddress, sizeof(uint64_t));
|
||||
}
|
||||
|
||||
if (!signaled) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
if (!signaled) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
inOrderExecInfo->setLastWaitedCounterValue(waitValue);
|
||||
}
|
||||
|
||||
handleSuccessfulHostSynchronization();
|
||||
|
||||
@@ -301,6 +301,41 @@ HWTEST2_F(InOrderCmdListTests, givenCmdListsWhenDispatchingThenUseInternalTaskCo
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that a successful host wait on a counter-based event is recorded in the
// event's InOrderExecInfo, that lower values never overwrite the record, and that a
// non-zero allocation offset makes isCounterAlreadyDone() return false.
HWTEST2_F(InOrderCmdListTests, givenCounterBasedEventsWhenHostWaitsAreCalledThenLatestWaitIsRecorded, IsAtLeastSkl) {
    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    auto eventPool = createEvents<FamilyType>(2, false);

    // Two launches on the same immediate command list, each with its own signal event.
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[1]->toHandle(), 0, nullptr, launchParams, false);

    auto execInfo = events[1]->getInOrderExecInfo();

    // Store 2 at the base host address so that the host waits below can succeed.
    *execInfo->getBaseHostAddress() = 2u;

    EXPECT_EQ(ZE_RESULT_SUCCESS, events[1]->hostSynchronize(-1));

    const auto secondEventValue = events[1]->inOrderExecSignalValue;
    const auto firstEventValue = events[0]->inOrderExecSignalValue;

    EXPECT_TRUE(execInfo->isCounterAlreadyDone(secondEventValue));
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(firstEventValue));
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(secondEventValue + 1));

    // A lower value must not replace the recorded one.
    execInfo->setLastWaitedCounterValue(secondEventValue - 1);
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(secondEventValue));
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(firstEventValue));
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(secondEventValue + 1));

    // Waiting on the first event afterwards leaves the recorded value unchanged.
    EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(-1));
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(secondEventValue));
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(secondEventValue + 1));

    // With a non-zero allocation offset no value is reported as already done.
    execInfo->setAllocationOffset(4u);
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(0u));
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(secondEventValue));
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallWaitUserFence, IsAtLeastXeHpCore) {
|
||||
NEO::debugManager.flags.WaitForUserFenceOnEventHostSynchronize.set(1);
|
||||
|
||||
|
||||
@@ -80,12 +80,20 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
|
||||
void reset();
|
||||
// Returns true when this exec info has no device counter node attached.
bool isExternalMemoryExecInfo() const {
    return nullptr == deviceCounterNode;
}
|
||||
// Remembers the highest counter value passed in so far; a value that is not greater
// than the stored one leaves the stored value unchanged.
void setLastWaitedCounterValue(uint64_t value) {
    if (value > lastWaitedCounterValue) {
        lastWaitedCounterValue = value;
    }
}
|
||||
|
||||
bool isCounterAlreadyDone(uint64_t waitValue) const {
|
||||
return lastWaitedCounterValue >= waitValue && this->allocationOffset == 0u;
|
||||
}
|
||||
|
||||
protected:
|
||||
NEO::MemoryManager &memoryManager;
|
||||
NEO::TagNodeBase *deviceCounterNode = nullptr;
|
||||
NEO::TagNodeBase *hostCounterNode = nullptr;
|
||||
uint64_t counterValue = 0;
|
||||
uint64_t lastWaitedCounterValue = 0;
|
||||
|
||||
uint64_t regularCmdListSubmissionCounter = 0;
|
||||
uint64_t deviceAddress = 0;
|
||||
|
||||
@@ -184,6 +184,37 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
|
||||
tempNode2->returnTag();
|
||||
}
|
||||
|
||||
// Exercises InOrderExecInfo::setLastWaitedCounterValue / isCounterAlreadyDone directly:
// the recorded value only grows, queries at or below it report done, and a non-zero
// allocation offset makes every query report not done.
HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenSetLastCounterValueIsCalledThenItReturnsProperQueries) {
    MockDevice device;

    MockExecutionEnvironment executionEnvironment{};
    MockMemoryManager memoryManager(executionEnvironment);

    MockTagAllocator<DeviceAllocNodeType<true>> allocator(0, device.getMemoryManager());
    auto counterNode = allocator.getTag();

    auto execInfo = std::make_unique<InOrderExecInfo>(counterNode, nullptr, memoryManager, 2, 0, true, false);

    // Recorded value becomes 1: only 0 and 1 are reported as done.
    execInfo->setLastWaitedCounterValue(1u);
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(2u));
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(1u));
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(0u));

    // A lower value is ignored, so the answers stay the same.
    execInfo->setLastWaitedCounterValue(0u);
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(2u));
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(1u));
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(0u));

    // A higher value raises the recorded value.
    execInfo->setLastWaitedCounterValue(3u);
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(2u));
    EXPECT_TRUE(execInfo->isCounterAlreadyDone(3u));

    // With a non-zero allocation offset nothing is reported as done.
    execInfo->setAllocationOffset(4u);
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(2u));
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(3u));
    EXPECT_FALSE(execInfo->isCounterAlreadyDone(0u));
}
|
||||
|
||||
HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingThenSetCorrectValues) {
|
||||
MockDevice mockDevice;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user