mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
performance: optimize counter based waiting schemes
- store latest waited counter value.
- do not wait on values that are already completed.
- disable mechanism when counter overflows.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
205f8d2ffd
commit
4aa7c6c99e
@@ -223,21 +223,24 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
|
|||||||
return ZE_RESULT_NOT_READY;
|
return ZE_RESULT_NOT_READY;
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset);
|
|
||||||
auto waitValue = getInOrderExecSignalValueWithSubmissionCounter();
|
auto waitValue = getInOrderExecSignalValueWithSubmissionCounter();
|
||||||
bool signaled = true;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
|
if (!inOrderExecInfo->isCounterAlreadyDone(waitValue)) {
|
||||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
|
bool signaled = true;
|
||||||
signaled = false;
|
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset);
|
||||||
break;
|
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
|
||||||
|
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
|
||||||
|
signaled = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
hostAddress = ptrOffset(hostAddress, sizeof(uint64_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
hostAddress = ptrOffset(hostAddress, sizeof(uint64_t));
|
if (!signaled) {
|
||||||
}
|
return ZE_RESULT_NOT_READY;
|
||||||
|
}
|
||||||
if (!signaled) {
|
inOrderExecInfo->setLastWaitedCounterValue(waitValue);
|
||||||
return ZE_RESULT_NOT_READY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
handleSuccessfulHostSynchronization();
|
handleSuccessfulHostSynchronization();
|
||||||
|
|||||||
@@ -301,6 +301,41 @@ HWTEST2_F(InOrderCmdListTests, givenCmdListsWhenDispatchingThenUseInternalTaskCo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(InOrderCmdListTests, givenCounterBasedEventsWhenHostWaitsAreCalledThenLatestWaitIsRecorded, IsAtLeastSkl) {
|
||||||
|
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||||
|
auto eventPool = createEvents<FamilyType>(2, false);
|
||||||
|
|
||||||
|
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||||
|
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[1]->toHandle(), 0, nullptr, launchParams, false);
|
||||||
|
|
||||||
|
auto inOrderExecInfo = events[1]->getInOrderExecInfo();
|
||||||
|
*inOrderExecInfo->getBaseHostAddress() = 2u;
|
||||||
|
|
||||||
|
auto status = events[1]->hostSynchronize(-1);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
|
||||||
|
|
||||||
|
auto counterValue = events[1]->inOrderExecSignalValue;
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue));
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
|
||||||
|
|
||||||
|
// setting lower counter ignored
|
||||||
|
inOrderExecInfo->setLastWaitedCounterValue(counterValue - 1);
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue));
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
|
||||||
|
|
||||||
|
status = events[0]->hostSynchronize(-1);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
|
||||||
|
|
||||||
|
// setting offset disables mechanism
|
||||||
|
inOrderExecInfo->setAllocationOffset(4u);
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(0u));
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallWaitUserFence, IsAtLeastXeHpCore) {
|
HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallWaitUserFence, IsAtLeastXeHpCore) {
|
||||||
NEO::debugManager.flags.WaitForUserFenceOnEventHostSynchronize.set(1);
|
NEO::debugManager.flags.WaitForUserFenceOnEventHostSynchronize.set(1);
|
||||||
|
|
||||||
|
|||||||
@@ -80,12 +80,20 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
|||||||
|
|
||||||
void reset();
|
void reset();
|
||||||
bool isExternalMemoryExecInfo() const { return deviceCounterNode == nullptr; }
|
bool isExternalMemoryExecInfo() const { return deviceCounterNode == nullptr; }
|
||||||
|
void setLastWaitedCounterValue(uint64_t value) {
|
||||||
|
lastWaitedCounterValue = std::max(value, lastWaitedCounterValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isCounterAlreadyDone(uint64_t waitValue) const {
|
||||||
|
return lastWaitedCounterValue >= waitValue && this->allocationOffset == 0u;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
NEO::MemoryManager &memoryManager;
|
NEO::MemoryManager &memoryManager;
|
||||||
NEO::TagNodeBase *deviceCounterNode = nullptr;
|
NEO::TagNodeBase *deviceCounterNode = nullptr;
|
||||||
NEO::TagNodeBase *hostCounterNode = nullptr;
|
NEO::TagNodeBase *hostCounterNode = nullptr;
|
||||||
uint64_t counterValue = 0;
|
uint64_t counterValue = 0;
|
||||||
|
uint64_t lastWaitedCounterValue = 0;
|
||||||
|
|
||||||
uint64_t regularCmdListSubmissionCounter = 0;
|
uint64_t regularCmdListSubmissionCounter = 0;
|
||||||
uint64_t deviceAddress = 0;
|
uint64_t deviceAddress = 0;
|
||||||
|
|||||||
@@ -184,6 +184,37 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
|
|||||||
tempNode2->returnTag();
|
tempNode2->returnTag();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenSetLastCounterValueIsCalledThenItReturnsProperQueries) {
|
||||||
|
MockDevice mockDevice;
|
||||||
|
|
||||||
|
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||||
|
MockMemoryManager memoryManager(mockExecutionEnvironment);
|
||||||
|
|
||||||
|
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, mockDevice.getMemoryManager());
|
||||||
|
auto node = tagAllocator.getTag();
|
||||||
|
|
||||||
|
auto inOrderExecInfo = std::make_unique<InOrderExecInfo>(node, nullptr, memoryManager, 2, 0, true, false);
|
||||||
|
inOrderExecInfo->setLastWaitedCounterValue(1u);
|
||||||
|
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u));
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u));
|
||||||
|
|
||||||
|
inOrderExecInfo->setLastWaitedCounterValue(0u);
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u));
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u));
|
||||||
|
|
||||||
|
inOrderExecInfo->setLastWaitedCounterValue(3u);
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(2u));
|
||||||
|
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(3u));
|
||||||
|
|
||||||
|
inOrderExecInfo->setAllocationOffset(4u);
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(3u));
|
||||||
|
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(0u));
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingThenSetCorrectValues) {
|
HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingThenSetCorrectValues) {
|
||||||
MockDevice mockDevice;
|
MockDevice mockDevice;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user