mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
performance: Modify wait flow when signal event is used for sub copy
Related-To: NEO-13003
Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
7861610e52
commit
791558ba74
@@ -185,11 +185,21 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
|
||||
|
||||
this->addResidency(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation());
|
||||
|
||||
if (signalEvent && signalEvent->getInOrderIncrementValue(this->partitionCount) == 0) {
|
||||
if (signalEvent->isCounterBased() || nonWalkerInOrderCmdsChaining || (isImmediateType() && this->duplicatedInOrderCounterStorageEnabled)) {
|
||||
assignInOrderExecInfoToEvent(signalEvent);
|
||||
if (signalEvent) {
|
||||
if (signalEvent->getInOrderIncrementValue(this->partitionCount) == 0) {
|
||||
if (signalEvent->isCounterBased() || nonWalkerInOrderCmdsChaining || (isImmediateType() && this->duplicatedInOrderCounterStorageEnabled)) {
|
||||
assignInOrderExecInfoToEvent(signalEvent);
|
||||
} else {
|
||||
signalEvent->unsetInOrderExecInfo();
|
||||
}
|
||||
} else {
|
||||
signalEvent->unsetInOrderExecInfo();
|
||||
auto incrementValue = signalEvent->getInOrderIncrementValue(1);
|
||||
auto currentUsage = signalEvent->getInOrderExecInfo()->getAggregatedEventUsageCounter();
|
||||
|
||||
if ((currentUsage + incrementValue) > signalEvent->getInOrderExecBaseSignalValue()) {
|
||||
signalEvent->getInOrderExecInfo()->resetAggregatedEventUsageCounter();
|
||||
}
|
||||
signalEvent->getInOrderExecInfo()->addAggregatedEventUsageCounter(incrementValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -120,14 +120,12 @@ ze_result_t BcsSplit::appendSplitCall(CommandListCoreFamilyImmediate<gfxCoreFami
|
||||
const bool dualStreamCopyOffload = cmdList->isDualStreamCopyOffloadOperation(cmdList->isCopyOffloadEnabled());
|
||||
|
||||
if (useSignalEventForSubcopy && cmdList->isInOrderExecutionEnabled()) {
|
||||
for (size_t i = 0; i < cmdListsForSplit.size(); i++) {
|
||||
auto subCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(cmdListsForSplit[i]);
|
||||
auto &subInOrderExecInfo = subCmdList->getInOrderExecInfo();
|
||||
cmdList->appendWaitOnInOrderDependency(subInOrderExecInfo, nullptr,
|
||||
subInOrderExecInfo->getCounterValue(),
|
||||
subInOrderExecInfo->getAllocationOffset(),
|
||||
hasRelaxedOrderingDependencies, false, false, false, dualStreamCopyOffload);
|
||||
}
|
||||
auto currentCounter = signalEvent->getInOrderExecInfo()->getAggregatedEventUsageCounter();
|
||||
auto expectedCounter = currentCounter + signalEvent->getInOrderIncrementValue(1);
|
||||
cmdList->appendWaitOnInOrderDependency(signalEvent->getInOrderExecInfo(), nullptr,
|
||||
expectedCounter,
|
||||
signalEvent->getInOrderAllocationOffset(),
|
||||
hasRelaxedOrderingDependencies, false, false, false, dualStreamCopyOffload);
|
||||
}
|
||||
|
||||
if (!useSignalEventForSubcopy) {
|
||||
|
||||
@@ -120,6 +120,7 @@ void InOrderExecInfo::initializeAllocationsFromHost() {
|
||||
void InOrderExecInfo::reset() {
|
||||
resetCounterValue();
|
||||
regularCmdListSubmissionCounter = 0;
|
||||
aggregatedEventUsageCounter = 0;
|
||||
allocationOffset = 0;
|
||||
|
||||
initializeAllocationsFromHost();
|
||||
|
||||
@@ -88,6 +88,10 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
// Returns the current value of regularCmdListSubmissionCounter.
uint64_t getRegularCmdListSubmissionCounter() const { return regularCmdListSubmissionCounter; }
|
||||
// Increases regularCmdListSubmissionCounter by addValue.
void addRegularCmdListSubmissionCounter(uint64_t addValue) { regularCmdListSubmissionCounter += addValue; }
|
||||
|
||||
// Returns the current value of aggregatedEventUsageCounter.
uint64_t getAggregatedEventUsageCounter() const { return aggregatedEventUsageCounter; }
|
||||
// Increases aggregatedEventUsageCounter by addValue; no range check is performed here.
void addAggregatedEventUsageCounter(uint64_t addValue) { aggregatedEventUsageCounter += addValue; }
|
||||
// Sets aggregatedEventUsageCounter back to zero.
void resetAggregatedEventUsageCounter() { aggregatedEventUsageCounter = 0; }
|
||||
|
||||
// Returns the regularCmdList flag.
bool isRegularCmdList() const { return regularCmdList; }
|
||||
// Returns the duplicatedHostStorage flag.
bool isHostStorageDuplicated() const { return duplicatedHostStorage; }
|
||||
// Returns the atomicDeviceSignalling flag.
bool isAtomicDeviceSignalling() const { return atomicDeviceSignalling; }
|
||||
@@ -132,6 +136,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
|
||||
uint64_t counterValue = 0;
|
||||
uint64_t regularCmdListSubmissionCounter = 0;
|
||||
uint64_t aggregatedEventUsageCounter = 0;
|
||||
uint64_t deviceAddress = 0;
|
||||
uint64_t *hostAddress = nullptr;
|
||||
uint32_t numDevicePartitionsToWait = 0;
|
||||
|
||||
@@ -1066,3 +1066,29 @@ HWTEST_F(CommandEncoderTests, whenGetScratchPtrOffsetOfImplicitArgsIsCalledThenZ
|
||||
auto scratchOffset = EncodeDispatchKernel<FamilyType>::getScratchPtrOffsetOfImplicitArgs();
|
||||
EXPECT_EQ(0u, scratchOffset);
|
||||
}
|
||||
|
||||
// Exercises the aggregated-event-usage-counter accessors of InOrderExecInfo:
// a freshly created object reports 0, add accumulates across calls, reset
// returns the value to 0, and add works again after a reset.
HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenAggregatedEventUsageCounterIsUsedThenVerifyCorrectBehavior) {
|
||||
MockDevice mockDevice;
|
||||
|
||||
// Local storage whose address is handed over as the host pointer; its value (20)
// is also passed to createFromExternalAllocation below.
uint64_t counterValue = 20;
|
||||
uint64_t *hostAddress = &counterValue;
|
||||
// Address 64 bytes past counterValue, used only as a numeric GPU address for the mock allocation.
// NOTE(review): presumably never dereferenced in this test - confirm.
uint64_t gpuAddress = castToUint64(ptrOffset(&counterValue, 64));
|
||||
|
||||
MockGraphicsAllocation deviceAlloc(nullptr, gpuAddress, 1);
|
||||
|
||||
auto inOrderExecInfo = InOrderExecInfo::createFromExternalAllocation(mockDevice, &deviceAlloc, gpuAddress, nullptr, hostAddress, counterValue, 1, 1);
|
||||
|
||||
// The aggregated usage counter is expected to start at 0 even though counterValue is 20.
EXPECT_EQ(0u, inOrderExecInfo->getAggregatedEventUsageCounter());
|
||||
|
||||
// Consecutive adds accumulate: 5, then 5 + 10 = 15.
inOrderExecInfo->addAggregatedEventUsageCounter(5);
|
||||
EXPECT_EQ(5u, inOrderExecInfo->getAggregatedEventUsageCounter());
|
||||
|
||||
inOrderExecInfo->addAggregatedEventUsageCounter(10);
|
||||
EXPECT_EQ(15u, inOrderExecInfo->getAggregatedEventUsageCounter());
|
||||
|
||||
// Reset drops the accumulated value back to 0.
inOrderExecInfo->resetAggregatedEventUsageCounter();
|
||||
EXPECT_EQ(0u, inOrderExecInfo->getAggregatedEventUsageCounter());
|
||||
|
||||
// After a reset, accumulation restarts from 0.
inOrderExecInfo->addAggregatedEventUsageCounter(7);
|
||||
EXPECT_EQ(7u, inOrderExecInfo->getAggregatedEventUsageCounter());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user