performance: Allow power efficient waits with ULLS light

Resolves: HSD-18041633458
Related-To: NEO-13922

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2025-03-05 14:50:30 +00:00 committed by Compute-Runtime-Automation
parent a93cecac36
commit 61fe9ebb8f
7 changed files with 29 additions and 4 deletions

View File

@ -50,6 +50,7 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
constexpr static size_t ullsLightTimeout = 2'000'000;
std::chrono::steady_clock::time_point lastUllsLightExecTimestamp{};
int boHandleForExec = 0;
std::vector<BufferObject *> residency{};
std::vector<ExecObject> execObjectsStorage{};

View File

@ -144,6 +144,7 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
this->handleResidency();
} else {
this->lastUllsLightExecTimestamp = std::chrono::steady_clock::now();
this->boHandleForExec = bb->peekHandle();
}
auto currentBase = this->ringCommandStream.getGraphicsAllocation()->getGpuAddress();
@ -257,7 +258,7 @@ uint64_t DrmDirectSubmission<GfxFamily, Dispatcher>::updateTagValue(bool require
this->currentTagData.tagValue++;
this->ringBuffers[this->currentRingBuffer].completionFence = this->currentTagData.tagValue;
}
return 0ull;
return boHandleForExec;
}
template <typename GfxFamily, typename Dispatcher>

View File

@ -120,6 +120,9 @@ int Drm::ioctl(DrmIoctl request, void *arg) {
end = std::chrono::steady_clock::now();
long long elapsedTime = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
static std::mutex mtx;
std::lock_guard lock(mtx);
IoctlStatisticsEntry ioctlData{};
auto ioctlDataIt = this->ioctlStatistics.find(request);
if (ioctlDataIt != this->ioctlStatistics.end()) {

View File

@ -22,7 +22,7 @@ int ProductHelperHw<gfxProduct>::configureHardwareCustom(HardwareInfo *hwInfo, O
kmdNotifyProperties.enableKmdNotify = true;
kmdNotifyProperties.delayKmdNotifyMicroseconds = 150;
kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission = true;
kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds = 20;
kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds = 28000;
return 0;
}

View File

@ -994,6 +994,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmi
batchBuffer.startOffset = 4;
uint8_t bbStart[64];
batchBuffer.endCmdPtr = &bbStart[0];
static_cast<DrmMockCustom *>(static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr)->drm)->isVmBindAvailableCall.callParent = false;
auto flushStamp = csr->obtainCurrentFlushStamp();
csr->flush(batchBuffer, csr->getResidencyAllocations());
@ -1005,6 +1006,25 @@ HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmi
static_cast<MockDrmDirectSubmission<FamilyType> *>(directSubmission)->currentTagData.tagValue = 0u;
}
// Flushes a single batch buffer through the CSR and checks that the value returned by
// obtainCurrentFlushStamp() differs from the value sampled before the flush.
// NOTE(review): the "direct submission light" precondition comes from the test name and
// fixture setup, which are not visible in this block - confirm against the fixture.
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionLightWhenFlushThenFlushStampIsUpdated) {
    // Terminate the CSR command stream and pad it to a cache-line boundary.
    auto &commandStream = csr->getCS();
    CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(commandStream, nullptr);
    EncodeNoop<FamilyType>::alignToCacheLine(commandStream);

    // Scratch storage that the batch buffer's end-command pointer refers to.
    uint8_t endCmdStorage[64];

    BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(commandStream.getGraphicsAllocation(), &commandStream, commandStream.getUsed());
    batchBuffer.startOffset = 4;
    batchBuffer.endCmdPtr = &endCmdStorage[0];

    // Sample the flush stamp, flush, then expect the stamp to have changed.
    auto stampBeforeFlush = csr->obtainCurrentFlushStamp();
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    EXPECT_NE(csr->obtainCurrentFlushStamp(), stampBeforeFlush);

    auto *testedCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);
    auto *directSubmission = testedCsr->directSubmission.get();
    ASSERT_NE(nullptr, directSubmission);

    // NOTE(review): presumably the tag value is zeroed so teardown does not wait on an
    // outstanding completion value - confirm against the direct submission destructor.
    auto *mockDirectSubmission = static_cast<MockDrmDirectSubmission<FamilyType> *>(directSubmission);
    mockDirectSubmission->currentTagData.tagValue = 0u;
}
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenFlushThenCommandBufferAllocationIsNotAddedToHandlerResidencySet) {
mock->bindAvailable = true;
auto &cs = csr->getCS();

View File

@ -36,7 +36,7 @@ ARLTEST_F(ArlProductHelperLinux, GivenArlWhenConfigureHardwareCustomThenKmdNotif
EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(150ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission);
EXPECT_EQ(20ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds);
EXPECT_EQ(28000ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds);
}
ARLTEST_F(ArlProductHelperLinux, givenArlWhenIsBlitterForImagesSupportedIsCalledThenTrueIsReturned) {

View File

@ -36,7 +36,7 @@ MTLTEST_F(MtlProductHelperLinux, GivenMtlWhenConfigureHardwareCustomThenKmdNotif
EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify);
EXPECT_EQ(150ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission);
EXPECT_EQ(20ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds);
EXPECT_EQ(28000ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds);
}
MTLTEST_F(MtlProductHelperLinux, givenMtlWhenIsBlitterForImagesSupportedIsCalledThenTrueIsReturned) {