From 61fe9ebb8f9f34e31cdcc1c344ae91f739963e92 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Wed, 5 Mar 2025 14:50:30 +0000 Subject: [PATCH] performance: Allow power efficient waits with ULLS light Resolves: HSD-18041633458 Related-To: NEO-13922 Signed-off-by: Lukasz Jobczyk --- .../linux/drm_direct_submission.h | 1 + .../linux/drm_direct_submission.inl | 3 ++- shared/source/os_interface/linux/drm_neo.cpp | 3 +++ .../linux/product_helper_xe_lpg_linux.inl | 2 +- .../linux/drm_command_stream_tests_1.cpp | 20 +++++++++++++++++++ .../arl/linux/product_helper_tests_arl.cpp | 2 +- .../linux/product_helper_tests_mtl_linux.cpp | 2 +- 7 files changed, 29 insertions(+), 4 deletions(-) diff --git a/shared/source/direct_submission/linux/drm_direct_submission.h b/shared/source/direct_submission/linux/drm_direct_submission.h index 72c9e4533d..03b49f4604 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.h +++ b/shared/source/direct_submission/linux/drm_direct_submission.h @@ -50,6 +50,7 @@ class DrmDirectSubmission : public DirectSubmissionHw { constexpr static size_t ullsLightTimeout = 2'000'000; std::chrono::steady_clock::time_point lastUllsLightExecTimestamp{}; + int boHandleForExec = 0; std::vector residency{}; std::vector execObjectsStorage{}; diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index 0a1668660a..eb11b28de6 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -144,6 +144,7 @@ bool DrmDirectSubmission::submit(uint64_t gpuAddress, siz this->handleResidency(); } else { this->lastUllsLightExecTimestamp = std::chrono::steady_clock::now(); + this->boHandleForExec = bb->peekHandle(); } auto currentBase = this->ringCommandStream.getGraphicsAllocation()->getGpuAddress(); @@ -257,7 +258,7 @@ uint64_t DrmDirectSubmission::updateTagValue(bool require this->currentTagData.tagValue++; this->ringBuffers[this->currentRingBuffer].completionFence = this->currentTagData.tagValue; } - return 0ull; + return boHandleForExec; } template diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index 33082e70e6..361b729c90 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -120,6 +120,9 @@ int Drm::ioctl(DrmIoctl request, void *arg) { end = std::chrono::steady_clock::now(); long long elapsedTime = std::chrono::duration_cast(end - start).count(); + static std::mutex mtx; + std::lock_guard lock(mtx); + IoctlStatisticsEntry ioctlData{}; auto ioctlDataIt = this->ioctlStatistics.find(request); if (ioctlDataIt != this->ioctlStatistics.end()) { diff --git a/shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl b/shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl index 614c035ec3..e6657f4861 100644 --- a/shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl +++ b/shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl @@ -22,7 +22,7 @@ int ProductHelperHw::configureHardwareCustom(HardwareInfo *hwInfo, O kmdNotifyProperties.enableKmdNotify = true; kmdNotifyProperties.delayKmdNotifyMicroseconds = 150; kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission = true; - kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds = 20; + kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds = 28000; return 0; } diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp index 8b0b2a2d36..9604cc5c9e 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp @@ -994,6 +994,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmi batchBuffer.startOffset = 4; uint8_t bbStart[64]; batchBuffer.endCmdPtr = &bbStart[0]; + static_cast(static_cast *>(csr)->drm)->isVmBindAvailableCall.callParent = false; auto flushStamp = csr->obtainCurrentFlushStamp(); csr->flush(batchBuffer, csr->getResidencyAllocations()); @@ -1005,6 +1006,25 @@ HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmi static_cast *>(directSubmission)->currentTagData.tagValue = 0u; } +HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionLightWhenFlushThenFlushStampIsUpdated) { + auto &cs = csr->getCS(); + CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); + EncodeNoop::alignToCacheLine(cs); + BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed()); + batchBuffer.startOffset = 4; + uint8_t bbStart[64]; + batchBuffer.endCmdPtr = &bbStart[0]; + + auto flushStamp = csr->obtainCurrentFlushStamp(); + csr->flush(batchBuffer, csr->getResidencyAllocations()); + + EXPECT_NE(csr->obtainCurrentFlushStamp(), flushStamp); + + auto directSubmission = static_cast *>(csr)->directSubmission.get(); + ASSERT_NE(nullptr, directSubmission); + static_cast *>(directSubmission)->currentTagData.tagValue = 0u; +} + HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenFlushThenCommandBufferAllocationIsNotAddedToHandlerResidencySet) { mock->bindAvailable = true; auto &cs = csr->getCS(); diff --git a/shared/test/unit_test/xe_hpg_core/arl/linux/product_helper_tests_arl.cpp b/shared/test/unit_test/xe_hpg_core/arl/linux/product_helper_tests_arl.cpp index 0ad8bb9820..03a7c4f9a9 100644 --- a/shared/test/unit_test/xe_hpg_core/arl/linux/product_helper_tests_arl.cpp +++ b/shared/test/unit_test/xe_hpg_core/arl/linux/product_helper_tests_arl.cpp @@ -36,7 +36,7 @@ ARLTEST_F(ArlProductHelperLinux, GivenArlWhenConfigureHardwareCustomThenKmdNotif EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(150ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); - EXPECT_EQ(20ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); + EXPECT_EQ(28000ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } ARLTEST_F(ArlProductHelperLinux, givenArlWhenIsBlitterForImagesSupportedIsCalledThenTrueIsReturned) { diff --git a/shared/test/unit_test/xe_hpg_core/mtl/linux/product_helper_tests_mtl_linux.cpp b/shared/test/unit_test/xe_hpg_core/mtl/linux/product_helper_tests_mtl_linux.cpp index 174875c0c5..263386de04 100644 --- a/shared/test/unit_test/xe_hpg_core/mtl/linux/product_helper_tests_mtl_linux.cpp +++ b/shared/test/unit_test/xe_hpg_core/mtl/linux/product_helper_tests_mtl_linux.cpp @@ -36,7 +36,7 @@ MTLTEST_F(MtlProductHelperLinux, GivenMtlWhenConfigureHardwareCustomThenKmdNotif EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(150ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); - EXPECT_EQ(20ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); + EXPECT_EQ(28000ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } MTLTEST_F(MtlProductHelperLinux, givenMtlWhenIsBlitterForImagesSupportedIsCalledThenTrueIsReturned) {