From d3355411b878740a6eadacd911cd20d889f7971f Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Wed, 30 Aug 2023 10:05:58 +0000 Subject: [PATCH] performance: Improve dispatch monitor fence from wait -update last submitted value while flush monitor fence -dispatch monitor fence with workload when stalling cmds present Signed-off-by: Lukasz Jobczyk --- .../direct_submission/direct_submission_hw.h | 3 +- .../direct_submission_hw.inl | 9 ++++-- .../linux/drm_direct_submission.h | 2 +- .../linux/drm_direct_submission.inl | 2 +- .../windows/wddm_direct_submission.h | 4 ++- .../windows/wddm_direct_submission.inl | 30 ++++++++++++------- .../common/mocks/mock_direct_submission_hw.h | 2 +- .../linux/drm_direct_submission_tests.cpp | 4 +-- .../windows/wddm_direct_submission_tests.cpp | 20 +++++++++++-- 9 files changed, 55 insertions(+), 21 deletions(-) diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 7818c29a66..dd9f415e57 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -116,7 +116,8 @@ class DirectSubmissionHw { virtual uint64_t switchRingBuffers(); virtual void handleSwitchRingBuffers() = 0; GraphicsAllocation *switchRingBuffersAllocations(); - virtual uint64_t updateTagValue() = 0; + virtual uint64_t updateTagValue(bool hasStallingCmds) = 0; + virtual bool dispatchMonitorFenceRequired(bool hasStallingCmds); virtual void getTagAddressValue(TagData &tagData) = 0; void unblockGpu(); bool copyCommandBufferIntoRing(BatchBuffer &batchBuffer); diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index ee659d6aee..b5dbb025fc 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -767,7 +767,7 @@ void *DirectSubmissionHw::dispatchWorkloadSection(BatchBu 
Dispatcher::dispatchCacheFlush(ringCommandStream, this->rootDeviceEnvironment, gpuVaForMiFlush); } - if (!disableMonitorFence) { + if (!disableMonitorFence || this->dispatchMonitorFenceRequired(batchBuffer.hasStallingCmds)) { TagData currentTagData = {}; getTagAddressValue(currentTagData); Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, this->rootDeviceEnvironment, @@ -981,7 +981,7 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe currentQueueWorkCount++; DirectSubmissionDiagnostics::diagnosticModeOneSubmit(diagnostic.get()); - uint64_t flushValue = updateTagValue(); + uint64_t flushValue = updateTagValue(batchBuffer.hasStallingCmds); flushStamp.setStamp(flushValue); return ringStart; @@ -1087,6 +1087,11 @@ inline GraphicsAllocation *DirectSubmissionHw::switchRing return nextAllocation; } +template +bool DirectSubmissionHw::dispatchMonitorFenceRequired(bool hasStallingCmds) { + return false; +} + template void DirectSubmissionHw::deallocateResources() { for (uint32_t ringBufferIndex = 0; ringBufferIndex < this->ringBuffers.size(); ringBufferIndex++) { diff --git a/shared/source/direct_submission/linux/drm_direct_submission.h b/shared/source/direct_submission/linux/drm_direct_submission.h index 30b548a159..4b1f1260a9 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.h +++ b/shared/source/direct_submission/linux/drm_direct_submission.h @@ -31,7 +31,7 @@ class DrmDirectSubmission : public DirectSubmissionHw { void ensureRingCompletion() override; void handleSwitchRingBuffers() override; - uint64_t updateTagValue() override; + uint64_t updateTagValue(bool hasStallingCmds) override; void getTagAddressValue(TagData &tagData) override; bool isCompleted(uint32_t ringBufferIndex) override; bool isCompletionFenceSupported(); diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index 
68c94ba8ad..a73360e548 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -217,7 +217,7 @@ void DrmDirectSubmission::handleSwitchRingBuffers() { } template -uint64_t DrmDirectSubmission::updateTagValue() { +uint64_t DrmDirectSubmission::updateTagValue(bool hasStallingCmds) { if (!this->disableMonitorFence) { this->currentTagData.tagValue++; this->ringBuffers[this->currentRingBuffer].completionFence = this->currentTagData.tagValue; diff --git a/shared/source/direct_submission/windows/wddm_direct_submission.h b/shared/source/direct_submission/windows/wddm_direct_submission.h index b6712bc90e..572d194ce8 100644 --- a/shared/source/direct_submission/windows/wddm_direct_submission.h +++ b/shared/source/direct_submission/windows/wddm_direct_submission.h @@ -34,7 +34,9 @@ class WddmDirectSubmission : public DirectSubmissionHw { void ensureRingCompletion() override; void handleSwitchRingBuffers() override; void handleStopRingBuffer() override; - uint64_t updateTagValue() override; + uint64_t updateTagValue(bool hasStallingCmds) override; + bool dispatchMonitorFenceRequired(bool hasStallingCmds) override; + uint64_t updateTagValueImpl(); void getTagAddressValue(TagData &tagData) override; bool isCompleted(uint32_t ringBufferIndex) override; diff --git a/shared/source/direct_submission/windows/wddm_direct_submission.inl b/shared/source/direct_submission/windows/wddm_direct_submission.inl index ce4034935b..9da594be19 100644 --- a/shared/source/direct_submission/windows/wddm_direct_submission.inl +++ b/shared/source/direct_submission/windows/wddm_direct_submission.inl @@ -81,7 +81,7 @@ inline void WddmDirectSubmission::flushMonitorFence() { this->unblockGpu(); this->currentQueueWorkCount++; - this->updateTagValue(); + this->updateTagValueImpl(); } template @@ -148,19 +148,29 @@ void WddmDirectSubmission::handleSwitchRingBuffers() { } template -uint64_t 
WddmDirectSubmission::updateTagValue() { - if (!this->disableMonitorFence) { - MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence(); - - currentFence.lastSubmittedFence = currentFence.currentFenceValue; - currentFence.currentFenceValue++; - this->ringBuffers[this->currentRingBuffer].completionFence = currentFence.lastSubmittedFence; - - return currentFence.lastSubmittedFence; +uint64_t WddmDirectSubmission::updateTagValue(bool hasStallingCmds) { + if (!this->disableMonitorFence || hasStallingCmds) { + return this->updateTagValueImpl(); } return 0ull; } +template +bool WddmDirectSubmission::dispatchMonitorFenceRequired(bool hasStallingCmds) { + return hasStallingCmds; +} + +template +uint64_t WddmDirectSubmission::updateTagValueImpl() { + MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence(); + + currentFence.lastSubmittedFence = currentFence.currentFenceValue; + currentFence.currentFenceValue++; + this->ringBuffers[this->currentRingBuffer].completionFence = currentFence.lastSubmittedFence; + + return currentFence.lastSubmittedFence; +} + template void WddmDirectSubmission::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) { wddm->waitFromCpu(completionValue, fence); diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index b6160f1ddd..7b4e2b4be7 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -146,7 +146,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw void handleSwitchRingBuffers() override {} - uint64_t updateTagValue() override { + uint64_t updateTagValue(bool hasStallingCmds) override { return updateTagValueReturn; } diff --git a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp index 4cd623cbc6..1e5b4f04ef 100644 
--- a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp @@ -118,7 +118,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenDrmDirectSubmissionWhenCallingLinuxImplem EXPECT_NE(0ull, drmDirectSubmission.switchRingBuffers()); - EXPECT_EQ(0ull, drmDirectSubmission.updateTagValue()); + EXPECT_EQ(0ull, drmDirectSubmission.updateTagValue(false)); TagData tagData = {1ull, 1ull}; drmDirectSubmission.getTagAddressValue(tagData); @@ -662,7 +662,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenDisabledMonitorFenceWhenUpdateTagValueThe EXPECT_TRUE(ret); auto currentTag = directSubmission.currentTagData.tagValue; - directSubmission.updateTagValue(); + directSubmission.updateTagValue(false); auto updatedTag = directSubmission.currentTagData.tagValue; diff --git a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp index 847c639852..6369747ec9 100644 --- a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp @@ -424,7 +424,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectcomple MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); - uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(); + uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(false); EXPECT_EQ(value, actualTagValue); EXPECT_EQ(value + 1, contextFence.currentFenceValue); EXPECT_EQ(value, wddmDirectSubmission.ringBuffers[wddmDirectSubmission.currentRingBuffer].completionFence); @@ -440,11 +440,27 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmDisableMonitorFenceWhenUpdatingTagVa MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); 
wddmDirectSubmission.disableMonitorFence = true; - uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(); + uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(false); EXPECT_EQ(0ull, actualTagValue); EXPECT_EQ(value, contextFence.currentFenceValue); } +HWTEST_F(WddmDirectSubmissionTest, givenWddmDisableMonitorFenceAndStallingCmdsWhenUpdatingTagValueThenUpdateCompletionFence) { + uint64_t address = 0xFF00FF0000ull; + uint64_t value = 0x12345678ull; + MonitoredFence &contextFence = osContext->getResidencyController().getMonitoredFence(); + contextFence.gpuAddress = address; + contextFence.currentFenceValue = value; + + MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + wddmDirectSubmission.disableMonitorFence = true; + + uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(true); + EXPECT_EQ(value, actualTagValue); + EXPECT_EQ(value + 1, contextFence.currentFenceValue); + EXPECT_EQ(value, wddmDirectSubmission.ringBuffers[wddmDirectSubmission.currentRingBuffer].completionFence); +} + HWTEST_F(WddmDirectSubmissionTest, givenWddmDisableMonitorFenceWhenHandleStopRingBufferThenExpectCompletionFenceUpdated) { uint64_t address = 0xFF00FF0000ull; uint64_t value = 0x12345678ull;