// Patch summary (preamble text; everything below the first "diff --git" header is unchanged):
//
// * CommandStreamReceiver gains a virtual isKmdWaitOnTaskCountAllowed() const whose
//   base implementation returns false.
// * DrmCommandStreamReceiver overrides it and returns isDirectSubmissionEnabled().
// * CommandQueue::waitUntilComplete(): when the wait is not skipped, flushStampToWait
//   is 0 and the GPGPU CSR reports isKmdWaitOnTaskCountAllowed(), flushStampToWait is
//   set to gpgpuTaskCountToWait before waitForTaskCountWithKmdNotifyFallback() is called.
// * UltCommandStreamReceiver overrides the query with two test knobs, both defaulting
//   to false: callBaseIsKmdWaitOnTaskCountAllowed (forward to BaseClass) and
//   isKmdWaitOnTaskCountAllowedValue (value returned otherwise).
// * Tests: one KmdNotifyTests case is renamed ("FlushStampe" -> "FlushStamp"); a new
//   KmdNotifyTests case expects enableTimeout == true and timeoutMs == 1 for a
//   QueueThrottle::LOW queue waiting with flush stamp 0 when the CSR allows the KMD
//   wait; CSR and DRM CSR tests check the false/true results of the new query.
// * drm_command_stream.h: copyright range changed from 2018-2022 to 2018-2023.
//
// NOTE(review): this copy looks whitespace-collapsed, and some "<...>" template
// argument lists appear to be missing (e.g. "template +bool DrmCommandStreamReceiver::",
// "std::make_unique>("); the last hunk looks truncated as well. It presumably will not
// apply as-is -- TODO confirm and regenerate from the original commit.
diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 09d52cf620..f5b7a23e32 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -442,6 +442,9 @@ WaitStatus CommandQueue::waitUntilComplete(TaskCountType gpgpuTaskCountToWait, R DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag()); if (!skipWait) { + if (flushStampToWait == 0 && getGpgpuCommandStreamReceiver().isKmdWaitOnTaskCountAllowed()) { + flushStampToWait = gpgpuTaskCountToWait; + } waitStatus = getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, flushStampToWait, useQuickKmdSleep, diff --git a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp index 25e710f94e..a9a4dfa8c8 100644 --- a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp +++ b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp @@ -280,7 +280,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCal EXPECT_EQ(1, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } -HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampeAndCallWaitThenTimeoutIsDisabled) { +HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampAndCallWaitThenTimeoutIsDisabled) { overrideKmdNotifyParams(false, 3, false, 2, false, 9999999, false, 0); auto csr = createMockCsr(); @@ -292,6 +292,19 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButTher EXPECT_EQ(0, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } +HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModAndThereIsNoFlushStampButKmdWaitOnTaskCountAllowedAndCallWaitThenTimeoutIsEnabled) { + overrideKmdNotifyParams(false, 3, false, 2, false, 9999999, false, 0); + auto csr = createMockCsr(); + 
csr->isKmdWaitOnTaskCountAllowedValue = true; + + cmdQ->throttle = QueueThrottle::LOW; + + cmdQ->waitUntilComplete(1, {}, 0, false); + EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); + EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); + EXPECT_EQ(1, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); +} + HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIsDisabledThenDontOverrideQuickSleepRequest) { overrideKmdNotifyParams(true, 3, true, 2, false, 0, false, 0); auto csr = createMockCsr(); diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index bb2f1716cf..9282f931fe 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -297,6 +297,10 @@ class CommandStreamReceiver { return false; } + virtual bool isKmdWaitOnTaskCountAllowed() const { + return false; + } + virtual void stopDirectSubmission() {} bool isStaticWorkPartitioningEnabled() const { diff --git a/shared/source/os_interface/linux/drm_command_stream.h b/shared/source/os_interface/linux/drm_command_stream.h index 60701b9a03..1b8921b7b0 100644 --- a/shared/source/os_interface/linux/drm_command_stream.h +++ b/shared/source/os_interface/linux/drm_command_stream.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -49,6 +49,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver { void makeNonResident(GraphicsAllocation &gfxAllocation) override; bool waitForFlushStamp(FlushStamp &flushStampToWait) override; bool isKmdWaitModeActive() override; + bool isKmdWaitOnTaskCountAllowed() const override; DrmMemoryManager *getMemoryManager() const; GmmPageTableMngr *createPageTableManager() override; diff --git a/shared/source/os_interface/linux/drm_command_stream.inl 
b/shared/source/os_interface/linux/drm_command_stream.inl index d8a92e8cb9..e92586fdda 100644 --- a/shared/source/os_interface/linux/drm_command_stream.inl +++ b/shared/source/os_interface/linux/drm_command_stream.inl @@ -341,4 +341,9 @@ template inline bool DrmCommandStreamReceiver::isUserFenceWaitActive() { return (this->drm->isVmBindAvailable() && useUserFenceWait); } + +template +bool DrmCommandStreamReceiver::isKmdWaitOnTaskCountAllowed() const { + return this->isDirectSubmissionEnabled(); +} } // namespace NEO diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 8f4cf93d8c..ccda7fcbed 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -362,6 +362,13 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ return blitterDirectSubmissionAvailable; } + bool isKmdWaitOnTaskCountAllowed() const override { + if (callBaseIsKmdWaitOnTaskCountAllowed) { + return BaseClass::isKmdWaitOnTaskCountAllowed(); + } + return isKmdWaitOnTaskCountAllowedValue; + } + bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { createAllocationForHostSurfaceCalled++; cpuCopyForHostPtrSurfaceAllowed = surface.peekIsPtrCopyAllowed(); @@ -449,6 +456,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ bool callBaseFlushBcsTask{true}; bool callBaseSendRenderStateCacheFlush = true; bool forceReturnGpuHang = false; + bool callBaseIsKmdWaitOnTaskCountAllowed = false; + bool isKmdWaitOnTaskCountAllowedValue = false; }; } // namespace NEO diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index cc2d8d3f26..ad7ff5c7cb 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ 
b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -728,6 +728,12 @@ HWTEST_F(CommandStreamReceiverTest, givenNoDirectSubmissionWhenCheckTaskCountFro EXPECT_FALSE(csr.isUpdateTagFromWaitEnabled()); } +HWTEST_F(CommandStreamReceiverTest, givenCsrWhenCheckKmdWaitOnTaskCountEnabledThenReturnsFalse) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.callBaseIsKmdWaitOnTaskCountAllowed = true; + EXPECT_FALSE(csr.isKmdWaitOnTaskCountAllowed()); +} + HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckTaskCountFromWaitEnabledThenProperValueReturned) { DebugManagerStateRestore restorer; auto &csr = pDevice->getUltCommandStreamReceiver(); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp index efbf69c455..c81e986965 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp @@ -65,6 +65,11 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenEnabledDirectSubmissionWhenGetting *csr.completionFenceValuePointer = 0; } +HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenDisabledDirectSubmissionWhenCheckingIsKmdWaitOnTaskCountAllowedThenFalseIsReturned) { + EXPECT_FALSE(csr->isDirectSubmissionEnabled()); + EXPECT_FALSE(csr->isKmdWaitOnTaskCountAllowed()); +} + HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionAddressThenOffsettedTagAddressIsReturned) { csr->initializeTagAllocation(); EXPECT_NE(nullptr, csr->getTagAddress()); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp index d8b572496f..d3940f6359 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp @@ -825,6 +825,11 @@ struct 
MockDrmDirectSubmissionToTestDtor : public DrmDirectSubmissionisDirectSubmissionEnabled()); + EXPECT_TRUE(csr->isKmdWaitOnTaskCountAllowed()); +} + HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenDtorIsCalledButRingIsNotStartedThenDontCallStopRingBufferNorWaitForTagValue) { DrmDirectSubmissionFunctionsCalled functionsCalled{}; auto directSubmission = std::make_unique>(*device->getDefaultEngine().commandStreamReceiver, functionsCalled);