fix(ocl): reduce busy waiting in clFinish
Use flushStamp=taskCount when the passed flushStamp==0. This causes the driver to busy-wait for a short while before falling back to KMD notify. Related-To: GSD-3612 Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
parent
02436b8877
commit
60d5e22f3b
|
@ -442,6 +442,9 @@ WaitStatus CommandQueue::waitUntilComplete(TaskCountType gpgpuTaskCountToWait, R
|
|||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());
|
||||
|
||||
if (!skipWait) {
|
||||
if (flushStampToWait == 0 && getGpgpuCommandStreamReceiver().isKmdWaitOnTaskCountAllowed()) {
|
||||
flushStampToWait = gpgpuTaskCountToWait;
|
||||
}
|
||||
waitStatus = getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
|
||||
flushStampToWait,
|
||||
useQuickKmdSleep,
|
||||
|
|
|
@ -280,7 +280,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCal
|
|||
EXPECT_EQ(1, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs);
|
||||
}
|
||||
|
||||
HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampeAndCallWaitThenTimeoutIsDisabled) {
|
||||
HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampAndCallWaitThenTimeoutIsDisabled) {
|
||||
overrideKmdNotifyParams(false, 3, false, 2, false, 9999999, false, 0);
|
||||
auto csr = createMockCsr<FamilyType>();
|
||||
|
||||
|
@ -292,6 +292,19 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButTher
|
|||
EXPECT_EQ(0, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs);
|
||||
}
|
||||
|
||||
HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModAndThereIsNoFlushStampButKmdWaitOnTaskCountAllowedAndCallWaitThenTimeoutIsEnabled) {
|
||||
overrideKmdNotifyParams(false, 3, false, 2, false, 9999999, false, 0);
|
||||
auto csr = createMockCsr<FamilyType>();
|
||||
csr->isKmdWaitOnTaskCountAllowedValue = true;
|
||||
|
||||
cmdQ->throttle = QueueThrottle::LOW;
|
||||
|
||||
cmdQ->waitUntilComplete(1, {}, 0, false);
|
||||
EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled);
|
||||
EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout);
|
||||
EXPECT_EQ(1, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs);
|
||||
}
|
||||
|
||||
HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIsDisabledThenDontOverrideQuickSleepRequest) {
|
||||
overrideKmdNotifyParams(true, 3, true, 2, false, 0, false, 0);
|
||||
auto csr = createMockCsr<FamilyType>();
|
||||
|
|
|
@ -297,6 +297,10 @@ class CommandStreamReceiver {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Reports whether KMD wait on task count is allowed for this receiver.
// This default implementation always returns false; the method is virtual so
// derived receivers can supply their own answer.
virtual bool isKmdWaitOnTaskCountAllowed() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void stopDirectSubmission() {}
|
||||
|
||||
bool isStaticWorkPartitioningEnabled() const {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -49,6 +49,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
|
|||
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
|
||||
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
|
||||
bool isKmdWaitModeActive() override;
|
||||
bool isKmdWaitOnTaskCountAllowed() const override;
|
||||
|
||||
DrmMemoryManager *getMemoryManager() const;
|
||||
GmmPageTableMngr *createPageTableManager() override;
|
||||
|
|
|
@ -341,4 +341,9 @@ template <typename GfxFamily>
|
|||
inline bool DrmCommandStreamReceiver<GfxFamily>::isUserFenceWaitActive() {
|
||||
return (this->drm->isVmBindAvailable() && useUserFenceWait);
|
||||
}
|
||||
|
||||
// Reports whether KMD wait on task count is allowed for this DRM receiver.
// The answer is exactly the result of isDirectSubmissionEnabled().
template <typename GfxFamily>
|
||||
bool DrmCommandStreamReceiver<GfxFamily>::isKmdWaitOnTaskCountAllowed() const {
|
||||
return this->isDirectSubmissionEnabled();
|
||||
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -362,6 +362,13 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||
return blitterDirectSubmissionAvailable;
|
||||
}
|
||||
|
||||
// Override controlled by two member flags: when
// callBaseIsKmdWaitOnTaskCountAllowed is set the call is forwarded to
// BaseClass; otherwise the value stored in isKmdWaitOnTaskCountAllowedValue
// is returned unchanged.
bool isKmdWaitOnTaskCountAllowed() const override {
|
||||
if (callBaseIsKmdWaitOnTaskCountAllowed) {
|
||||
return BaseClass::isKmdWaitOnTaskCountAllowed();
|
||||
}
|
||||
return isKmdWaitOnTaskCountAllowedValue;
|
||||
}
|
||||
|
||||
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
|
||||
createAllocationForHostSurfaceCalled++;
|
||||
cpuCopyForHostPtrSurfaceAllowed = surface.peekIsPtrCopyAllowed();
|
||||
|
@ -449,6 +456,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||
bool callBaseFlushBcsTask{true};
|
||||
bool callBaseSendRenderStateCacheFlush = true;
|
||||
bool forceReturnGpuHang = false;
|
||||
bool callBaseIsKmdWaitOnTaskCountAllowed = false;
|
||||
bool isKmdWaitOnTaskCountAllowedValue = false;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -728,6 +728,12 @@ HWTEST_F(CommandStreamReceiverTest, givenNoDirectSubmissionWhenCheckTaskCountFro
|
|||
EXPECT_FALSE(csr.isUpdateTagFromWaitEnabled());
|
||||
}
|
||||
|
||||
// Sets callBaseIsKmdWaitOnTaskCountAllowed on the device's ULT CSR so the
// query is forwarded to the base-class implementation, then expects
// isKmdWaitOnTaskCountAllowed() to return false.
HWTEST_F(CommandStreamReceiverTest, givenCsrWhenCheckKmdWaitOnTaskCountEnabledThenReturnsFalse) {
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.callBaseIsKmdWaitOnTaskCountAllowed = true;
|
||||
EXPECT_FALSE(csr.isKmdWaitOnTaskCountAllowed());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckTaskCountFromWaitEnabledThenProperValueReturned) {
|
||||
DebugManagerStateRestore restorer;
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
|
|
@ -65,6 +65,11 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenEnabledDirectSubmissionWhenGetting
|
|||
*csr.completionFenceValuePointer = 0;
|
||||
}
|
||||
|
||||
// First asserts that the fixture's CSR reports direct submission as disabled,
// then expects isKmdWaitOnTaskCountAllowed() to return false as well.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenDisabledDirectSubmissionWhenCheckingIsKmdWaitOnTaskCountAllowedThenFalseIsReturned) {
|
||||
EXPECT_FALSE(csr->isDirectSubmissionEnabled());
|
||||
EXPECT_FALSE(csr->isKmdWaitOnTaskCountAllowed());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionAddressThenOffsettedTagAddressIsReturned) {
|
||||
csr->initializeTagAllocation();
|
||||
EXPECT_NE(nullptr, csr->getTagAddress());
|
||||
|
|
|
@ -825,6 +825,11 @@ struct MockDrmDirectSubmissionToTestDtor : public DrmDirectSubmission<GfxFamily,
|
|||
DrmDirectSubmissionFunctionsCalled &functionsCalled;
|
||||
};
|
||||
|
||||
// First asserts that the fixture's CSR reports direct submission as enabled,
// then expects isKmdWaitOnTaskCountAllowed() to return true as well.
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenCheckingIsKmdWaitOnTaskCountAllowedThenTrueIsReturned) {
|
||||
EXPECT_TRUE(csr->isDirectSubmissionEnabled());
|
||||
EXPECT_TRUE(csr->isKmdWaitOnTaskCountAllowed());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenDtorIsCalledButRingIsNotStartedThenDontCallStopRingBufferNorWaitForTagValue) {
|
||||
DrmDirectSubmissionFunctionsCalled functionsCalled{};
|
||||
auto directSubmission = std::make_unique<MockDrmDirectSubmissionToTestDtor<FamilyType>>(*device->getDefaultEngine().commandStreamReceiver, functionsCalled);
|
||||
|
|
Loading…
Reference in New Issue