performance: Busy-wait with a timeout before waiting on the Gdi sync object

Resolves: NEO-8343

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2023-09-08 12:08:28 +00:00 committed by Compute-Runtime-Automation
parent 66c030b5a0
commit e9f199bcad
11 changed files with 35 additions and 23 deletions

View File

@ -173,7 +173,7 @@ uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValueImpl() {
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) {
wddm->waitFromCpu(completionValue, fence);
wddm->waitFromCpu(completionValue, fence, false);
}
template <typename GfxFamily, typename Dispatcher>

View File

@ -133,7 +133,7 @@ bool WddmResidencyController::trimResidencyToBudget(uint64_t bytes) {
uint64_t sizeToTrim = 0;
if (lastFence > *monitoredFence.cpuAddress) {
this->wddm.waitFromCpu(lastFence, this->getMonitoredFence());
this->wddm.waitFromCpu(lastFence, this->getMonitoredFence(), false);
}
if (wddmAllocation->fragmentsStorage.fragmentCount == 0) {

View File

@ -1068,7 +1068,7 @@ bool Wddm::waitOnGPU(D3DKMT_HANDLE context) {
return status == STATUS_SUCCESS;
}
bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) {
bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) {
NTSTATUS status = STATUS_SUCCESS;
if (!skipResourceCleanup() && lastFenceValue > *monitoredFence.cpuAddress) {
@ -1079,14 +1079,26 @@ bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredF
});
}
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU waitFromCpu = {};
waitFromCpu.ObjectCount = 1;
waitFromCpu.ObjectHandleArray = &monitoredFence.fenceHandle;
waitFromCpu.FenceValueArray = &lastFenceValue;
waitFromCpu.hDevice = device;
waitFromCpu.hAsyncEvent = NULL_HANDLE;
status = getGdi()->waitForSynchronizationObjectFromCpu(&waitFromCpu);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
if (busyWait) {
constexpr int64_t timeout = 20;
int64_t timeDiff = 0u;
auto waitStartTime = std::chrono::high_resolution_clock::now();
while (lastFenceValue > *monitoredFence.cpuAddress && timeDiff < timeout) {
auto currentTime = std::chrono::high_resolution_clock::now();
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - waitStartTime).count();
}
}
if (lastFenceValue > *monitoredFence.cpuAddress) {
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU waitFromCpu = {};
waitFromCpu.ObjectCount = 1;
waitFromCpu.ObjectHandleArray = &monitoredFence.fenceHandle;
waitFromCpu.FenceValueArray = &lastFenceValue;
waitFromCpu.hDevice = device;
waitFromCpu.hAsyncEvent = NULL_HANDLE;
status = getGdi()->waitForSynchronizationObjectFromCpu(&waitFromCpu);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
}
}
return status == STATUS_SUCCESS;

View File

@ -94,7 +94,7 @@ class Wddm : public DriverModel {
MOCKABLE_VIRTUAL NTSTATUS createNTHandle(const D3DKMT_HANDLE *resourceHandle, HANDLE *ntHandle);
MOCKABLE_VIRTUAL bool submit(uint64_t commandBuffer, size_t size, void *commandHeader, WddmSubmitArguments &submitArguments);
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence);
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait);
MOCKABLE_VIRTUAL NTSTATUS escape(D3DKMT_ESCAPE &escapeCommand);
MOCKABLE_VIRTUAL VOID *registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmResidencyController &residencyController);

View File

@ -153,7 +153,7 @@ WddmMemoryManager *WddmCommandStreamReceiver<GfxFamily>::getMemoryManager() cons
template <typename GfxFamily>
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence());
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence(), false);
}
template <typename GfxFamily>

View File

@ -734,7 +734,7 @@ void WddmMemoryManager::handleFenceCompletion(GraphicsAllocation *allocation) {
if (lastFenceValue != 0u) {
const auto &monitoredFence = static_cast<OsContextWin *>(engine.osContext)->getResidencyController().getMonitoredFence();
const auto wddm = static_cast<OsContextWin *>(engine.osContext)->getWddm();
wddm->waitFromCpu(lastFenceValue, monitoredFence);
wddm->waitFromCpu(lastFenceValue, monitoredFence, engine.commandStreamReceiver->isAnyDirectSubmissionEnabled());
}
}
}

View File

@ -249,12 +249,12 @@ GMM_GFX_PARTITIONING *WddmMock::getGfxPartitionPtr() {
return &gfxPartition;
}
bool WddmMock::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) {
bool WddmMock::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) {
waitFromCpuResult.called++;
waitFromCpuResult.uint64ParamPassed = lastFenceValue;
waitFromCpuResult.monitoredFence = &monitoredFence;
if (callBaseWaitFromCpu) {
return waitFromCpuResult.success = Wddm::waitFromCpu(lastFenceValue, monitoredFence);
return waitFromCpuResult.success = Wddm::waitFromCpu(lastFenceValue, monitoredFence, busyWait);
}
return waitFromCpuResult.success = true;
}

View File

@ -88,7 +88,7 @@ class WddmMock : public Wddm {
void setHwContextId(unsigned long hwContextId);
void setHeap32(uint64_t base, uint64_t size);
GMM_GFX_PARTITIONING *getGfxPartitionPtr();
bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) override;
bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) override;
void *virtualAlloc(void *inPtr, size_t size, bool topDownHint) override;
void virtualFree(void *ptr, size_t size) override;
void releaseReservedAddress(void *reservedAddress) override;

View File

@ -1173,7 +1173,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenLastSubmittedFenceLowerTha
uint64_t value = 0u;
NEO::MonitoredFence monitorFence = {};
monitorFence.cpuAddress = &value;
wddm->waitFromCpu(1, monitorFence);
wddm->waitFromCpu(1, monitorFence, false);
EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 1u);
}

View File

@ -877,7 +877,7 @@ TEST_F(Wddm20Tests, WhenLastFenceLessEqualThanMonitoredThenWaitFromCpuIsNotCalle
gdi->getWaitFromCpuArg().ObjectCount = 0;
gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;
auto status = wddm->waitFromCpu(10, osContext->getResidencyController().getMonitoredFence());
auto status = wddm->waitFromCpu(10, osContext->getResidencyController().getMonitoredFence(), true);
EXPECT_TRUE(status);
@ -900,7 +900,7 @@ TEST_F(Wddm20Tests, WhenLastFenceGreaterThanMonitoredThenWaitFromCpuIsCalled) {
gdi->getWaitFromCpuArg().ObjectCount = 0;
gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;
auto status = wddm->waitFromCpu(20, osContext->getResidencyController().getMonitoredFence());
auto status = wddm->waitFromCpu(20, osContext->getResidencyController().getMonitoredFence(), true);
EXPECT_TRUE(status);

View File

@ -597,7 +597,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
EXPECT_TRUE(wddm->skipResourceCleanup());
wddm->getGdi()->waitForSynchronizationObjectFromCpu = &waitForSynchronizationObjectFromCpuNoOpMock;
MonitoredFence monitoredFence = {};
EXPECT_TRUE(wddm->waitFromCpu(0, monitoredFence));
EXPECT_TRUE(wddm->waitFromCpu(0, monitoredFence, true));
EXPECT_EQ(0u, waitForSynchronizationObjectFromCpuCounter);
}
@ -613,7 +613,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
monitoredFence.lastSubmittedFence = 1u;
monitoredFence.cpuAddress = &fenceValue;
monitoredFence.fenceHandle = fenceHandle;
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence));
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence, true));
EXPECT_EQ(1u, waitForSynchronizationObjectFromCpuCounter);
}
@ -627,7 +627,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
MonitoredFence monitoredFence = {};
monitoredFence.lastSubmittedFence = 0u;
monitoredFence.cpuAddress = &fenceValue;
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence));
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence, true));
EXPECT_EQ(0u, waitForSynchronizationObjectFromCpuCounter);
}