performance: Wait in busy loop with timeout before Gdi sync object
Resolves: NEO-8343
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
66c030b5a0
commit
e9f199bcad
|
@ -173,7 +173,7 @@ uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValueImpl() {
|
|||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) {
|
||||
wddm->waitFromCpu(completionValue, fence);
|
||||
wddm->waitFromCpu(completionValue, fence, false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
|
|
|
@ -133,7 +133,7 @@ bool WddmResidencyController::trimResidencyToBudget(uint64_t bytes) {
|
|||
uint64_t sizeToTrim = 0;
|
||||
|
||||
if (lastFence > *monitoredFence.cpuAddress) {
|
||||
this->wddm.waitFromCpu(lastFence, this->getMonitoredFence());
|
||||
this->wddm.waitFromCpu(lastFence, this->getMonitoredFence(), false);
|
||||
}
|
||||
|
||||
if (wddmAllocation->fragmentsStorage.fragmentCount == 0) {
|
||||
|
|
|
@ -1068,7 +1068,7 @@ bool Wddm::waitOnGPU(D3DKMT_HANDLE context) {
|
|||
return status == STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) {
|
||||
bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) {
|
||||
NTSTATUS status = STATUS_SUCCESS;
|
||||
|
||||
if (!skipResourceCleanup() && lastFenceValue > *monitoredFence.cpuAddress) {
|
||||
|
@ -1079,14 +1079,26 @@ bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredF
|
|||
});
|
||||
}
|
||||
|
||||
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU waitFromCpu = {};
|
||||
waitFromCpu.ObjectCount = 1;
|
||||
waitFromCpu.ObjectHandleArray = &monitoredFence.fenceHandle;
|
||||
waitFromCpu.FenceValueArray = &lastFenceValue;
|
||||
waitFromCpu.hDevice = device;
|
||||
waitFromCpu.hAsyncEvent = NULL_HANDLE;
|
||||
status = getGdi()->waitForSynchronizationObjectFromCpu(&waitFromCpu);
|
||||
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
|
||||
if (busyWait) {
|
||||
constexpr int64_t timeout = 20;
|
||||
int64_t timeDiff = 0u;
|
||||
auto waitStartTime = std::chrono::high_resolution_clock::now();
|
||||
while (lastFenceValue > *monitoredFence.cpuAddress && timeDiff < timeout) {
|
||||
auto currentTime = std::chrono::high_resolution_clock::now();
|
||||
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - waitStartTime).count();
|
||||
}
|
||||
}
|
||||
|
||||
if (lastFenceValue > *monitoredFence.cpuAddress) {
|
||||
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU waitFromCpu = {};
|
||||
waitFromCpu.ObjectCount = 1;
|
||||
waitFromCpu.ObjectHandleArray = &monitoredFence.fenceHandle;
|
||||
waitFromCpu.FenceValueArray = &lastFenceValue;
|
||||
waitFromCpu.hDevice = device;
|
||||
waitFromCpu.hAsyncEvent = NULL_HANDLE;
|
||||
status = getGdi()->waitForSynchronizationObjectFromCpu(&waitFromCpu);
|
||||
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
return status == STATUS_SUCCESS;
|
||||
|
|
|
@ -94,7 +94,7 @@ class Wddm : public DriverModel {
|
|||
MOCKABLE_VIRTUAL NTSTATUS createNTHandle(const D3DKMT_HANDLE *resourceHandle, HANDLE *ntHandle);
|
||||
|
||||
MOCKABLE_VIRTUAL bool submit(uint64_t commandBuffer, size_t size, void *commandHeader, WddmSubmitArguments &submitArguments);
|
||||
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence);
|
||||
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait);
|
||||
|
||||
MOCKABLE_VIRTUAL NTSTATUS escape(D3DKMT_ESCAPE &escapeCommand);
|
||||
MOCKABLE_VIRTUAL VOID *registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmResidencyController &residencyController);
|
||||
|
|
|
@ -153,7 +153,7 @@ WddmMemoryManager *WddmCommandStreamReceiver<GfxFamily>::getMemoryManager() cons
|
|||
|
||||
template <typename GfxFamily>
|
||||
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
|
||||
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence());
|
||||
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence(), false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -734,7 +734,7 @@ void WddmMemoryManager::handleFenceCompletion(GraphicsAllocation *allocation) {
|
|||
if (lastFenceValue != 0u) {
|
||||
const auto &monitoredFence = static_cast<OsContextWin *>(engine.osContext)->getResidencyController().getMonitoredFence();
|
||||
const auto wddm = static_cast<OsContextWin *>(engine.osContext)->getWddm();
|
||||
wddm->waitFromCpu(lastFenceValue, monitoredFence);
|
||||
wddm->waitFromCpu(lastFenceValue, monitoredFence, engine.commandStreamReceiver->isAnyDirectSubmissionEnabled());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -249,12 +249,12 @@ GMM_GFX_PARTITIONING *WddmMock::getGfxPartitionPtr() {
|
|||
return &gfxPartition;
|
||||
}
|
||||
|
||||
bool WddmMock::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) {
|
||||
bool WddmMock::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) {
|
||||
waitFromCpuResult.called++;
|
||||
waitFromCpuResult.uint64ParamPassed = lastFenceValue;
|
||||
waitFromCpuResult.monitoredFence = &monitoredFence;
|
||||
if (callBaseWaitFromCpu) {
|
||||
return waitFromCpuResult.success = Wddm::waitFromCpu(lastFenceValue, monitoredFence);
|
||||
return waitFromCpuResult.success = Wddm::waitFromCpu(lastFenceValue, monitoredFence, busyWait);
|
||||
}
|
||||
return waitFromCpuResult.success = true;
|
||||
}
|
||||
|
|
|
@ -88,7 +88,7 @@ class WddmMock : public Wddm {
|
|||
void setHwContextId(unsigned long hwContextId);
|
||||
void setHeap32(uint64_t base, uint64_t size);
|
||||
GMM_GFX_PARTITIONING *getGfxPartitionPtr();
|
||||
bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) override;
|
||||
bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) override;
|
||||
void *virtualAlloc(void *inPtr, size_t size, bool topDownHint) override;
|
||||
void virtualFree(void *ptr, size_t size) override;
|
||||
void releaseReservedAddress(void *reservedAddress) override;
|
||||
|
|
|
@ -1173,7 +1173,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenLastSubmittedFenceLowerTha
|
|||
uint64_t value = 0u;
|
||||
NEO::MonitoredFence monitorFence = {};
|
||||
monitorFence.cpuAddress = &value;
|
||||
wddm->waitFromCpu(1, monitorFence);
|
||||
wddm->waitFromCpu(1, monitorFence, false);
|
||||
|
||||
EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 1u);
|
||||
}
|
||||
|
|
|
@ -877,7 +877,7 @@ TEST_F(Wddm20Tests, WhenLastFenceLessEqualThanMonitoredThenWaitFromCpuIsNotCalle
|
|||
gdi->getWaitFromCpuArg().ObjectCount = 0;
|
||||
gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;
|
||||
|
||||
auto status = wddm->waitFromCpu(10, osContext->getResidencyController().getMonitoredFence());
|
||||
auto status = wddm->waitFromCpu(10, osContext->getResidencyController().getMonitoredFence(), true);
|
||||
|
||||
EXPECT_TRUE(status);
|
||||
|
||||
|
@ -900,7 +900,7 @@ TEST_F(Wddm20Tests, WhenLastFenceGreaterThanMonitoredThenWaitFromCpuIsCalled) {
|
|||
gdi->getWaitFromCpuArg().ObjectCount = 0;
|
||||
gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;
|
||||
|
||||
auto status = wddm->waitFromCpu(20, osContext->getResidencyController().getMonitoredFence());
|
||||
auto status = wddm->waitFromCpu(20, osContext->getResidencyController().getMonitoredFence(), true);
|
||||
|
||||
EXPECT_TRUE(status);
|
||||
|
||||
|
|
|
@ -597,7 +597,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
|
|||
EXPECT_TRUE(wddm->skipResourceCleanup());
|
||||
wddm->getGdi()->waitForSynchronizationObjectFromCpu = &waitForSynchronizationObjectFromCpuNoOpMock;
|
||||
MonitoredFence monitoredFence = {};
|
||||
EXPECT_TRUE(wddm->waitFromCpu(0, monitoredFence));
|
||||
EXPECT_TRUE(wddm->waitFromCpu(0, monitoredFence, true));
|
||||
EXPECT_EQ(0u, waitForSynchronizationObjectFromCpuCounter);
|
||||
}
|
||||
|
||||
|
@ -613,7 +613,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
|
|||
monitoredFence.lastSubmittedFence = 1u;
|
||||
monitoredFence.cpuAddress = &fenceValue;
|
||||
monitoredFence.fenceHandle = fenceHandle;
|
||||
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence));
|
||||
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence, true));
|
||||
EXPECT_EQ(1u, waitForSynchronizationObjectFromCpuCounter);
|
||||
}
|
||||
|
||||
|
@ -627,7 +627,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
|
|||
MonitoredFence monitoredFence = {};
|
||||
monitoredFence.lastSubmittedFence = 0u;
|
||||
monitoredFence.cpuAddress = &fenceValue;
|
||||
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence));
|
||||
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence, true));
|
||||
EXPECT_EQ(0u, waitForSynchronizationObjectFromCpuCounter);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue