performance: Wait in busy loop with timeout before Gdi sync object
Resolves: NEO-8343
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
66c030b5a0
commit
e9f199bcad
|
@ -173,7 +173,7 @@ uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValueImpl() {
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) {
|
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) {
|
||||||
wddm->waitFromCpu(completionValue, fence);
|
wddm->waitFromCpu(completionValue, fence, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
|
|
|
@ -133,7 +133,7 @@ bool WddmResidencyController::trimResidencyToBudget(uint64_t bytes) {
|
||||||
uint64_t sizeToTrim = 0;
|
uint64_t sizeToTrim = 0;
|
||||||
|
|
||||||
if (lastFence > *monitoredFence.cpuAddress) {
|
if (lastFence > *monitoredFence.cpuAddress) {
|
||||||
this->wddm.waitFromCpu(lastFence, this->getMonitoredFence());
|
this->wddm.waitFromCpu(lastFence, this->getMonitoredFence(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wddmAllocation->fragmentsStorage.fragmentCount == 0) {
|
if (wddmAllocation->fragmentsStorage.fragmentCount == 0) {
|
||||||
|
|
|
@ -1068,7 +1068,7 @@ bool Wddm::waitOnGPU(D3DKMT_HANDLE context) {
|
||||||
return status == STATUS_SUCCESS;
|
return status == STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) {
|
bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) {
|
||||||
NTSTATUS status = STATUS_SUCCESS;
|
NTSTATUS status = STATUS_SUCCESS;
|
||||||
|
|
||||||
if (!skipResourceCleanup() && lastFenceValue > *monitoredFence.cpuAddress) {
|
if (!skipResourceCleanup() && lastFenceValue > *monitoredFence.cpuAddress) {
|
||||||
|
@ -1079,6 +1079,17 @@ bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredF
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (busyWait) {
|
||||||
|
constexpr int64_t timeout = 20;
|
||||||
|
int64_t timeDiff = 0u;
|
||||||
|
auto waitStartTime = std::chrono::high_resolution_clock::now();
|
||||||
|
while (lastFenceValue > *monitoredFence.cpuAddress && timeDiff < timeout) {
|
||||||
|
auto currentTime = std::chrono::high_resolution_clock::now();
|
||||||
|
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - waitStartTime).count();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastFenceValue > *monitoredFence.cpuAddress) {
|
||||||
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU waitFromCpu = {};
|
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU waitFromCpu = {};
|
||||||
waitFromCpu.ObjectCount = 1;
|
waitFromCpu.ObjectCount = 1;
|
||||||
waitFromCpu.ObjectHandleArray = &monitoredFence.fenceHandle;
|
waitFromCpu.ObjectHandleArray = &monitoredFence.fenceHandle;
|
||||||
|
@ -1088,6 +1099,7 @@ bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredF
|
||||||
status = getGdi()->waitForSynchronizationObjectFromCpu(&waitFromCpu);
|
status = getGdi()->waitForSynchronizationObjectFromCpu(&waitFromCpu);
|
||||||
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
|
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return status == STATUS_SUCCESS;
|
return status == STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,7 +94,7 @@ class Wddm : public DriverModel {
|
||||||
MOCKABLE_VIRTUAL NTSTATUS createNTHandle(const D3DKMT_HANDLE *resourceHandle, HANDLE *ntHandle);
|
MOCKABLE_VIRTUAL NTSTATUS createNTHandle(const D3DKMT_HANDLE *resourceHandle, HANDLE *ntHandle);
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL bool submit(uint64_t commandBuffer, size_t size, void *commandHeader, WddmSubmitArguments &submitArguments);
|
MOCKABLE_VIRTUAL bool submit(uint64_t commandBuffer, size_t size, void *commandHeader, WddmSubmitArguments &submitArguments);
|
||||||
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence);
|
MOCKABLE_VIRTUAL bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait);
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL NTSTATUS escape(D3DKMT_ESCAPE &escapeCommand);
|
MOCKABLE_VIRTUAL NTSTATUS escape(D3DKMT_ESCAPE &escapeCommand);
|
||||||
MOCKABLE_VIRTUAL VOID *registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmResidencyController &residencyController);
|
MOCKABLE_VIRTUAL VOID *registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmResidencyController &residencyController);
|
||||||
|
|
|
@ -153,7 +153,7 @@ WddmMemoryManager *WddmCommandStreamReceiver<GfxFamily>::getMemoryManager() cons
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
|
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
|
||||||
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence());
|
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|
|
@ -734,7 +734,7 @@ void WddmMemoryManager::handleFenceCompletion(GraphicsAllocation *allocation) {
|
||||||
if (lastFenceValue != 0u) {
|
if (lastFenceValue != 0u) {
|
||||||
const auto &monitoredFence = static_cast<OsContextWin *>(engine.osContext)->getResidencyController().getMonitoredFence();
|
const auto &monitoredFence = static_cast<OsContextWin *>(engine.osContext)->getResidencyController().getMonitoredFence();
|
||||||
const auto wddm = static_cast<OsContextWin *>(engine.osContext)->getWddm();
|
const auto wddm = static_cast<OsContextWin *>(engine.osContext)->getWddm();
|
||||||
wddm->waitFromCpu(lastFenceValue, monitoredFence);
|
wddm->waitFromCpu(lastFenceValue, monitoredFence, engine.commandStreamReceiver->isAnyDirectSubmissionEnabled());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -249,12 +249,12 @@ GMM_GFX_PARTITIONING *WddmMock::getGfxPartitionPtr() {
|
||||||
return &gfxPartition;
|
return &gfxPartition;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WddmMock::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) {
|
bool WddmMock::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) {
|
||||||
waitFromCpuResult.called++;
|
waitFromCpuResult.called++;
|
||||||
waitFromCpuResult.uint64ParamPassed = lastFenceValue;
|
waitFromCpuResult.uint64ParamPassed = lastFenceValue;
|
||||||
waitFromCpuResult.monitoredFence = &monitoredFence;
|
waitFromCpuResult.monitoredFence = &monitoredFence;
|
||||||
if (callBaseWaitFromCpu) {
|
if (callBaseWaitFromCpu) {
|
||||||
return waitFromCpuResult.success = Wddm::waitFromCpu(lastFenceValue, monitoredFence);
|
return waitFromCpuResult.success = Wddm::waitFromCpu(lastFenceValue, monitoredFence, busyWait);
|
||||||
}
|
}
|
||||||
return waitFromCpuResult.success = true;
|
return waitFromCpuResult.success = true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,7 +88,7 @@ class WddmMock : public Wddm {
|
||||||
void setHwContextId(unsigned long hwContextId);
|
void setHwContextId(unsigned long hwContextId);
|
||||||
void setHeap32(uint64_t base, uint64_t size);
|
void setHeap32(uint64_t base, uint64_t size);
|
||||||
GMM_GFX_PARTITIONING *getGfxPartitionPtr();
|
GMM_GFX_PARTITIONING *getGfxPartitionPtr();
|
||||||
bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) override;
|
bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence, bool busyWait) override;
|
||||||
void *virtualAlloc(void *inPtr, size_t size, bool topDownHint) override;
|
void *virtualAlloc(void *inPtr, size_t size, bool topDownHint) override;
|
||||||
void virtualFree(void *ptr, size_t size) override;
|
void virtualFree(void *ptr, size_t size) override;
|
||||||
void releaseReservedAddress(void *reservedAddress) override;
|
void releaseReservedAddress(void *reservedAddress) override;
|
||||||
|
|
|
@ -1173,7 +1173,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenLastSubmittedFenceLowerTha
|
||||||
uint64_t value = 0u;
|
uint64_t value = 0u;
|
||||||
NEO::MonitoredFence monitorFence = {};
|
NEO::MonitoredFence monitorFence = {};
|
||||||
monitorFence.cpuAddress = &value;
|
monitorFence.cpuAddress = &value;
|
||||||
wddm->waitFromCpu(1, monitorFence);
|
wddm->waitFromCpu(1, monitorFence, false);
|
||||||
|
|
||||||
EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 1u);
|
EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 1u);
|
||||||
}
|
}
|
||||||
|
|
|
@ -877,7 +877,7 @@ TEST_F(Wddm20Tests, WhenLastFenceLessEqualThanMonitoredThenWaitFromCpuIsNotCalle
|
||||||
gdi->getWaitFromCpuArg().ObjectCount = 0;
|
gdi->getWaitFromCpuArg().ObjectCount = 0;
|
||||||
gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;
|
gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;
|
||||||
|
|
||||||
auto status = wddm->waitFromCpu(10, osContext->getResidencyController().getMonitoredFence());
|
auto status = wddm->waitFromCpu(10, osContext->getResidencyController().getMonitoredFence(), true);
|
||||||
|
|
||||||
EXPECT_TRUE(status);
|
EXPECT_TRUE(status);
|
||||||
|
|
||||||
|
@ -900,7 +900,7 @@ TEST_F(Wddm20Tests, WhenLastFenceGreaterThanMonitoredThenWaitFromCpuIsCalled) {
|
||||||
gdi->getWaitFromCpuArg().ObjectCount = 0;
|
gdi->getWaitFromCpuArg().ObjectCount = 0;
|
||||||
gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;
|
gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;
|
||||||
|
|
||||||
auto status = wddm->waitFromCpu(20, osContext->getResidencyController().getMonitoredFence());
|
auto status = wddm->waitFromCpu(20, osContext->getResidencyController().getMonitoredFence(), true);
|
||||||
|
|
||||||
EXPECT_TRUE(status);
|
EXPECT_TRUE(status);
|
||||||
|
|
||||||
|
|
|
@ -597,7 +597,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
|
||||||
EXPECT_TRUE(wddm->skipResourceCleanup());
|
EXPECT_TRUE(wddm->skipResourceCleanup());
|
||||||
wddm->getGdi()->waitForSynchronizationObjectFromCpu = &waitForSynchronizationObjectFromCpuNoOpMock;
|
wddm->getGdi()->waitForSynchronizationObjectFromCpu = &waitForSynchronizationObjectFromCpuNoOpMock;
|
||||||
MonitoredFence monitoredFence = {};
|
MonitoredFence monitoredFence = {};
|
||||||
EXPECT_TRUE(wddm->waitFromCpu(0, monitoredFence));
|
EXPECT_TRUE(wddm->waitFromCpu(0, monitoredFence, true));
|
||||||
EXPECT_EQ(0u, waitForSynchronizationObjectFromCpuCounter);
|
EXPECT_EQ(0u, waitForSynchronizationObjectFromCpuCounter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -613,7 +613,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
|
||||||
monitoredFence.lastSubmittedFence = 1u;
|
monitoredFence.lastSubmittedFence = 1u;
|
||||||
monitoredFence.cpuAddress = &fenceValue;
|
monitoredFence.cpuAddress = &fenceValue;
|
||||||
monitoredFence.fenceHandle = fenceHandle;
|
monitoredFence.fenceHandle = fenceHandle;
|
||||||
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence));
|
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence, true));
|
||||||
EXPECT_EQ(1u, waitForSynchronizationObjectFromCpuCounter);
|
EXPECT_EQ(1u, waitForSynchronizationObjectFromCpuCounter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -627,7 +627,7 @@ TEST_F(WddmSkipResourceCleanupFixtureTests, givenWaitForSynchronizationObjectFro
|
||||||
MonitoredFence monitoredFence = {};
|
MonitoredFence monitoredFence = {};
|
||||||
monitoredFence.lastSubmittedFence = 0u;
|
monitoredFence.lastSubmittedFence = 0u;
|
||||||
monitoredFence.cpuAddress = &fenceValue;
|
monitoredFence.cpuAddress = &fenceValue;
|
||||||
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence));
|
EXPECT_TRUE(wddm->waitFromCpu(1u, monitoredFence, true));
|
||||||
EXPECT_EQ(0u, waitForSynchronizationObjectFromCpuCounter);
|
EXPECT_EQ(0u, waitForSynchronizationObjectFromCpuCounter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue