From 24ff26c396ee0b3ffdaa23032d175e3592790010 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Wed, 1 Jun 2022 10:05:07 +0000 Subject: [PATCH] Allocate new ring buffer if all are in use Signed-off-by: Lukasz Jobczyk --- .../debug_settings/debug_variables_base.inl | 5 +- .../direct_submission/direct_submission_hw.h | 22 ++-- .../direct_submission_hw.inl | 92 +++++++++------- .../linux/drm_direct_submission.h | 1 + .../linux/drm_direct_submission.inl | 28 +++-- .../windows/wddm_direct_submission.h | 3 +- .../windows/wddm_direct_submission.inl | 20 +++- .../common/mocks/mock_direct_submission_hw.h | 9 +- .../windows/mock_wddm_direct_submission.h | 7 +- shared/test/common/test_files/igdrcl.config | 1 + .../direct_submission_tests_1.cpp | 101 ++++++++++++++---- .../direct_submission_tests_2.cpp | 4 +- .../linux/drm_direct_submission_tests.cpp | 28 ++++- .../windows/wddm_direct_submission_tests.cpp | 93 ++++++++++++---- 14 files changed, 296 insertions(+), 118 deletions(-) diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 7a3f66e75d..32748a892e 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -298,11 +298,12 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmissionController, -1, "Enable di DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerTimeout, -1, "Set direct submission controller timeout, -1: default 5000 us, >=0: timeout in us") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerDivisor, -1, "Set direct submission controller timeout divider, -1: default 2, >0: divider value") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionForceLocalMemoryStorageMode, -1, "Force local memory storage for command/ring/semaphore buffer, -1: default - for all engines, 0: disabled, 1: for multiOsContextCapable engine, 2: for all engines") -DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: 
default, 0 - disable, 1 - enable. If enabled, completionRingBuffers wont be updated if ring is not running.") +DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0 - disable, 1 - enable. If enabled, completionFences wont be updated if ring is not running.") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackCommandBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of cmd buffer after handling residency.") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackRingBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of ring buffer after handling residency.") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertExtraMiMemFenceCommands, -1, "-1: default, 0 - disable, 1 - enable. If enabled, add extra MI_MEM_FENCE instructions with acquire bit set") -DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore") +DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore") +DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMaxRingBuffers, -1, "-1: default, >0: max ring buffer count, During switch ring buffer, if there is no available ring, wait for completion instead of allocating new one if DirectSubmissionMaxRingBuffers is reached") /* IMPLICIT SCALING */ DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.") diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 462b59cca1..54cccab8ed 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ 
b/shared/source/direct_submission/direct_submission_hw.h @@ -145,14 +145,23 @@ class DirectSubmissionHw { size_t getDiagnosticModeSection(); void setPostSyncOffset(); - enum RingBufferUse : uint32_t { - FirstBuffer, - SecondBuffer, - MaxBuffers + virtual bool isCompleted(uint32_t ringBufferIndex) = 0; + + struct RingBufferUse { + RingBufferUse() = default; + RingBufferUse(FlushStamp completionFence, GraphicsAllocation *ringBuffer) : completionFence(completionFence), ringBuffer(ringBuffer){}; + + constexpr static uint32_t initialRingBufferCount = 2u; + + FlushStamp completionFence = 0ull; + GraphicsAllocation *ringBuffer = nullptr; }; + std::vector ringBuffers; + uint32_t currentRingBuffer = 0u; + uint32_t previousRingBuffer = 0u; + uint32_t maxRingBufferCount = std::numeric_limits::max(); LinearStream ringCommandStream; - FlushStamp completionRingBuffers[RingBufferUse::MaxBuffers] = {0ull, 0ull}; std::unique_ptr diagnostic; uint64_t semaphoreGpuVa = 0u; @@ -165,8 +174,6 @@ class DirectSubmissionHw { const HardwareInfo *hwInfo = nullptr; const GraphicsAllocation *globalFenceAllocation = nullptr; GraphicsAllocation *completionFenceAllocation = nullptr; - GraphicsAllocation *ringBuffer = nullptr; - GraphicsAllocation *ringBuffer2 = nullptr; GraphicsAllocation *semaphores = nullptr; GraphicsAllocation *workPartitionAllocation = nullptr; void *semaphorePtr = nullptr; @@ -174,7 +181,6 @@ class DirectSubmissionHw { volatile void *workloadModeOneStoreAddress = nullptr; uint32_t currentQueueWorkCount = 1u; - RingBufferUse currentRingBuffer = RingBufferUse::FirstBuffer; uint32_t workloadMode = 0; uint32_t workloadModeOneExpectedValue = 0u; uint32_t activeTiles = 1u; diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 9813b3de97..fbcb4f6669 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -31,7 +31,7 @@ 
namespace NEO { template DirectSubmissionHw::DirectSubmissionHw(const DirectSubmissionInputParams &inputParams) - : osContext(inputParams.osContext), rootDeviceIndex(inputParams.rootDeviceIndex) { + : ringBuffers(RingBufferUse::initialRingBufferCount), osContext(inputParams.osContext), rootDeviceIndex(inputParams.rootDeviceIndex) { memoryManager = inputParams.memoryManager; globalFenceAllocation = inputParams.globalFenceAllocation; hwInfo = inputParams.rootDeviceEnvironment.getHardwareInfo(); @@ -42,6 +42,10 @@ DirectSubmissionHw::DirectSubmissionHw(const DirectSubmis disableCacheFlush = UllsDefaults::defaultDisableCacheFlush; disableMonitorFence = UllsDefaults::defaultDisableMonitorFence; + if (DebugManager.flags.DirectSubmissionMaxRingBuffers.get() != -1) { + this->maxRingBufferCount = DebugManager.flags.DirectSubmissionMaxRingBuffers.get(); + } + if (DebugManager.flags.DirectSubmissionDisableCacheFlush.get() != -1) { disableCacheFlush = !!DebugManager.flags.DirectSubmissionDisableCacheFlush.get(); } @@ -80,13 +84,14 @@ bool DirectSubmissionHw::allocateResources() { true, allocationSize, AllocationType::RING_BUFFER, isMultiOsContextCapable, false, osContext.getDeviceBitfield()}; - ringBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties); - UNRECOVERABLE_IF(ringBuffer == nullptr); - allocations.push_back(ringBuffer); - ringBuffer2 = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties); - UNRECOVERABLE_IF(ringBuffer2 == nullptr); - allocations.push_back(ringBuffer2); + for (uint32_t ringBufferIndex = 0; ringBufferIndex < RingBufferUse::initialRingBufferCount; ringBufferIndex++) { + auto ringBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties); + this->ringBuffers[ringBufferIndex].ringBuffer = ringBuffer; + UNRECOVERABLE_IF(ringBuffer == nullptr); + allocations.push_back(ringBuffer); + memset(ringBuffer->getUnderlyingBuffer(), 0, allocationSize); + 
} const AllocationProperties semaphoreAllocationProperties{rootDeviceIndex, true, MemoryConstants::pageSize, @@ -105,27 +110,23 @@ bool DirectSubmissionHw::allocateResources() { } if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) { - printf("Ring buffer 1 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n", - ringBuffer->getGpuAddress(), - ptrOffset(ringBuffer->getGpuAddress(), ringBuffer->getUnderlyingBufferSize()), - ringBuffer->getUnderlyingBuffer(), - ptrOffset(ringBuffer->getUnderlyingBuffer(), ringBuffer->getUnderlyingBufferSize()), - ringBuffer->getUnderlyingBufferSize()); + for (uint32_t ringBufferIndex = 0; ringBufferIndex < RingBufferUse::initialRingBufferCount; ringBufferIndex++) { + const auto ringBuffer = this->ringBuffers[ringBufferIndex].ringBuffer; - printf("Ring buffer 2 - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n", - ringBuffer2->getGpuAddress(), - ptrOffset(ringBuffer2->getGpuAddress(), ringBuffer2->getUnderlyingBufferSize()), - ringBuffer2->getUnderlyingBuffer(), - ptrOffset(ringBuffer2->getUnderlyingBuffer(), ringBuffer2->getUnderlyingBufferSize()), - ringBuffer2->getUnderlyingBufferSize()); + printf("Ring buffer %u - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n", + ringBufferIndex, + ringBuffer->getGpuAddress(), + ptrOffset(ringBuffer->getGpuAddress(), ringBuffer->getUnderlyingBufferSize()), + ringBuffer->getUnderlyingBuffer(), + ptrOffset(ringBuffer->getUnderlyingBuffer(), ringBuffer->getUnderlyingBufferSize()), + ringBuffer->getUnderlyingBufferSize()); + } } handleResidency(); - ringCommandStream.replaceBuffer(ringBuffer->getUnderlyingBuffer(), minimumRequiredSize); - ringCommandStream.replaceGraphicsAllocation(ringBuffer); + ringCommandStream.replaceBuffer(this->ringBuffers[0u].ringBuffer->getUnderlyingBuffer(), minimumRequiredSize); + ringCommandStream.replaceGraphicsAllocation(this->ringBuffers[0].ringBuffer); - 
memset(ringBuffer->getUnderlyingBuffer(), 0, allocationSize); - memset(ringBuffer2->getUnderlyingBuffer(), 0, allocationSize); semaphorePtr = semaphores->getUnderlyingBuffer(); semaphoreGpuVa = semaphores->getGpuAddress(); semaphoreData = static_cast(semaphorePtr); @@ -525,27 +526,46 @@ inline uint64_t DirectSubmissionHw::switchRingBuffers() { template inline GraphicsAllocation *DirectSubmissionHw::switchRingBuffersAllocations() { + this->previousRingBuffer = this->currentRingBuffer; GraphicsAllocation *nextAllocation = nullptr; - if (currentRingBuffer == RingBufferUse::FirstBuffer) { - nextAllocation = ringBuffer2; - currentRingBuffer = RingBufferUse::SecondBuffer; - } else { - nextAllocation = ringBuffer; - currentRingBuffer = RingBufferUse::FirstBuffer; + for (uint32_t ringBufferIndex = 0; ringBufferIndex < this->ringBuffers.size(); ringBufferIndex++) { + if (ringBufferIndex != this->currentRingBuffer && this->isCompleted(ringBufferIndex)) { + this->currentRingBuffer = ringBufferIndex; + nextAllocation = this->ringBuffers[ringBufferIndex].ringBuffer; + break; + } } + + if (nextAllocation == nullptr) { + if (this->ringBuffers.size() == this->maxRingBufferCount) { + this->currentRingBuffer = (this->currentRingBuffer + 1) % this->ringBuffers.size(); + nextAllocation = this->ringBuffers[this->currentRingBuffer].ringBuffer; + } else { + bool isMultiOsContextCapable = osContext.getNumSupportedDevices() > 1u; + constexpr size_t minimumRequiredSize = 256 * MemoryConstants::kiloByte; + constexpr size_t additionalAllocationSize = MemoryConstants::pageSize; + const auto allocationSize = alignUp(minimumRequiredSize + additionalAllocationSize, MemoryConstants::pageSize64k); + const AllocationProperties commandStreamAllocationProperties{rootDeviceIndex, + true, allocationSize, + AllocationType::RING_BUFFER, + isMultiOsContextCapable, false, osContext.getDeviceBitfield()}; + nextAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties); + this->currentRingBuffer = static_cast(this->ringBuffers.size()); + this->ringBuffers.emplace_back(0ull, nextAllocation); + auto ret = memoryOperationHandler->makeResidentWithinOsContext(&this->osContext, ArrayRef(&nextAllocation, 1u), false) == MemoryOperationsStatus::SUCCESS; + UNRECOVERABLE_IF(!ret); + } + } + UNRECOVERABLE_IF(this->currentRingBuffer == this->previousRingBuffer); return nextAllocation; } template void DirectSubmissionHw::deallocateResources() { - if (ringBuffer) { - memoryManager->freeGraphicsMemory(ringBuffer); - ringBuffer = nullptr; - } - if (ringBuffer2) { - memoryManager->freeGraphicsMemory(ringBuffer2); - ringBuffer2 = nullptr; + for (uint32_t ringBufferIndex = 0; ringBufferIndex < this->ringBuffers.size(); ringBufferIndex++) { + memoryManager->freeGraphicsMemory(this->ringBuffers[ringBufferIndex].ringBuffer); } + this->ringBuffers.clear(); if (semaphores) { memoryManager->freeGraphicsMemory(semaphores); semaphores = nullptr; diff --git a/shared/source/direct_submission/linux/drm_direct_submission.h b/shared/source/direct_submission/linux/drm_direct_submission.h index a4193e8bbb..fa00e28e99 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.h +++ b/shared/source/direct_submission/linux/drm_direct_submission.h @@ -39,6 +39,7 @@ class DrmDirectSubmission : public DirectSubmissionHw { void handleSwitchRingBuffers() override; uint64_t updateTagValue() override; void getTagAddressValue(TagData &tagData) override; + bool isCompleted(uint32_t ringBufferIndex) override; MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait); diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index 8c01fd9b50..86d73d4e12 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ 
-175,22 +175,21 @@ void DrmDirectSubmission::handleStopRingBuffer() { template void DrmDirectSubmission::handleSwitchRingBuffers() { if (this->disableMonitorFence) { - auto previousRingBuffer = this->currentRingBuffer == DirectSubmissionHw::RingBufferUse::FirstBuffer ? DirectSubmissionHw::RingBufferUse::SecondBuffer : DirectSubmissionHw::RingBufferUse::FirstBuffer; this->currentTagData.tagValue++; - bool updateCompletionRingBuffers = this->ringStart; + bool updateCompletionFences = this->ringStart; if (DebugManager.flags.EnableRingSwitchTagUpdateWa.get() == 0) { - updateCompletionRingBuffers = true; + updateCompletionFences = true; } - if (updateCompletionRingBuffers) { - this->completionRingBuffers[previousRingBuffer] = this->currentTagData.tagValue; + if (updateCompletionFences) { + this->ringBuffers[this->previousRingBuffer].completionFence = this->currentTagData.tagValue; } } if (this->ringStart) { - if (this->completionRingBuffers[this->currentRingBuffer] != 0) { - this->wait(static_cast(this->completionRingBuffers[this->currentRingBuffer])); + if (this->ringBuffers[this->currentRingBuffer].completionFence != 0) { + this->wait(static_cast(this->ringBuffers[this->currentRingBuffer].completionFence)); } } } @@ -199,7 +198,7 @@ template uint64_t DrmDirectSubmission::updateTagValue() { if (!this->disableMonitorFence) { this->currentTagData.tagValue++; - this->completionRingBuffers[this->currentRingBuffer] = this->currentTagData.tagValue; + this->ringBuffers[this->currentRingBuffer].completionFence = this->currentTagData.tagValue; } return 0ull; } @@ -210,6 +209,19 @@ void DrmDirectSubmission::getTagAddressValue(TagData &tag tagData.tagValue = this->currentTagData.tagValue + 1; } +template +inline bool DrmDirectSubmission::isCompleted(uint32_t ringBufferIndex) { + auto taskCount = this->ringBuffers[ringBufferIndex].completionFence; + auto pollAddress = this->tagAddress; + for (uint32_t i = 0; i < this->activeTiles; i++) { + if (*pollAddress < taskCount) { + return 
false; + } + pollAddress = ptrOffset(pollAddress, this->postSyncOffset); + } + return true; +} + template void DrmDirectSubmission::wait(uint32_t taskCountToWait) { auto pollAddress = this->tagAddress; diff --git a/shared/source/direct_submission/windows/wddm_direct_submission.h b/shared/source/direct_submission/windows/wddm_direct_submission.h index 2ec84c7ae7..e09451958d 100644 --- a/shared/source/direct_submission/windows/wddm_direct_submission.h +++ b/shared/source/direct_submission/windows/wddm_direct_submission.h @@ -28,10 +28,11 @@ class WddmDirectSubmission : public DirectSubmissionHw { bool submit(uint64_t gpuAddress, size_t size) override; bool handleResidency() override; - void handleCompletionRingBuffer(uint64_t completionValue, MonitoredFence &fence); + void handleCompletionFence(uint64_t completionValue, MonitoredFence &fence); void handleSwitchRingBuffers() override; uint64_t updateTagValue() override; void getTagAddressValue(TagData &tagData) override; + bool isCompleted(uint32_t ringBufferIndex) override; OsContextWin *osContextWin; Wddm *wddm; diff --git a/shared/source/direct_submission/windows/wddm_direct_submission.inl b/shared/source/direct_submission/windows/wddm_direct_submission.inl index a432119c22..820052b9d7 100644 --- a/shared/source/direct_submission/windows/wddm_direct_submission.inl +++ b/shared/source/direct_submission/windows/wddm_direct_submission.inl @@ -42,7 +42,7 @@ WddmDirectSubmission::~WddmDirectSubmission() { perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "Stopping Wddm ULLS\n"); if (this->ringStart) { this->stopRingBuffer(); - WddmDirectSubmission::handleCompletionRingBuffer(ringFence.lastSubmittedFence, ringFence); + WddmDirectSubmission::handleCompletionFence(ringFence.lastSubmittedFence, ringFence); } this->deallocateResources(); wddm->getWddmInterface()->destroyMonitorFence(ringFence); @@ -89,9 +89,9 @@ bool WddmDirectSubmission::handleResidency() { template void WddmDirectSubmission::handleSwitchRingBuffers() 
{ if (this->ringStart) { - if (this->completionRingBuffers[this->currentRingBuffer] != 0) { + if (this->ringBuffers[this->currentRingBuffer].completionFence != 0) { MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence(); - handleCompletionRingBuffer(this->completionRingBuffers[this->currentRingBuffer], currentFence); + handleCompletionFence(this->ringBuffers[this->currentRingBuffer].completionFence, currentFence); } } } @@ -102,13 +102,13 @@ uint64_t WddmDirectSubmission::updateTagValue() { currentFence.lastSubmittedFence = currentFence.currentFenceValue; currentFence.currentFenceValue++; - this->completionRingBuffers[this->currentRingBuffer] = currentFence.lastSubmittedFence; + this->ringBuffers[this->currentRingBuffer].completionFence = currentFence.lastSubmittedFence; return currentFence.lastSubmittedFence; } template -void WddmDirectSubmission::handleCompletionRingBuffer(uint64_t completionValue, MonitoredFence &fence) { +void WddmDirectSubmission::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) { wddm->waitFromCpu(completionValue, fence); } @@ -121,4 +121,14 @@ void WddmDirectSubmission::getTagAddressValue(TagData &ta tagData.tagValue = currentFence.currentFenceValue; } +template +inline bool WddmDirectSubmission::isCompleted(uint32_t ringBufferIndex) { + MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence(); + auto lastSubmittedFence = this->ringBuffers[ringBufferIndex].completionFence; + if (lastSubmittedFence > *currentFence.cpuAddress) { + return false; + } + return true; +} + } // namespace NEO diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index 87cd0b9f80..9e9cc88854 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -18,7 +18,6 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::activeTiles; using 
BaseClass::allocateResources; using BaseClass::completionFenceAllocation; - using BaseClass::completionRingBuffers; using BaseClass::cpuCachelineFlush; using BaseClass::currentQueueWorkCount; using BaseClass::currentRingBuffer; @@ -54,8 +53,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::performDiagnosticMode; using BaseClass::postSyncOffset; using BaseClass::reserved; - using BaseClass::ringBuffer; - using BaseClass::ringBuffer2; + using BaseClass::ringBuffers; using BaseClass::ringCommandStream; using BaseClass::ringStart; using BaseClass::semaphoreData; @@ -128,6 +126,10 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw BaseClass::performDiagnosticMode(); } + bool isCompleted(uint32_t ringBufferIndex) override { + return this->isCompletedReturn; + } + uint64_t updateTagValueReturn = 1ull; uint64_t tagAddressSetValue = MemoryConstants::pageSize; uint64_t tagValueSetValue = 1ull; @@ -141,5 +143,6 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw bool submitReturn = true; bool handleResidencyReturn = true; bool callBaseResident = false; + bool isCompletedReturn = true; }; } // namespace NEO diff --git a/shared/test/common/mocks/windows/mock_wddm_direct_submission.h b/shared/test/common/mocks/windows/mock_wddm_direct_submission.h index 9bb1520262..bf6764e47c 100644 --- a/shared/test/common/mocks/windows/mock_wddm_direct_submission.h +++ b/shared/test/common/mocks/windows/mock_wddm_direct_submission.h @@ -17,19 +17,18 @@ struct MockWddmDirectSubmission : public WddmDirectSubmissionmakeResidentCalledCount); ASSERT_EQ(3u, mockMemoryOperations->gfxAllocationsForMakeResident.size()); - EXPECT_EQ(directSubmission.ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]); - EXPECT_EQ(directSubmission.ringBuffer2, mockMemoryOperations->gfxAllocationsForMakeResident[1]); + EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]); + 
EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[1]); EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]); pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release(); @@ -158,8 +158,8 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithCompletionFenceAllocatio EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount); ASSERT_EQ(4u, mockMemoryOperations->gfxAllocationsForMakeResident.size()); - EXPECT_EQ(directSubmission.ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]); - EXPECT_EQ(directSubmission.ringBuffer2, mockMemoryOperations->gfxAllocationsForMakeResident[1]); + EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]); + EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[1]); EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]); EXPECT_EQ(directSubmission.completionFenceAllocation, mockMemoryOperations->gfxAllocationsForMakeResident[3]); @@ -174,8 +174,8 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStarted EXPECT_TRUE(ret); EXPECT_TRUE(directSubmission.ringStart); - EXPECT_NE(nullptr, directSubmission.ringBuffer); - EXPECT_NE(nullptr, directSubmission.ringBuffer2); + EXPECT_NE(nullptr, directSubmission.ringBuffers[0].ringBuffer); + EXPECT_NE(nullptr, directSubmission.ringBuffers[1].ringBuffer); EXPECT_NE(nullptr, directSubmission.semaphores); EXPECT_NE(0u, directSubmission.ringCommandStream.getUsed()); @@ -188,42 +188,99 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsNotStar EXPECT_TRUE(ret); EXPECT_FALSE(directSubmission.ringStart); - EXPECT_NE(nullptr, directSubmission.ringBuffer); - EXPECT_NE(nullptr, directSubmission.ringBuffer2); + EXPECT_NE(nullptr, directSubmission.ringBuffers[0].ringBuffer); + 
EXPECT_NE(nullptr, directSubmission.ringBuffers[1].ringBuffer); EXPECT_NE(nullptr, directSubmission.semaphores); EXPECT_EQ(0u, directSubmission.ringCommandStream.getUsed()); } HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSwitchBuffersWhenCurrentIsPrimaryThenExpectNextSecondary) { - using RingBufferUse = typename MockDirectSubmissionHw>::RingBufferUse; MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); - EXPECT_EQ(RingBufferUse::FirstBuffer, directSubmission.currentRingBuffer); + EXPECT_EQ(0u, directSubmission.currentRingBuffer); GraphicsAllocation *nextRing = directSubmission.switchRingBuffersAllocations(); - EXPECT_EQ(directSubmission.ringBuffer2, nextRing); - EXPECT_EQ(RingBufferUse::SecondBuffer, directSubmission.currentRingBuffer); + EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, nextRing); + EXPECT_EQ(1u, directSubmission.currentRingBuffer); } HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSwitchBuffersWhenCurrentIsSecondaryThenExpectNextPrimary) { - using RingBufferUse = typename MockDirectSubmissionHw>::RingBufferUse; MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); - EXPECT_EQ(RingBufferUse::FirstBuffer, directSubmission.currentRingBuffer); + EXPECT_EQ(0u, directSubmission.currentRingBuffer); GraphicsAllocation *nextRing = directSubmission.switchRingBuffersAllocations(); - EXPECT_EQ(directSubmission.ringBuffer2, nextRing); - EXPECT_EQ(RingBufferUse::SecondBuffer, directSubmission.currentRingBuffer); + EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, nextRing); + EXPECT_EQ(1u, directSubmission.currentRingBuffer); nextRing = directSubmission.switchRingBuffersAllocations(); - EXPECT_EQ(directSubmission.ringBuffer, nextRing); - EXPECT_EQ(RingBufferUse::FirstBuffer, directSubmission.currentRingBuffer); + 
EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, nextRing); + EXPECT_EQ(0u, directSubmission.currentRingBuffer); } + +HWTEST_F(DirectSubmissionTest, givenDirectSubmissionCurrentRingBuffersInUseWhenSwitchRingBufferThenAllocateNewInsteadOfWaiting) { + auto mockMemoryOperations = std::make_unique(); + pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get()); + MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + directSubmission.isCompletedReturn = false; + + bool ret = directSubmission.initialize(false, false); + EXPECT_TRUE(ret); + EXPECT_EQ(0u, directSubmission.currentRingBuffer); + EXPECT_EQ(2u, directSubmission.ringBuffers.size()); + + auto nextRing = directSubmission.switchRingBuffersAllocations(); + EXPECT_EQ(3u, directSubmission.ringBuffers.size()); + EXPECT_EQ(directSubmission.ringBuffers[2].ringBuffer, nextRing); + EXPECT_EQ(2u, directSubmission.currentRingBuffer); + + nextRing = directSubmission.switchRingBuffersAllocations(); + EXPECT_EQ(4u, directSubmission.ringBuffers.size()); + EXPECT_EQ(directSubmission.ringBuffers[3].ringBuffer, nextRing); + EXPECT_EQ(3u, directSubmission.currentRingBuffer); + + directSubmission.isCompletedReturn = true; + + nextRing = directSubmission.switchRingBuffersAllocations(); + EXPECT_EQ(4u, directSubmission.ringBuffers.size()); + EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, nextRing); + EXPECT_EQ(0u, directSubmission.currentRingBuffer); + + nextRing = directSubmission.switchRingBuffersAllocations(); + EXPECT_EQ(4u, directSubmission.ringBuffers.size()); + EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, nextRing); + EXPECT_EQ(1u, directSubmission.currentRingBuffer); + + nextRing = directSubmission.switchRingBuffersAllocations(); + EXPECT_EQ(4u, directSubmission.ringBuffers.size()); + EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, nextRing); + EXPECT_EQ(0u, directSubmission.currentRingBuffer); + + nextRing = 
directSubmission.switchRingBuffersAllocations(); + EXPECT_EQ(4u, directSubmission.ringBuffers.size()); + EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, nextRing); + EXPECT_EQ(1u, directSubmission.currentRingBuffer); + + nextRing = directSubmission.switchRingBuffersAllocations(); + EXPECT_EQ(4u, directSubmission.ringBuffers.size()); + EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, nextRing); + EXPECT_EQ(0u, directSubmission.currentRingBuffer); + + directSubmission.isCompletedReturn = false; + + nextRing = directSubmission.switchRingBuffersAllocations(); + EXPECT_EQ(5u, directSubmission.ringBuffers.size()); + EXPECT_EQ(directSubmission.ringBuffers[4].ringBuffer, nextRing); + EXPECT_EQ(4u, directSubmission.currentRingBuffer); + + pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release(); +} + HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAllocateFailWhenRingIsStartedThenExpectRingNotStarted) { MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); EXPECT_TRUE(directSubmission.disableCpuCacheFlush); @@ -549,8 +606,8 @@ HWTEST_F(DirectSubmissionTest, whenDirectSubmissionInitializedThenExpectCreatedA bool ret = directSubmission->initialize(false, false); EXPECT_TRUE(ret); - GraphicsAllocation *nulledAllocation = directSubmission->ringBuffer; - directSubmission->ringBuffer = nullptr; + GraphicsAllocation *nulledAllocation = directSubmission->ringBuffers[0u].ringBuffer; + directSubmission->ringBuffers[0u].ringBuffer = nullptr; directSubmission.reset(nullptr); memoryManager->freeGraphicsMemory(nulledAllocation); @@ -559,8 +616,8 @@ HWTEST_F(DirectSubmissionTest, whenDirectSubmissionInitializedThenExpectCreatedA ret = directSubmission->initialize(false, false); EXPECT_TRUE(ret); - nulledAllocation = directSubmission->ringBuffer2; - directSubmission->ringBuffer2 = nullptr; + nulledAllocation = directSubmission->ringBuffers[1u].ringBuffer; + directSubmission->ringBuffers[1u].ringBuffer = nullptr; 
directSubmission.reset(nullptr); memoryManager->freeGraphicsMemory(nulledAllocation); diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index 0ce4b85c83..f05def3e87 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -585,9 +585,9 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDirectSubmissionPrintBuffersWh std::string output = testing::internal::GetCapturedStdout(); - auto pos = output.find("Ring buffer 1"); + auto pos = output.find("Ring buffer 0"); EXPECT_TRUE(pos != std::string::npos); - pos = output.find("Ring buffer 2"); + pos = output.find("Ring buffer 1"); EXPECT_TRUE(pos != std::string::npos); pos = output.find("Client buffer"); EXPECT_TRUE(pos != std::string::npos); diff --git a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp index f88747cf73..0ba5477da1 100644 --- a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp @@ -60,6 +60,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + + auto drm = static_cast(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); + EXPECT_TRUE(drm->isDirectSubmissionActive()); + EXPECT_TRUE(drmDirectSubmission.allocateResources()); + + drmDirectSubmission.ringBuffers[0].completionFence = 1u; + EXPECT_FALSE(drmDirectSubmission.isCompleted(0u)); + + *drmDirectSubmission.tagAddress = 1u; + EXPECT_TRUE(drmDirectSubmission.isCompleted(0u)); + + drmDirectSubmission.ringBuffers[0].completionFence = 0u; +} + HWTEST_F(DrmDirectSubmissionTest, 
whenCreateDirectSubmissionThenValidObjectIsReturned) { auto directSubmission = DirectSubmissionHw>::create(*device->getDefaultEngine().commandStreamReceiver); EXPECT_NE(directSubmission.get(), nullptr); @@ -295,7 +313,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenSubmittingThe MockDrmDirectSubmission> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); drmDirectSubmission.completionFenceAllocation = nullptr; EXPECT_TRUE(drmDirectSubmission.allocateResources()); - auto ringBuffer = static_cast(drmDirectSubmission.ringBuffer); + auto ringBuffer = static_cast(drmDirectSubmission.ringBuffers[drmDirectSubmission.currentRingBuffer].ringBuffer); auto initialBO = ringBuffer->getBufferObjectToModify(0); auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as(); @@ -331,7 +349,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmitt MockDrmDirectSubmission> drmDirectSubmission(commandStreamReceiver); drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation(); EXPECT_TRUE(drmDirectSubmission.allocateResources()); - auto ringBuffer = static_cast(drmDirectSubmission.ringBuffer); + auto ringBuffer = static_cast(drmDirectSubmission.ringBuffers[drmDirectSubmission.currentRingBuffer].ringBuffer); auto initialBO = ringBuffer->getBufferObjectToModify(0); MockBufferObject mockBO(drm); @@ -368,7 +386,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTile1AndCompletionFenceSupportWhenSubmitt MockDrmDirectSubmission> drmDirectSubmission(commandStreamReceiver); drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation(); EXPECT_TRUE(drmDirectSubmission.allocateResources()); - auto ringBuffer = static_cast(drmDirectSubmission.ringBuffer); + auto ringBuffer = static_cast(drmDirectSubmission.ringBuffers[drmDirectSubmission.currentRingBuffer].ringBuffer); auto initialBO = ringBuffer->getBufferObjectToModify(0); MockBufferObject 
mockBO(drm); @@ -411,7 +429,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTwoTilesAndCompletionFenceSupportWhenSubm drmDirectSubmission.completionFenceAllocation = commandStreamReceiver.getTagAllocation(); EXPECT_TRUE(drmDirectSubmission.allocateResources()); - auto ringBuffer = static_cast(drmDirectSubmission.ringBuffer); + auto ringBuffer = static_cast(drmDirectSubmission.ringBuffers[drmDirectSubmission.currentRingBuffer].ringBuffer); auto initialBO = ringBuffer->getBufferObjectToModify(0); MockBufferObject mockBO(drm); diff --git a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp index 791275885f..b7c967d82f 100644 --- a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp @@ -59,8 +59,8 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThe bool ret = wddmDirectSubmission->initialize(true, false); EXPECT_TRUE(ret); EXPECT_TRUE(wddmDirectSubmission->ringStart); - EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffer); - EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffer2); + EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffers[0].ringBuffer); + EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffers[1].ringBuffer); EXPECT_NE(nullptr, wddmDirectSubmission->semaphores); EXPECT_EQ(1u, wddm->makeResidentResult.called); @@ -73,7 +73,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThe EXPECT_NE(0u, wddmDirectSubmission->ringCommandStream.getUsed()); *wddmDirectSubmission->ringFence.cpuAddress = 1ull; - wddmDirectSubmission->completionRingBuffers[wddmDirectSubmission->currentRingBuffer] = 2ull; + wddmDirectSubmission->ringBuffers[wddmDirectSubmission->currentRingBuffer].completionFence = 2ull; wddmDirectSubmission.reset(nullptr); EXPECT_EQ(1u, wddm->waitFromCpuResult.called); @@ -90,8 
+90,8 @@ HWTEST_F(WddmDirectSubmissionNoPreemptionTest, givenWddmWhenDirectIsInitializedA bool ret = wddmDirectSubmission->initialize(false, false); EXPECT_TRUE(ret); EXPECT_FALSE(wddmDirectSubmission->ringStart); - EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffer); - EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffer2); + EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffers[0].ringBuffer); + EXPECT_NE(nullptr, wddmDirectSubmission->ringBuffers[1].ringBuffer); EXPECT_NE(nullptr, wddmDirectSubmission->semaphores); EXPECT_EQ(1u, wddm->makeResidentResult.called); @@ -211,7 +211,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenHandlingRingBufferCompletionThen MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); uint64_t completionValue = 0x12345679ull; - wddmDirectSubmission.handleCompletionRingBuffer(completionValue, contextFence); + wddmDirectSubmission.handleCompletionFence(completionValue, contextFence); EXPECT_EQ(1u, wddm->waitFromCpuResult.called); EXPECT_EQ(completionValue, wddm->waitFromCpuResult.uint64ParamPassed); @@ -219,6 +219,21 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenHandlingRingBufferCompletionThen EXPECT_EQ(value, wddm->waitFromCpuResult.monitoredFence->currentFenceValue); } +HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenCallIsCompleteThenProperValueIsReturned) { + MonitoredFence &contextFence = osContext->getResidencyController().getMonitoredFence(); + + MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + + *contextFence.cpuAddress = 0u; + wddmDirectSubmission.ringBuffers[0].completionFence = 1u; + EXPECT_FALSE(wddmDirectSubmission.isCompleted(0u)); + + *contextFence.cpuAddress = 1u; + EXPECT_TRUE(wddmDirectSubmission.isCompleted(0u)); + + wddmDirectSubmission.ringBuffers[0].completionFence = 0u; +} + HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedThenExpectDispatchSwitchCommandsLinearStreamUpdated) { using 
MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); @@ -226,14 +241,14 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedThenEx bool ret = wddmDirectSubmission.initialize(true, false); EXPECT_TRUE(ret); size_t usedSpace = wddmDirectSubmission.ringCommandStream.getUsed(); - uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffer->getGpuAddress() + usedSpace; + uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffers[0].ringBuffer->getGpuAddress() + usedSpace; uint64_t gpuVa = wddmDirectSubmission.switchRingBuffers(); EXPECT_EQ(expectedGpuVa, gpuVa); - EXPECT_EQ(wddmDirectSubmission.ringBuffer2, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation()); + EXPECT_EQ(wddmDirectSubmission.ringBuffers[1].ringBuffer, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation()); LinearStream tmpCmdBuffer; - tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffer->getUnderlyingBuffer(), + tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffers[0].ringBuffer->getUnderlyingBuffer(), wddmDirectSubmission.ringCommandStream.getMaxAvailableSpace()); tmpCmdBuffer.getSpace(usedSpace + wddmDirectSubmission.getSizeSwitchRingBufferSection()); HardwareParse hwParse; @@ -242,7 +257,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedThenEx ASSERT_NE(nullptr, bbStart); auto gmmHelper = device->getGmmHelper(); uint64_t actualGpuVa = gmmHelper->canonize(bbStart->getBatchBufferStartAddress()); - EXPECT_EQ(wddmDirectSubmission.ringBuffer2->getGpuAddress(), actualGpuVa); + EXPECT_EQ(wddmDirectSubmission.ringBuffers[1].ringBuffer->getGpuAddress(), actualGpuVa); } HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferNotStartedThenExpectNoSwitchCommandsLinearStreamUpdated) { @@ -255,14 +270,14 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferNotStartedThe size_t usedSpace = 
wddmDirectSubmission.ringCommandStream.getUsed(); EXPECT_EQ(0u, usedSpace); - uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffer->getGpuAddress(); + uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffers[0].ringBuffer->getGpuAddress(); uint64_t gpuVa = wddmDirectSubmission.switchRingBuffers(); EXPECT_EQ(expectedGpuVa, gpuVa); - EXPECT_EQ(wddmDirectSubmission.ringBuffer2, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation()); + EXPECT_EQ(wddmDirectSubmission.ringBuffers[1].ringBuffer, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation()); LinearStream tmpCmdBuffer; - tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffer->getUnderlyingBuffer(), + tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffers[0].ringBuffer->getUnderlyingBuffer(), wddmDirectSubmission.ringCommandStream.getMaxAvailableSpace()); HardwareParse hwParse; hwParse.parseCommands(tmpCmdBuffer, 0u); @@ -270,24 +285,23 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferNotStartedThe EXPECT_EQ(nullptr, bbStart); } -HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWaitFenceUpdateThenExpectWaitCalled) { - using RingBufferUse = typename MockWddmDirectSubmission>::RingBufferUse; +HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWaitFenceUpdateThenExpectNewRingBufferAllocated) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); bool ret = wddmDirectSubmission.initialize(true, false); EXPECT_TRUE(ret); uint64_t expectedWaitFence = 0x10ull; - wddmDirectSubmission.completionRingBuffers[RingBufferUse::SecondBuffer] = expectedWaitFence; + wddmDirectSubmission.ringBuffers[1u].completionFence = expectedWaitFence; size_t usedSpace = wddmDirectSubmission.ringCommandStream.getUsed(); - uint64_t expectedGpuVa = wddmDirectSubmission.ringBuffer->getGpuAddress() + usedSpace; + uint64_t 
expectedGpuVa = wddmDirectSubmission.ringBuffers[0].ringBuffer->getGpuAddress() + usedSpace; uint64_t gpuVa = wddmDirectSubmission.switchRingBuffers(); EXPECT_EQ(expectedGpuVa, gpuVa); - EXPECT_EQ(wddmDirectSubmission.ringBuffer2, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation()); + EXPECT_EQ(wddmDirectSubmission.ringBuffers[2u].ringBuffer, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation()); LinearStream tmpCmdBuffer; - tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffer->getUnderlyingBuffer(), + tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffers[0].ringBuffer->getUnderlyingBuffer(), wddmDirectSubmission.ringCommandStream.getMaxAvailableSpace()); tmpCmdBuffer.getSpace(usedSpace + wddmDirectSubmission.getSizeSwitchRingBufferSection()); HardwareParse hwParse; @@ -296,13 +310,48 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWai ASSERT_NE(nullptr, bbStart); auto gmmHelper = device->getGmmHelper(); uint64_t actualGpuVa = gmmHelper->canonize(bbStart->getBatchBufferStartAddress()); - EXPECT_EQ(wddmDirectSubmission.ringBuffer2->getGpuAddress(), actualGpuVa); + EXPECT_EQ(wddmDirectSubmission.ringBuffers[2u].ringBuffer->getGpuAddress(), actualGpuVa); + + EXPECT_EQ(0u, wddm->waitFromCpuResult.called); +} + +HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWaitFenceUpdateThenExpectWaitCalled) { + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionMaxRingBuffers.set(2u); + + MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + + bool ret = wddmDirectSubmission.initialize(true, false); + EXPECT_TRUE(ret); + uint64_t expectedWaitFence = 0x10ull; + wddmDirectSubmission.ringBuffers[1u].completionFence = expectedWaitFence; + size_t usedSpace = wddmDirectSubmission.ringCommandStream.getUsed(); + uint64_t expectedGpuVa = 
wddmDirectSubmission.ringBuffers[0].ringBuffer->getGpuAddress() + usedSpace; + + uint64_t gpuVa = wddmDirectSubmission.switchRingBuffers(); + EXPECT_EQ(expectedGpuVa, gpuVa); + EXPECT_EQ(wddmDirectSubmission.ringBuffers.size(), 2u); + EXPECT_EQ(wddmDirectSubmission.ringBuffers[1u].ringBuffer, wddmDirectSubmission.ringCommandStream.getGraphicsAllocation()); + + LinearStream tmpCmdBuffer; + tmpCmdBuffer.replaceBuffer(wddmDirectSubmission.ringBuffers[0].ringBuffer->getUnderlyingBuffer(), + wddmDirectSubmission.ringCommandStream.getMaxAvailableSpace()); + tmpCmdBuffer.getSpace(usedSpace + wddmDirectSubmission.getSizeSwitchRingBufferSection()); + HardwareParse hwParse; + hwParse.parseCommands(tmpCmdBuffer, usedSpace); + MI_BATCH_BUFFER_START *bbStart = hwParse.getCommand(); + ASSERT_NE(nullptr, bbStart); + auto gmmHelper = device->getGmmHelper(); + uint64_t actualGpuVa = gmmHelper->canonize(bbStart->getBatchBufferStartAddress()); + EXPECT_EQ(wddmDirectSubmission.ringBuffers[1u].ringBuffer->getGpuAddress(), actualGpuVa); EXPECT_EQ(1u, wddm->waitFromCpuResult.called); EXPECT_EQ(expectedWaitFence, wddm->waitFromCpuResult.uint64ParamPassed); } -HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectCompletionRingBufferUpdated) { +HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectcompletionFenceUpdated) { uint64_t address = 0xFF00FF0000ull; uint64_t value = 0x12345678ull; MonitoredFence &contextFence = osContext->getResidencyController().getMonitoredFence(); @@ -314,7 +363,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectComple uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(); EXPECT_EQ(value, actualTagValue); EXPECT_EQ(value + 1, contextFence.currentFenceValue); - EXPECT_EQ(value, wddmDirectSubmission.completionRingBuffers[wddmDirectSubmission.currentRingBuffer]); + EXPECT_EQ(value, wddmDirectSubmission.ringBuffers[wddmDirectSubmission.currentRingBuffer].completionFence); } 
HWTEST_F(WddmDirectSubmissionTest, givenWddmResidencyEnabledWhenCreatingDestroyingThenSubmitterNotifiesResidencyLogger) {