diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 3a353b2f47..c29dac75ba 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -82,6 +82,8 @@ class DirectSubmissionHw { MOCKABLE_VIRTUAL bool stopRingBuffer(bool blocking); + bool startRingBuffer(); + MOCKABLE_VIRTUAL bool dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp); uint32_t getDispatchErrorCode(); @@ -119,7 +121,6 @@ class DirectSubmissionHw { virtual bool dispatchMonitorFenceRequired(bool requireMonitorFence); virtual void getTagAddressValue(TagData &tagData) = 0; void unblockGpu(); - bool submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size); bool copyCommandBufferIntoRing(BatchBuffer &batchBuffer); void cpuCachelineFlush(void *ptr, size_t size); @@ -134,9 +135,6 @@ class DirectSubmissionHw { void dispatchStartSection(uint64_t gpuStartAddress); size_t getSizeStartSection(); - size_t getUllsStateSize(); - void dispatchUllsState(); - void dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress); size_t getSizeSwitchRingBufferSection(); diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 62b3c0af2a..2bc82dfeac 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -499,6 +499,55 @@ bool DirectSubmissionHw::initialize(bool submitOnInit, bo return ret; } +template +bool DirectSubmissionHw::startRingBuffer() { + if (ringStart) { + return true; + } + + size_t startSize = getSizeSemaphoreSection(false); + if (!this->partitionConfigSet) { + startSize += getSizePartitionRegisterConfigurationSection(); + } + if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) { + startSize += getSizeSystemMemoryFenceAddress(); + } + if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) { + startSize += RelaxedOrderingHelper::getSizeRegistersInit(); + } + + size_t requiredSize = startSize + getSizeDispatch(false, false, dispatchMonitorFenceRequired(true)) + getSizeEnd(false); + if (ringCommandStream.getAvailableSpace() < requiredSize) { + switchRingBuffers(nullptr); + } + uint64_t gpuStartVa = ringCommandStream.getCurrentGpuAddressPosition(); + + if (!this->partitionConfigSet) { + dispatchPartitionRegisterConfiguration(); + this->partitionConfigSet = true; + } + + if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) { + dispatchSystemMemoryFenceAddress(); + this->systemMemoryFenceAddressSet = true; + } + + if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) { + preinitializeRelaxedOrderingSections(); + dispatchStaticRelaxedOrderingScheduler(); + initRelaxedOrderingRegisters(); + + this->relaxedOrderingInitialized = true; + } + + currentQueueWorkCount++; + dispatchSemaphoreSection(currentQueueWorkCount); + + ringStart = submit(gpuStartVa, startSize); + + return ringStart; +} + template bool DirectSubmissionHw::stopRingBuffer(bool blocking) { if (!ringStart) { @@ -891,46 +940,15 @@ bool DirectSubmissionHw::copyCommandBufferIntoRing(BatchB return ret; } -template -size_t DirectSubmissionHw::getUllsStateSize() { - size_t startSize = 0u; - if (!this->partitionConfigSet) { - startSize += getSizePartitionRegisterConfigurationSection(); - } - if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) { - startSize += getSizeSystemMemoryFenceAddress(); - } - if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) { - startSize += RelaxedOrderingHelper::getSizeRegistersInit(); - } - return startSize; -} - -template -void DirectSubmissionHw::dispatchUllsState() { - if (!this->partitionConfigSet) { - dispatchPartitionRegisterConfiguration(); - this->partitionConfigSet = true; - } - if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) { - dispatchSystemMemoryFenceAddress(); - this->systemMemoryFenceAddressSet = true; - } - if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) { - preinitializeRelaxedOrderingSections(); - dispatchStaticRelaxedOrderingScheduler(); - initRelaxedOrderingRegisters(); - - this->relaxedOrderingInitialized = true; - } -} - template bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) { if (batchBuffer.ringBufferRestartRequest) { this->stopRingBuffer(false); } + if (!this->startRingBuffer()) { + return false; + } lastSubmittedThrottle = batchBuffer.throttle; bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies); bool inputRequiredMonitorFence = false; @@ -941,7 +959,7 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe } bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(inputRequiredMonitorFence); - size_t dispatchSize = this->getUllsStateSize() + getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies, dispatchMonitorFence); + size_t dispatchSize = getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies, dispatchMonitorFence); if (this->copyCommandBufferIntoRing(batchBuffer)) { dispatchSize += (batchBuffer.stream->getUsed() - batchBuffer.startOffset) - 2 * getSizeStartSection(); @@ -960,14 +978,8 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe } } - auto needStart = !this->ringStart; - this->ringStart = true; - auto startVA = ringCommandStream.getCurrentGpuAddressPosition(); - this->switchRingBuffersNeeded(requiredMinimalSize, batchBuffer.allocationsForResidency); - this->dispatchUllsState(); - if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) { dispatchRelaxedOrderingQueueStall(); } @@ -979,10 +991,9 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe void *currentPosition = dispatchWorkloadSection(batchBuffer, dispatchMonitorFence); cpuCachelineFlush(currentPosition, dispatchSize); + handleResidency(); - if (!this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize)) { - return false; - } + this->unblockGpu(); cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize); currentQueueWorkCount++; @@ -997,17 +1008,6 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe return ringStart; } -template -bool DirectSubmissionHw::submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size) { - if (needStart) { - return this->submit(gpuAddress, size); - } else { - handleResidency(); - this->unblockGpu(); - return true; - } -} - template inline void DirectSubmissionHw::setReturnAddress(void *returnCmd, uint64_t returnAddress) { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; diff --git a/shared/source/direct_submission/windows/wddm_direct_submission.inl b/shared/source/direct_submission/windows/wddm_direct_submission.inl index 6efb1df07f..b9c8fcb30d 100644 --- a/shared/source/direct_submission/windows/wddm_direct_submission.inl +++ b/shared/source/direct_submission/windows/wddm_direct_submission.inl @@ -55,9 +55,7 @@ WddmDirectSubmission::~WddmDirectSubmission() { template inline void WddmDirectSubmission::flushMonitorFence() { - auto needStart = !this->ringStart; - this->ringStart = true; - auto startVA = this->ringCommandStream.getCurrentGpuAddressPosition(); + this->startRingBuffer(); size_t requiredMinimalSize = this->getSizeSemaphoreSection(false) + Dispatcher::getSizeMonitorFence(this->rootDeviceEnvironment) + @@ -73,7 +71,8 @@ inline void WddmDirectSubmission::flushMonitorFence() { Dispatcher::dispatchMonitorFence(this->ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, this->rootDeviceEnvironment, this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired); this->dispatchSemaphoreSection(this->currentQueueWorkCount + 1); - this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize); + this->handleResidency(); + this->unblockGpu(); this->currentQueueWorkCount++; this->updateTagValueImpl(this->currentRingBuffer); diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index f922c9a443..a535b3f3ea 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -40,7 +40,6 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::dispatchSemaphoreSection; using BaseClass::dispatchStartSection; using BaseClass::dispatchSwitchRingBufferSection; - using BaseClass::dispatchUllsState; using BaseClass::dispatchWorkloadSection; using BaseClass::getDiagnosticModeSection; using BaseClass::getSizeDisablePrefetcher; @@ -80,6 +79,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::semaphorePtr; using BaseClass::semaphores; using BaseClass::setReturnAddress; + using BaseClass::startRingBuffer; using BaseClass::stopRingBuffer; using BaseClass::switchRingBuffersAllocations; using BaseClass::switchRingBuffersNeeded; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp index fa91249379..aba6383d92 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp @@ -375,6 +375,74 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSubmitFailWhenRingIsStartedT EXPECT_NE(0u, directSubmission.ringCommandStream.getUsed()); } +HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsStartedThenExpectNoStartCommandsDispatched) { + MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + + bool ret = directSubmission.initialize(true, false); + EXPECT_TRUE(ret); + size_t usedSize = directSubmission.ringCommandStream.getUsed(); + + ret = directSubmission.startRingBuffer(); + EXPECT_TRUE(ret); + EXPECT_EQ(usedSize, directSubmission.ringCommandStream.getUsed()); +} + +HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedThenExpectStartCommandsDispatched) { + MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + + bool ret = directSubmission.initialize(false, false); + EXPECT_TRUE(ret); + size_t usedSize = directSubmission.ringCommandStream.getUsed(); + + ret = directSubmission.startRingBuffer(); + EXPECT_TRUE(ret); + EXPECT_TRUE(directSubmission.ringStart); + EXPECT_NE(usedSize, directSubmission.ringCommandStream.getUsed()); +} + +HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedSubmitFailThenExpectStartCommandsDispatchedRingNotStarted) { + MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + + bool ret = directSubmission.initialize(false, false); + EXPECT_TRUE(ret); + size_t usedSize = directSubmission.ringCommandStream.getUsed(); + + directSubmission.submitReturn = false; + ret = directSubmission.startRingBuffer(); + EXPECT_FALSE(ret); + EXPECT_FALSE(directSubmission.ringStart); + EXPECT_NE(usedSize, directSubmission.ringCommandStream.getUsed()); +} + +HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedAndSwitchBufferIsNeededThenExpectRingAllocationChangedStartCommandsDispatched) { + MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + + bool ret = directSubmission.initialize(false, false); + EXPECT_TRUE(ret); + auto expectedRingBuffer = directSubmission.currentRingBuffer; + GraphicsAllocation *oldRingBuffer = directSubmission.ringCommandStream.getGraphicsAllocation(); + + auto requiredSize = directSubmission.getSizeSemaphoreSection(false); + if (directSubmission.miMemFenceRequired) { + requiredSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } + if (directSubmission.isRelaxedOrderingEnabled()) { + requiredSize += RelaxedOrderingHelper::getSizeRegistersInit(); + } + + directSubmission.ringCommandStream.getSpace(directSubmission.ringCommandStream.getAvailableSpace() - requiredSize); + + ret = directSubmission.startRingBuffer(); + auto actualRingBuffer = directSubmission.currentRingBuffer; + + EXPECT_TRUE(ret); + EXPECT_TRUE(directSubmission.ringStart); + EXPECT_NE(oldRingBuffer, directSubmission.ringCommandStream.getGraphicsAllocation()); + EXPECT_EQ(requiredSize, directSubmission.ringCommandStream.getUsed()); + + EXPECT_NE(expectedRingBuffer, actualRingBuffer); +} + HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStopWhenStopRingIsCalledThenExpectStopCommandDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index edb093857d..61ac184fd4 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -139,7 +139,7 @@ HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenMiMemFenceSupportedWhenDis EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp)); - validateFenceProgramming(directSubmission, 1, 1); + validateFenceProgramming(directSubmission, 2, 1); EXPECT_EQ(miMemFenceSupported, directSubmission.systemMemoryFenceAddressSet); } @@ -155,7 +155,7 @@ HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenMiMemFenceSupportedWhenSys EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp)); - validateFenceProgramming(directSubmission, 1, 0); + validateFenceProgramming(directSubmission, 2, 0); EXPECT_TRUE(directSubmission.systemMemoryFenceAddressSet); } @@ -651,20 +651,23 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, ret = directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); EXPECT_TRUE(ret); EXPECT_EQ(oldRingAllocation, directSubmission.ringCommandStream.getGraphicsAllocation()); - EXPECT_EQ(0u, directSubmission.semaphoreData->queueWorkCount); - EXPECT_EQ(2u, directSubmission.currentQueueWorkCount); + EXPECT_EQ(2u, directSubmission.semaphoreData->queueWorkCount); + EXPECT_EQ(3u, directSubmission.currentQueueWorkCount); EXPECT_EQ(1u, directSubmission.submitCount); - EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress); - EXPECT_EQ(1u, directSubmission.handleResidencyCount); - - size_t submitSize = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler(); + size_t submitSize = directSubmission.getSizeSemaphoreSection(false); if (directSubmission.miMemFenceRequired) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { submitSize += RelaxedOrderingHelper::getSizeRegistersInit(); } - EXPECT_EQ(submitSize, directSubmission.ringCommandStream.getUsed()); + EXPECT_EQ(submitSize, directSubmission.submitSize); + EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress); + EXPECT_EQ(2u, directSubmission.handleResidencyCount); + + size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler(); + + EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed()); EXPECT_TRUE(directSubmission.ringStart); } @@ -729,19 +732,22 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, ret = directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); EXPECT_TRUE(ret); EXPECT_NE(oldRingAllocation, directSubmission.ringCommandStream.getGraphicsAllocation()); - EXPECT_EQ(0u, directSubmission.semaphoreData->queueWorkCount); - EXPECT_EQ(2u, directSubmission.currentQueueWorkCount); + EXPECT_EQ(2u, directSubmission.semaphoreData->queueWorkCount); + EXPECT_EQ(3u, directSubmission.currentQueueWorkCount); EXPECT_EQ(1u, directSubmission.submitCount); - EXPECT_EQ(1u, directSubmission.handleResidencyCount); - - size_t submitSize = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler(); + size_t submitSize = directSubmission.getSizeSemaphoreSection(false); if (directSubmission.miMemFenceRequired) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { submitSize += RelaxedOrderingHelper::getSizeRegistersInit(); } - EXPECT_EQ(submitSize, directSubmission.ringCommandStream.getUsed()); + EXPECT_EQ(submitSize, directSubmission.submitSize); + EXPECT_EQ(2u, directSubmission.handleResidencyCount); + + size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler(); + + EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed()); EXPECT_TRUE(directSubmission.ringStart); } @@ -867,8 +873,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, EXPECT_FALSE(directSubmission.ringStart); EXPECT_EQ(0x0u, directSubmission.ringCommandStream.getUsed()); - directSubmission.dispatchUllsState(); + ret = directSubmission.startRingBuffer(); + EXPECT_TRUE(ret); EXPECT_TRUE(directSubmission.partitionConfigSet); + EXPECT_TRUE(directSubmission.ringStart); HardwareParse hwParse; hwParse.parseCommands(directSubmission.ringCommandStream, 0); @@ -1636,10 +1644,13 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStat directSubmission.initialize(false, false); EXPECT_EQ(0u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); - directSubmission.dispatchUllsState(); + directSubmission.startRingBuffer(); EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); + directSubmission.startRingBuffer(); + EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); + directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); } @@ -1708,7 +1719,7 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize size_t offset = directSubmission.ringCommandStream.getUsed(); - directSubmission.dispatchUllsState(); + directSubmission.startRingBuffer(); EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, offset)); EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); @@ -1719,7 +1730,7 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize directSubmission.initialize(false, false); EXPECT_EQ(0u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); - directSubmission.dispatchUllsState(); + directSubmission.startRingBuffer(); EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); EXPECT_TRUE(directSubmission.relaxedOrderingInitialized); @@ -1727,7 +1738,7 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize EXPECT_NE(nullptr, directSubmission.preinitializedRelaxedOrderingScheduler.get()); size_t offset = directSubmission.ringCommandStream.getUsed(); - directSubmission.dispatchUllsState(); + directSubmission.startRingBuffer(); EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, offset)); EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); }