diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index f7ad1fb8ee..281a6fb76c 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -67,7 +67,7 @@ struct DirectSubmissionInputParams : NonCopyableClass { OsContext &osContext; const RootDeviceEnvironment &rootDeviceEnvironment; MemoryManager *memoryManager = nullptr; - const GraphicsAllocation *globalFenceAllocation = nullptr; + GraphicsAllocation *globalFenceAllocation = nullptr; GraphicsAllocation *workPartitionAllocation = nullptr; GraphicsAllocation *completionFenceAllocation = nullptr; TaskCountType initialCompletionFenceValue = 0; @@ -192,6 +192,7 @@ class DirectSubmissionHw { void updateRelaxedOrderingQueueSize(uint32_t newSize); + virtual void makeGlobalFenceAlwaysResident(){}; struct RingBufferUse { RingBufferUse() = default; RingBufferUse(FlushStamp completionFence, GraphicsAllocation *ringBuffer) : completionFence(completionFence), ringBuffer(ringBuffer){}; @@ -223,7 +224,7 @@ class DirectSubmissionHw { MemoryOperationsHandler *memoryOperationHandler = nullptr; const HardwareInfo *hwInfo = nullptr; const RootDeviceEnvironment &rootDeviceEnvironment; - const GraphicsAllocation *globalFenceAllocation = nullptr; + GraphicsAllocation *globalFenceAllocation = nullptr; GraphicsAllocation *completionFenceAllocation = nullptr; GraphicsAllocation *semaphores = nullptr; GraphicsAllocation *workPartitionAllocation = nullptr; diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index a07d972206..a149b3e545 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -1241,6 +1241,7 @@ size_t DirectSubmissionHw::getDiagnosticModeSection() { template void DirectSubmissionHw::dispatchSystemMemoryFenceAddress() { + this->makeGlobalFenceAlwaysResident(); EncodeMemoryFence::encodeSystemMemoryFence(ringCommandStream, this->globalFenceAllocation); } diff --git a/shared/source/direct_submission/windows/wddm_direct_submission.h b/shared/source/direct_submission/windows/wddm_direct_submission.h index b7999f72c3..a98884801b 100644 --- a/shared/source/direct_submission/windows/wddm_direct_submission.h +++ b/shared/source/direct_submission/windows/wddm_direct_submission.h @@ -42,6 +42,7 @@ class WddmDirectSubmission : public DirectSubmissionHw { void getTagAddressValue(TagData &tagData) override; bool isCompleted(uint32_t ringBufferIndex) override; MOCKABLE_VIRTUAL void updateMonitorFenceValueForResidencyList(ResidencyContainer *allocationsForResidency); + void makeGlobalFenceAlwaysResident() override; OsContextWin *osContextWin; Wddm *wddm; diff --git a/shared/source/direct_submission/windows/wddm_direct_submission.inl b/shared/source/direct_submission/windows/wddm_direct_submission.inl index bffb61438a..3c99afcdc0 100644 --- a/shared/source/direct_submission/windows/wddm_direct_submission.inl +++ b/shared/source/direct_submission/windows/wddm_direct_submission.inl @@ -219,4 +219,13 @@ inline void WddmDirectSubmission::unblockPagingFenceSemap this->semaphoreData->pagingFenceCounter = static_cast(*this->wddm->getPagingFenceAddress()); } +template +inline void WddmDirectSubmission::makeGlobalFenceAlwaysResident() { + if (this->globalFenceAllocation != nullptr) { + DirectSubmissionAllocations allocations; + allocations.push_back(this->globalFenceAllocation); + UNRECOVERABLE_IF(!this->makeResourcesResident(allocations)); + } +} + } // namespace NEO diff --git a/shared/source/os_interface/windows/wddm_device_command_stream.inl b/shared/source/os_interface/windows/wddm_device_command_stream.inl index 5d14e0b6fb..54003c31b0 100644 --- a/shared/source/os_interface/windows/wddm_device_command_stream.inl +++ b/shared/source/os_interface/windows/wddm_device_command_stream.inl @@ -189,9 +189,10 @@ template void WddmCommandStreamReceiver::flushMonitorFence() { if (this->directSubmission.get()) { this->directSubmission->flushMonitorFence(); + } else if (this->blitterDirectSubmission.get()) { + this->blitterDirectSubmission->flushMonitorFence(); } } - template void WddmCommandStreamReceiver::kmDafLockAllocations(ResidencyContainer &allocationsForResidency) { for (auto &graphicsAllocation : allocationsForResidency) { diff --git a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp index 7c88cbf738..345db974f8 100644 --- a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp @@ -8,6 +8,7 @@ #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/os_interface/windows/sys_calls.h" @@ -24,6 +25,7 @@ #include "shared/test/common/test_macros/hw_test.h" #include "shared/test/unit_test/direct_submission/direct_submission_controller_mock.h" #include "shared/test/unit_test/mocks/windows/mock_wddm_direct_submission.h" + extern uint64_t cpuFence; namespace NEO { @@ -82,6 +84,9 @@ struct WddmDirectSubmissionWithMockGdiDllFixture : public WddmFixtureWithMockGdi using WddmDirectSubmissionWithMockGdiDllTest = Test; HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThenExpectProperCommandsDispatched) { + DebugManagerStateRestore restorer; + debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0); + std::unique_ptr>> wddmDirectSubmission = std::make_unique>>(*device->getDefaultEngine().commandStreamReceiver); @@ -115,6 +120,33 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThe EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled); } +HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedWithMiMemFenceSupportedThenMakeGlobalFenceResident) { + DebugManagerStateRestore restorer; + debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1); + std::unique_ptr>> wddmDirectSubmission = + std::make_unique>>(*device->getDefaultEngine().commandStreamReceiver); + + EXPECT_EQ(1u, wddmDirectSubmission->commandBufferHeader->NeedsMidBatchPreEmptionSupport); + + bool ret = wddmDirectSubmission->initialize(true, false); + EXPECT_TRUE(ret); + EXPECT_TRUE(wddmDirectSubmission->ringStart); + + auto isFenceRequired = device->getGfxCoreHelper().isFenceAllocationRequired(device->getHardwareInfo()); + auto &compilerProductHelper = device->getCompilerProductHelper(); + auto isHeaplessStateInit = compilerProductHelper.isHeaplessStateInitEnabled(compilerProductHelper.isHeaplessModeEnabled()); + if (isFenceRequired && !isHeaplessStateInit) { + EXPECT_EQ(1u, wddm->makeResidentResult.handleCount); + EXPECT_TRUE(device->getDefaultEngine().commandStreamReceiver->getGlobalFenceAllocation()->isExplicitlyMadeResident()); + } + *wddmDirectSubmission->ringFence.cpuAddress = 1ull; + wddmDirectSubmission->ringBuffers[wddmDirectSubmission->currentRingBuffer].completionFence = 2ull; + + wddmDirectSubmission.reset(nullptr); + EXPECT_EQ(1u, wddm->waitFromCpuResult.called); + EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled); +} + using WddmDirectSubmissionNoPreemptionTest = WddmDirectSubmissionFixture; HWTEST_F(WddmDirectSubmissionNoPreemptionTest, givenWddmWhenDirectIsInitializedAndNotStartedThenExpectNoCommandsDispatched) { std::unique_ptr>> wddmDirectSubmission = diff --git a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index cc8b59feb5..7635122da2 100644 --- a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -1103,10 +1103,10 @@ HWTEST_F(WddmCsrCompressionTests, givenDisabledCompressionWhenFlushingThenDontIn memoryManager->freeGraphicsMemory(graphicsAllocation); } -template -struct MockWddmDrmDirectSubmissionDispatchCommandBuffer : public MockWddmDirectSubmission> { - MockWddmDrmDirectSubmissionDispatchCommandBuffer(const CommandStreamReceiver &commandStreamReceiver) - : MockWddmDirectSubmission>(commandStreamReceiver) { +template +struct MockWddmDrmDirectSubmissionDispatchCommandBuffer : public MockWddmDirectSubmission { + MockWddmDrmDirectSubmissionDispatchCommandBuffer(const CommandStreamReceiver &commandStreamReceiver) + : MockWddmDirectSubmission(commandStreamReceiver) { } bool dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) override { @@ -1123,7 +1123,8 @@ struct MockWddmDrmDirectSubmissionDispatchCommandBuffer : public MockWddmDirectS }; HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenCsrWhenFlushMonitorFenceThenFlushMonitorFenceOnDirectSubmission) { - using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer; + using Dispatcher = RenderDispatcher; + using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer; auto mockCsr = static_cast *>(csr); debugManager.flags.EnableDirectSubmission.set(1); @@ -1156,8 +1157,34 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenCsrWhenFlushMonitorFenceTh EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 1u); } +HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnBcsWhenCsrFlushMonitorFenceCalledThenFlushCalled) { + using Dispatcher = BlitterDispatcher; + using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer; + + auto mockCsr = static_cast *>(csr); + OsContextWin bcsOsContext(*wddm, 0, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::regular})); + bcsOsContext.ensureContextInitialized(false); + mockCsr->setupContext(bcsOsContext); + + debugManager.flags.EnableDirectSubmission.set(1); + debugManager.flags.DirectSubmissionFlatRingBuffer.set(0); + + auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); + hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = true; + + mockCsr->blitterDirectSubmission = std::make_unique(*device->getDefaultEngine().commandStreamReceiver); + auto directSubmission = reinterpret_cast(mockCsr->blitterDirectSubmission.get()); + EXPECT_FALSE(csr->isDirectSubmissionEnabled()); + EXPECT_TRUE(csr->isBlitterDirectSubmissionEnabled()); + + EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 0u); + csr->flushMonitorFence(); + EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 1u); +} + HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenLastSubmittedFenceLowerThanFenceValueToWaitWhenWaitFromCpuThenFlushMonitorFence) { - using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer; + using Dispatcher = RenderDispatcher; + using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer; auto mockCsr = static_cast *>(csr); debugManager.flags.EnableDirectSubmission.set(1); @@ -1197,7 +1224,8 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenLastSubmittedFenceLowerTha } HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionFailsThenFlushReturnsError) { - using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer; + using Dispatcher = RenderDispatcher; + using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer; auto mockCsr = static_cast *>(csr); bool renderStreamerFound = false; @@ -1544,4 +1572,4 @@ TEST_F(SemaphorWaitForResidencyTest, givenIllegalAllocationTypeThenDontSignalFla EXPECT_TRUE(batchBuffer.pagingFenceSemInfo.requiresBlockingResidencyHandling); memoryManager->freeGraphicsMemory(cmdBuffer); -} +} \ No newline at end of file