diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 533d341ce4..be7b38071a 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -386,6 +386,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: defaul DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -1, "-1: default, >0: Max gpu queue size. If limit is reached, scheduler wont consume new work") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.") +DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMonitorFenceInputPolicy, -1, "-1: default, 0: stalling command flag, 1: explicit monitor fence flag. 
Selects policy to dispatch monitor fence upon input flag, either for every stalling command or explicit monitor fence dispatch") DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers") /*FEATURE FLAGS*/ diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 51cd153b32..784c32dfb4 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -244,5 +244,6 @@ class DirectSubmissionHw { bool relaxedOrderingEnabled = false; bool relaxedOrderingInitialized = false; bool relaxedOrderingSchedulerRequired = false; + bool inputMonitorFenceDispatchRequirement = false; }; } // namespace NEO diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index f20787ed52..c23d7e26c8 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -80,6 +80,10 @@ DirectSubmissionHw::DirectSubmissionHw(const DirectSubmis sfenceMode = static_cast(DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get()); } + if (DebugManager.flags.DirectSubmissionMonitorFenceInputPolicy.get() != -1) { + this->inputMonitorFenceDispatchRequirement = !!(DebugManager.flags.DirectSubmissionMonitorFenceInputPolicy.get()); + } + int32_t disableCacheFlushKey = DebugManager.flags.DirectSubmissionDisableCpuCacheFlush.get(); if (disableCacheFlushKey != -1) { disableCpuCacheFlush = disableCacheFlushKey == 1 ? 
true : false; @@ -947,7 +951,13 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe } bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies); - bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(batchBuffer.hasStallingCmds); + bool inputRequiredMonitorFence = false; + if (this->inputMonitorFenceDispatchRequirement) { + inputRequiredMonitorFence = batchBuffer.dispatchMonitorFence; + } else { + inputRequiredMonitorFence = batchBuffer.hasStallingCmds; + } + bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(inputRequiredMonitorFence); size_t dispatchSize = getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies, dispatchMonitorFence); @@ -989,7 +999,7 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe currentQueueWorkCount++; DirectSubmissionDiagnostics::diagnosticModeOneSubmit(diagnostic.get()); - uint64_t flushValue = updateTagValue(batchBuffer.hasStallingCmds); + uint64_t flushValue = updateTagValue(dispatchMonitorFence); if (flushValue == DirectSubmissionHw::updateTagValueFail) { return false; } diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index 446ab5e31c..8165a9bd6c 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -212,7 +212,7 @@ void DrmDirectSubmission::handleSwitchRingBuffers() { template uint64_t DrmDirectSubmission::updateTagValue(bool requireMonitorFence) { - if (!this->disableMonitorFence) { + if (requireMonitorFence) { this->currentTagData.tagValue++; this->ringBuffers[this->currentRingBuffer].completionFence = this->currentTagData.tagValue; } diff --git a/shared/source/direct_submission/windows/wddm_direct_submission.inl b/shared/source/direct_submission/windows/wddm_direct_submission.inl index 
11fa809fec..89175a86e7 100644 --- a/shared/source/direct_submission/windows/wddm_direct_submission.inl +++ b/shared/source/direct_submission/windows/wddm_direct_submission.inl @@ -153,7 +153,7 @@ uint64_t WddmDirectSubmission::updateTagValue(bool requir } } - if (!this->disableMonitorFence || requireMonitorFence) { + if (requireMonitorFence) { return this->updateTagValueImpl(); } return 0ull; diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index 87fa643e62..af6cbaa836 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -55,6 +55,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::getSizeSystemMemoryFenceAddress; using BaseClass::hwInfo; using BaseClass::immWritePostSyncOffset; + using BaseClass::inputMonitorFenceDispatchRequirement; using BaseClass::isDisablePrefetcherRequired; using BaseClass::miMemFenceRequired; using BaseClass::osContext; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index af7723fec8..426053b79a 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -117,6 +117,7 @@ USMEvictAfterMigration = 0 EnableDirectSubmissionController = -1 DirectSubmissionControllerTimeout = -1 DirectSubmissionControllerDivisor = -1 +DirectSubmissionMonitorFenceInputPolicy = -1 UseVmBind = -1 EnableNullHardware = 0 ForceLinearImages = 0 diff --git a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp index b9313b5176..d1599cd0b3 100644 --- a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp @@ -10,6 +10,7 @@ #include 
"shared/source/direct_submission/dispatchers/blitter_dispatcher.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/direct_submission/linux/drm_direct_submission.h" +#include "shared/source/helpers/flush_stamp.h" #include "shared/source/os_interface/linux/drm_gem_close_worker.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/linux/sys_calls.h" @@ -73,6 +74,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(drmDirectSubmission.inputMonitorFenceDispatchRequirement); +} + +HWTEST_F(DrmDirectSubmissionTest, + givenDrmDirectSubmissionWithStallingCommandInputMonitorFencePolicyWhenDispatchingWorkloadWithDisabledMonitorFenceThenDrmIgnoresInputFlag) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; + + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(0); + + MockDrmDirectSubmission> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(drmDirectSubmission.inputMonitorFenceDispatchRequirement); + drmDirectSubmission.disableMonitorFence = true; + + FlushStampTracker flushStamp(true); + + EXPECT_TRUE(drmDirectSubmission.initialize(false, false)); + + BatchBuffer batchBuffer = {}; + GraphicsAllocation *commandBuffer = nullptr; + LinearStream stream; + + const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000, + AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}; + commandBuffer = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties); + + stream.replaceGraphicsAllocation(commandBuffer); + stream.replaceBuffer(commandBuffer->getUnderlyingBuffer(), commandBuffer->getUnderlyingBufferSize()); + stream.getSpace(0x20); 
+ + memset(stream.getCpuBase(), 0, 0x20); + + batchBuffer.endCmdPtr = ptrOffset(stream.getCpuBase(), 0x20); + batchBuffer.commandBufferAllocation = commandBuffer; + batchBuffer.usedSize = 0x40; + batchBuffer.taskStartAddress = 0x881112340000; + batchBuffer.stream = &stream; + batchBuffer.hasStallingCmds = true; + + EXPECT_TRUE(drmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp)); + + HardwareParse hwParse; + hwParse.parsePipeControl = true; + hwParse.parseCommands(drmDirectSubmission.ringCommandStream, 0); + hwParse.findHardwareCommands(); + + bool foundFenceUpdate = false; + for (auto &it : hwParse.pipeControlList) { + PIPE_CONTROL *pipeControl = reinterpret_cast(it); + if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + foundFenceUpdate = true; + break; + } + } + EXPECT_FALSE(foundFenceUpdate); + + executionEnvironment.memoryManager->freeGraphicsMemory(commandBuffer); + *drmDirectSubmission.tagAddress = 1; +} + +HWTEST_F(DrmDirectSubmissionTest, + givenDrmDirectSubmissionWithExplicitFlagInputMonitorFencePolicyWhenDispatchingWorkloadWithDisabledMonitorFenceThenDrmIgnoresInputFlag) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; + + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(1); + + MockDrmDirectSubmission> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + EXPECT_TRUE(drmDirectSubmission.inputMonitorFenceDispatchRequirement); + drmDirectSubmission.disableMonitorFence = true; + + FlushStampTracker flushStamp(true); + + EXPECT_TRUE(drmDirectSubmission.initialize(false, false)); + + BatchBuffer batchBuffer = {}; + GraphicsAllocation *commandBuffer = nullptr; + LinearStream stream; + + const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000, + AllocationType::COMMAND_BUFFER, 
device->getDeviceBitfield()}; + commandBuffer = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties); + + stream.replaceGraphicsAllocation(commandBuffer); + stream.replaceBuffer(commandBuffer->getUnderlyingBuffer(), commandBuffer->getUnderlyingBufferSize()); + stream.getSpace(0x20); + + memset(stream.getCpuBase(), 0, 0x20); + + batchBuffer.endCmdPtr = ptrOffset(stream.getCpuBase(), 0x20); + batchBuffer.commandBufferAllocation = commandBuffer; + batchBuffer.usedSize = 0x40; + batchBuffer.taskStartAddress = 0x881112340000; + batchBuffer.stream = &stream; + batchBuffer.dispatchMonitorFence = true; + + EXPECT_TRUE(drmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp)); + + HardwareParse hwParse; + hwParse.parsePipeControl = true; + hwParse.parseCommands(drmDirectSubmission.ringCommandStream, 0); + hwParse.findHardwareCommands(); + + bool foundFenceUpdate = false; + for (auto &it : hwParse.pipeControlList) { + PIPE_CONTROL *pipeControl = reinterpret_cast(it); + if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + foundFenceUpdate = true; + break; + } + } + EXPECT_FALSE(foundFenceUpdate); + + executionEnvironment.memoryManager->freeGraphicsMemory(commandBuffer); + *drmDirectSubmission.tagAddress = 1; +} diff --git a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp index dd771df140..c6ea8844a4 100644 --- a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp @@ -448,7 +448,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectcomple MockWddmDirectSubmission> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); EXPECT_TRUE(wddmDirectSubmission.allocateOsResources()); - uint64_t 
actualTagValue = wddmDirectSubmission.updateTagValue(false); + uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(wddmDirectSubmission.dispatchMonitorFenceRequired(false)); EXPECT_EQ(value, actualTagValue); EXPECT_EQ(value + 1, contextFence.currentFenceValue); EXPECT_EQ(value, wddmDirectSubmission.ringBuffers[wddmDirectSubmission.currentRingBuffer].completionFence); @@ -465,7 +465,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmDisableMonitorFenceWhenUpdatingTagVa wddmDirectSubmission.disableMonitorFence = true; EXPECT_TRUE(wddmDirectSubmission.allocateOsResources()); - uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(false); + uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(wddmDirectSubmission.dispatchMonitorFenceRequired(false)); EXPECT_EQ(0ull, actualTagValue); EXPECT_EQ(value, contextFence.currentFenceValue); } @@ -481,7 +481,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmDisableMonitorFenceAndStallingCmdsWh wddmDirectSubmission.disableMonitorFence = true; EXPECT_TRUE(wddmDirectSubmission.allocateOsResources()); - uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(true); + uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(wddmDirectSubmission.dispatchMonitorFenceRequired(true)); EXPECT_EQ(value, actualTagValue); EXPECT_EQ(value + 1, contextFence.currentFenceValue); EXPECT_EQ(value, wddmDirectSubmission.ringBuffers[wddmDirectSubmission.currentRingBuffer].completionFence); @@ -740,6 +740,7 @@ HWTEST_F(WddmDirectSubmissionTest, FlushStampTracker flushStamp(true); MockWddmDirectSubmission wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement); wddmDirectSubmission.disableMonitorFence = false; bool ret = wddmDirectSubmission.initialize(true, true); @@ -813,3 +814,129 @@ HWTEST_F(WddmDirectSubmissionTest, EXPECT_TRUE(wddmDirectSubmission.dispatchMonitorFenceRequired(true)); } + +HWTEST_F(WddmDirectSubmissionTest, 
+ givenWddmDirectSubmissionWithDisabledMonitorFenceWhenInputPolicyIsStallingCommandAndBatchBufferDispatchedWithExplicitMonitorFenceFlagThenDispatchNoPostSyncOperation) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(0); + + BatchBuffer batchBuffer = {}; + GraphicsAllocation *clientCommandBuffer = nullptr; + std::unique_ptr clientStream; + + auto memoryManager = executionEnvironment->memoryManager.get(); + const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000, + AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}; + clientCommandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties); + ASSERT_NE(nullptr, clientCommandBuffer); + + clientStream = std::make_unique(clientCommandBuffer); + clientStream->getSpace(0x40); + + memset(clientStream->getCpuBase(), 0, 0x20); + + batchBuffer.endCmdPtr = ptrOffset(clientStream->getCpuBase(), 0x20); + batchBuffer.commandBufferAllocation = clientCommandBuffer; + batchBuffer.usedSize = 0x40; + batchBuffer.taskStartAddress = clientCommandBuffer->getGpuAddress(); + batchBuffer.stream = clientStream.get(); + batchBuffer.dispatchMonitorFence = true; + + FlushStampTracker flushStamp(true); + + MockWddmDirectSubmission wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement); + wddmDirectSubmission.disableMonitorFence = true; + + bool ret = wddmDirectSubmission.initialize(true, true); + EXPECT_TRUE(ret); + EXPECT_TRUE(wddmDirectSubmission.useNotifyForPostSync); + + size_t sizeUsedBefore = wddmDirectSubmission.ringCommandStream.getUsed(); + ret = wddmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); + EXPECT_TRUE(ret); + + HardwareParse 
hwParse; + hwParse.parsePipeControl = true; + hwParse.parseCommands(wddmDirectSubmission.ringCommandStream, sizeUsedBefore); + hwParse.findHardwareCommands(); + + bool foundFenceUpdate = false; + for (auto it = hwParse.pipeControlList.begin(); it != hwParse.pipeControlList.end(); it++) { + auto pipeControl = genCmdCast(*it); + if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + foundFenceUpdate = true; + break; + } + } + EXPECT_FALSE(foundFenceUpdate); + + memoryManager->freeGraphicsMemory(clientCommandBuffer); +} + +HWTEST_F(WddmDirectSubmissionTest, + givenWddmDirectSubmissionWithDisabledMonitorFenceWhenInputPolicyIsExplicitMonitorFenceAndBatchBufferDispatchedWithStallingCommandFlagThenDispatchNoPostSyncOperation) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(1); + + BatchBuffer batchBuffer = {}; + GraphicsAllocation *clientCommandBuffer = nullptr; + std::unique_ptr clientStream; + + auto memoryManager = executionEnvironment->memoryManager.get(); + const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000, + AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}; + clientCommandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties); + ASSERT_NE(nullptr, clientCommandBuffer); + + clientStream = std::make_unique(clientCommandBuffer); + clientStream->getSpace(0x40); + + memset(clientStream->getCpuBase(), 0, 0x20); + + batchBuffer.endCmdPtr = ptrOffset(clientStream->getCpuBase(), 0x20); + batchBuffer.commandBufferAllocation = clientCommandBuffer; + batchBuffer.usedSize = 0x40; + batchBuffer.taskStartAddress = clientCommandBuffer->getGpuAddress(); + batchBuffer.stream = clientStream.get(); + 
batchBuffer.hasStallingCmds = true; + + FlushStampTracker flushStamp(true); + + MockWddmDirectSubmission wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver); + EXPECT_TRUE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement); + wddmDirectSubmission.disableMonitorFence = true; + + bool ret = wddmDirectSubmission.initialize(true, true); + EXPECT_TRUE(ret); + EXPECT_TRUE(wddmDirectSubmission.useNotifyForPostSync); + + size_t sizeUsedBefore = wddmDirectSubmission.ringCommandStream.getUsed(); + ret = wddmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); + EXPECT_TRUE(ret); + + HardwareParse hwParse; + hwParse.parsePipeControl = true; + hwParse.parseCommands(wddmDirectSubmission.ringCommandStream, sizeUsedBefore); + hwParse.findHardwareCommands(); + + bool foundFenceUpdate = false; + for (auto it = hwParse.pipeControlList.begin(); it != hwParse.pipeControlList.end(); it++) { + auto pipeControl = genCmdCast(*it); + if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + foundFenceUpdate = true; + break; + } + } + EXPECT_FALSE(foundFenceUpdate); + + memoryManager->freeGraphicsMemory(clientCommandBuffer); +} diff --git a/shared/test/unit_test/mocks/windows/mock_wddm_direct_submission.h b/shared/test/unit_test/mocks/windows/mock_wddm_direct_submission.h index f862b66d52..f9f0d00ba2 100644 --- a/shared/test/unit_test/mocks/windows/mock_wddm_direct_submission.h +++ b/shared/test/unit_test/mocks/windows/mock_wddm_direct_submission.h @@ -34,6 +34,7 @@ struct MockWddmDirectSubmission : public WddmDirectSubmission