fix: call flushMonitorFence on BCS CSR

Related-To: NEO-12477

Also, make sure that the global fence allocation is always resident.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-12-03 11:40:08 +00:00
committed by Compute-Runtime-Automation
parent 0ecbc627bd
commit d5e5c8e59e
7 changed files with 84 additions and 11 deletions

View File

@@ -67,7 +67,7 @@ struct DirectSubmissionInputParams : NonCopyableClass {
OsContext &osContext;
const RootDeviceEnvironment &rootDeviceEnvironment;
MemoryManager *memoryManager = nullptr;
const GraphicsAllocation *globalFenceAllocation = nullptr;
GraphicsAllocation *globalFenceAllocation = nullptr;
GraphicsAllocation *workPartitionAllocation = nullptr;
GraphicsAllocation *completionFenceAllocation = nullptr;
TaskCountType initialCompletionFenceValue = 0;
@@ -192,6 +192,7 @@ class DirectSubmissionHw {
void updateRelaxedOrderingQueueSize(uint32_t newSize);
virtual void makeGlobalFenceAlwaysResident(){};
struct RingBufferUse {
RingBufferUse() = default;
RingBufferUse(FlushStamp completionFence, GraphicsAllocation *ringBuffer) : completionFence(completionFence), ringBuffer(ringBuffer){};
@@ -223,7 +224,7 @@ class DirectSubmissionHw {
MemoryOperationsHandler *memoryOperationHandler = nullptr;
const HardwareInfo *hwInfo = nullptr;
const RootDeviceEnvironment &rootDeviceEnvironment;
const GraphicsAllocation *globalFenceAllocation = nullptr;
GraphicsAllocation *globalFenceAllocation = nullptr;
GraphicsAllocation *completionFenceAllocation = nullptr;
GraphicsAllocation *semaphores = nullptr;
GraphicsAllocation *workPartitionAllocation = nullptr;

View File

@@ -1241,6 +1241,7 @@ size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getDiagnosticModeSection() {
// Encodes the system memory fence address into the ring command stream.
// The residency hook is invoked first, so the global fence allocation is
// handled by makeGlobalFenceAlwaysResident() before its address is encoded.
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSystemMemoryFenceAddress() {
    // Virtual hook: empty in DirectSubmissionHw, overridden by WddmDirectSubmission.
    this->makeGlobalFenceAlwaysResident();

    auto &ringStream = this->ringCommandStream;
    EncodeMemoryFence<GfxFamily>::encodeSystemMemoryFence(ringStream, this->globalFenceAllocation);
}

View File

@@ -42,6 +42,7 @@ class WddmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
void getTagAddressValue(TagData &tagData) override;
bool isCompleted(uint32_t ringBufferIndex) override;
MOCKABLE_VIRTUAL void updateMonitorFenceValueForResidencyList(ResidencyContainer *allocationsForResidency);
void makeGlobalFenceAlwaysResident() override;
OsContextWin *osContextWin;
Wddm *wddm;

View File

@@ -219,4 +219,13 @@ inline void WddmDirectSubmission<GfxFamily, Dispatcher>::unblockPagingFenceSemap
this->semaphoreData->pagingFenceCounter = static_cast<uint32_t>(*this->wddm->getPagingFenceAddress());
}
// Requests residency for the global fence allocation through
// makeResourcesResident(). Does nothing when no global fence allocation is
// set; aborts via UNRECOVERABLE_IF when the residency request fails.
template <typename GfxFamily, typename Dispatcher>
inline void WddmDirectSubmission<GfxFamily, Dispatcher>::makeGlobalFenceAlwaysResident() {
    if (this->globalFenceAllocation == nullptr) {
        return;
    }

    DirectSubmissionAllocations fenceResidencyList;
    fenceResidencyList.push_back(this->globalFenceAllocation);

    const auto residencyResult = this->makeResourcesResident(fenceResidencyList);
    UNRECOVERABLE_IF(!residencyResult);
}
} // namespace NEO

View File

@@ -189,9 +189,10 @@ template <typename GfxFamily>
void WddmCommandStreamReceiver<GfxFamily>::flushMonitorFence() {
    // Forward the flush to whichever direct submission object exists.
    // directSubmission takes precedence; blitterDirectSubmission is used only
    // when directSubmission is not set. At most one of them is flushed.
    if (this->directSubmission.get() != nullptr) {
        this->directSubmission->flushMonitorFence();
        return;
    }

    if (this->blitterDirectSubmission.get() != nullptr) {
        this->blitterDirectSubmission->flushMonitorFence();
    }
}
template <typename GfxFamily>
void WddmCommandStreamReceiver<GfxFamily>::kmDafLockAllocations(ResidencyContainer &allocationsForResidency) {
for (auto &graphicsAllocation : allocationsForResidency) {

View File

@@ -8,6 +8,7 @@
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/flush_stamp.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/os_interface/windows/sys_calls.h"
@@ -24,6 +25,7 @@
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/unit_test/direct_submission/direct_submission_controller_mock.h"
#include "shared/test/unit_test/mocks/windows/mock_wddm_direct_submission.h"
extern uint64_t cpuFence;
namespace NEO {
@@ -82,6 +84,9 @@ struct WddmDirectSubmissionWithMockGdiDllFixture : public WddmFixtureWithMockGdi
using WddmDirectSubmissionWithMockGdiDllTest = Test<WddmDirectSubmissionWithMockGdiDllFixture>;
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThenExpectProperCommandsDispatched) {
DebugManagerStateRestore restorer;
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
std::unique_ptr<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>> wddmDirectSubmission =
std::make_unique<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>>(*device->getDefaultEngine().commandStreamReceiver);
@@ -115,6 +120,33 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThe
EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled);
}
// Checks that initializing WDDM direct submission with
// DirectSubmissionInsertExtraMiMemFenceCommands enabled makes the CSR's global
// fence allocation resident on configurations that require a fence allocation
// and do not use heapless state init.
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedWithMiMemFenceSupportedThenMakeGlobalFenceResident) {
    using Submission = MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>;

    DebugManagerStateRestore restorer;
    debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);

    auto &defaultCsr = *device->getDefaultEngine().commandStreamReceiver;
    auto wddmDirectSubmission = std::make_unique<Submission>(defaultCsr);
    EXPECT_EQ(1u, wddmDirectSubmission->commandBufferHeader->NeedsMidBatchPreEmptionSupport);

    EXPECT_TRUE(wddmDirectSubmission->initialize(true, false));
    EXPECT_TRUE(wddmDirectSubmission->ringStart);

    // The residency expectations apply only when the platform needs a fence
    // allocation and heapless state init is not enabled.
    const auto fenceAllocationRequired = device->getGfxCoreHelper().isFenceAllocationRequired(device->getHardwareInfo());
    auto &compilerProductHelper = device->getCompilerProductHelper();
    const auto heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(compilerProductHelper.isHeaplessModeEnabled());
    const bool expectFenceResidency = fenceAllocationRequired && !heaplessStateInitEnabled;
    if (expectFenceResidency) {
        EXPECT_EQ(1u, wddm->makeResidentResult.handleCount);
        EXPECT_TRUE(defaultCsr.getGlobalFenceAllocation()->isExplicitlyMadeResident());
    }

    // Leave the ring fence value (1) below the current ring buffer's
    // completion fence (2) before destroying the object; the test then expects
    // exactly one CPU wait and one monitor fence destruction.
    *wddmDirectSubmission->ringFence.cpuAddress = 1ull;
    auto &currentRing = wddmDirectSubmission->ringBuffers[wddmDirectSubmission->currentRingBuffer];
    currentRing.completionFence = 2ull;

    wddmDirectSubmission.reset();
    EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
    EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled);
}
using WddmDirectSubmissionNoPreemptionTest = WddmDirectSubmissionFixture<PreemptionMode::Disabled>;
HWTEST_F(WddmDirectSubmissionNoPreemptionTest, givenWddmWhenDirectIsInitializedAndNotStartedThenExpectNoCommandsDispatched) {
std::unique_ptr<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>> wddmDirectSubmission =

View File

@@ -1103,10 +1103,10 @@ HWTEST_F(WddmCsrCompressionTests, givenDisabledCompressionWhenFlushingThenDontIn
memoryManager->freeGraphicsMemory(graphicsAllocation);
}
template <typename GfxFamily>
struct MockWddmDrmDirectSubmissionDispatchCommandBuffer : public MockWddmDirectSubmission<GfxFamily, RenderDispatcher<GfxFamily>> {
MockWddmDrmDirectSubmissionDispatchCommandBuffer<GfxFamily>(const CommandStreamReceiver &commandStreamReceiver)
: MockWddmDirectSubmission<GfxFamily, RenderDispatcher<GfxFamily>>(commandStreamReceiver) {
template <typename GfxFamily, typename Dispatcher>
struct MockWddmDrmDirectSubmissionDispatchCommandBuffer : public MockWddmDirectSubmission<GfxFamily, Dispatcher> {
MockWddmDrmDirectSubmissionDispatchCommandBuffer<GfxFamily, Dispatcher>(const CommandStreamReceiver &commandStreamReceiver)
: MockWddmDirectSubmission<GfxFamily, Dispatcher>(commandStreamReceiver) {
}
bool dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) override {
@@ -1123,7 +1123,8 @@ struct MockWddmDrmDirectSubmissionDispatchCommandBuffer : public MockWddmDirectS
};
HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenCsrWhenFlushMonitorFenceThenFlushMonitorFenceOnDirectSubmission) {
using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer<FamilyType>;
using Dispatcher = RenderDispatcher<FamilyType>;
using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer<FamilyType, Dispatcher>;
auto mockCsr = static_cast<MockWddmCsr<FamilyType> *>(csr);
debugManager.flags.EnableDirectSubmission.set(1);
@@ -1156,8 +1157,34 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenCsrWhenFlushMonitorFenceTh
EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 1u);
}
// Checks that flushMonitorFence() on the CSR is forwarded to the blitter
// direct submission when the CSR runs on a BCS context and only
// blitterDirectSubmission is set.
HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnBcsWhenCsrFlushMonitorFenceCalledThenFlushCalled) {
    using Dispatcher = BlitterDispatcher<FamilyType>;
    using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer<FamilyType, Dispatcher>;
    auto mockCsr = static_cast<MockWddmCsr<FamilyType> *>(csr);

    // Re-target the CSR to a regular BCS engine context.
    OsContextWin bcsOsContext(*wddm, 0, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::regular}));
    bcsOsContext.ensureContextInitialized(false);
    mockCsr->setupContext(bcsOsContext);

    // NOTE(review): debug flags and hwInfo are modified here without a restorer
    // visible in this test - presumably the fixture restores them; confirm.
    debugManager.flags.EnableDirectSubmission.set(1);
    debugManager.flags.DirectSubmissionFlatRingBuffer.set(0);
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = true;

    mockCsr->blitterDirectSubmission = std::make_unique<MockSubmission>(*device->getDefaultEngine().commandStreamReceiver);
    // Base-to-derived downcast inside the class hierarchy: static_cast is the
    // appropriate cast here (reinterpret_cast performs no hierarchy adjustment).
    auto directSubmission = static_cast<MockSubmission *>(mockCsr->blitterDirectSubmission.get());

    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
    EXPECT_TRUE(csr->isBlitterDirectSubmissionEnabled());

    EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 0u);
    csr->flushMonitorFence();
    EXPECT_EQ(directSubmission->flushMonitorFenceCalled, 1u);
}
HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenLastSubmittedFenceLowerThanFenceValueToWaitWhenWaitFromCpuThenFlushMonitorFence) {
using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer<FamilyType>;
using Dispatcher = RenderDispatcher<FamilyType>;
using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer<FamilyType, Dispatcher>;
auto mockCsr = static_cast<MockWddmCsr<FamilyType> *>(csr);
debugManager.flags.EnableDirectSubmission.set(1);
@@ -1197,7 +1224,8 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenLastSubmittedFenceLowerTha
}
HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionFailsThenFlushReturnsError) {
using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer<FamilyType>;
using Dispatcher = RenderDispatcher<FamilyType>;
using MockSubmission = MockWddmDrmDirectSubmissionDispatchCommandBuffer<FamilyType, Dispatcher>;
auto mockCsr = static_cast<MockWddmCsr<FamilyType> *>(csr);
bool renderStreamerFound = false;
@@ -1544,4 +1572,4 @@ TEST_F(SemaphorWaitForResidencyTest, givenIllegalAllocationTypeThenDontSignalFla
EXPECT_TRUE(batchBuffer.pagingFenceSemInfo.requiresBlockingResidencyHandling);
memoryManager->freeGraphicsMemory(cmdBuffer);
}
}