fix: make global fence always resident on linux

Related-To: NEO-13843

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-03-04 10:03:04 +00:00
committed by Compute-Runtime-Automation
parent 1876a43024
commit efb814d979
9 changed files with 33 additions and 20 deletions

View File

@@ -194,7 +194,6 @@ class DirectSubmissionHw {
void updateRelaxedOrderingQueueSize(uint32_t newSize);
// Residency hook for the global fence allocation. The default body is empty;
// OS-specific direct submission classes may override it (WddmDirectSubmission does).
virtual void makeGlobalFenceAlwaysResident(){};
struct RingBufferUse {
RingBufferUse() = default;
RingBufferUse(FlushStamp completionFence, GraphicsAllocation *ringBuffer) : completionFence(completionFence), ringBuffer(ringBuffer){};

View File

@@ -361,6 +361,9 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
if (completionFenceAllocation != nullptr) {
allocations.push_back(completionFenceAllocation);
}
if (this->globalFenceAllocation != nullptr && this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) {
allocations.push_back(globalFenceAllocation);
}
if (this->relaxedOrderingEnabled) {
const AllocationProperties allocationProperties(rootDeviceIndex,
@@ -1239,7 +1242,6 @@ size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getDiagnosticModeSection() {
// Dispatches the system memory fence address into ringCommandStream.
// Calls the makeGlobalFenceAlwaysResident() hook first, then passes
// globalFenceAllocation to EncodeMemoryFence for encoding.
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSystemMemoryFenceAddress() {
// Virtual hook; the base implementation is a no-op and OS-specific subclasses may override it.
this->makeGlobalFenceAlwaysResident();
// Encodes the fence into ringCommandStream using the global fence allocation.
EncodeMemoryFence<GfxFamily>::encodeSystemMemoryFence(ringCommandStream, this->globalFenceAllocation);
}

View File

@@ -42,7 +42,6 @@ class WddmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
void getTagAddressValue(TagData &tagData) override;
bool isCompleted(uint32_t ringBufferIndex) override;
MOCKABLE_VIRTUAL void updateMonitorFenceValueForResidencyList(ResidencyContainer *allocationsForResidency);
void makeGlobalFenceAlwaysResident() override;
OsContextWin *osContextWin;
Wddm *wddm;

View File

@@ -219,13 +219,4 @@ inline void WddmDirectSubmission<GfxFamily, Dispatcher>::unblockPagingFenceSemap
this->semaphoreData->pagingFenceCounter = static_cast<uint32_t>(*this->wddm->getPagingFenceAddress());
}
// Requests residency for the global fence allocation, when one exists.
// A failed makeResourcesResident() call is treated as unrecoverable.
template <typename GfxFamily, typename Dispatcher>
inline void WddmDirectSubmission<GfxFamily, Dispatcher>::makeGlobalFenceAlwaysResident() {
    // Nothing to do when no global fence allocation has been set.
    if (this->globalFenceAllocation == nullptr) {
        return;
    }

    // Residency is requested for the fence alone, via a one-element list.
    DirectSubmissionAllocations fenceOnly;
    fenceOnly.push_back(this->globalFenceAllocation);
    UNRECOVERABLE_IF(!this->makeResourcesResident(fenceOnly));
}
} // namespace NEO

View File

@@ -54,6 +54,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::getSizeStartSection;
using BaseClass::getSizeSwitchRingBufferSection;
using BaseClass::getSizeSystemMemoryFenceAddress;
using BaseClass::globalFenceAllocation;
using BaseClass::hwInfo;
using BaseClass::immWritePostSyncOffset;
using BaseClass::inputMonitorFenceDispatchRequirement;

View File

@@ -245,6 +245,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithoutCompletionFenceAlloca
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get());
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
auto miMemFenceRequired = directSubmission.globalFenceAllocation && directSubmission.miMemFenceRequired && !directSubmission.systemMemoryFenceAddressSet;
directSubmission.callBaseResident = true;
bool ret = directSubmission.initialize(true);
@@ -256,7 +257,9 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithoutCompletionFenceAlloca
if (gfxCoreHelper.isRelaxedOrderingSupported()) {
expectedAllocationsCnt += 2;
}
if (miMemFenceRequired) {
expectedAllocationsCnt++;
}
EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount);
ASSERT_EQ(expectedAllocationsCnt, mockMemoryOperations->gfxAllocationsForMakeResident.size());
EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
@@ -273,6 +276,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithCompletionFenceAllocatio
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get());
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
auto miMemFenceRequired = directSubmission.globalFenceAllocation && directSubmission.miMemFenceRequired && !directSubmission.systemMemoryFenceAddressSet;
MockGraphicsAllocation completionFenceAllocation{};
@@ -288,13 +292,18 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithCompletionFenceAllocatio
if (gfxCoreHelper.isRelaxedOrderingSupported()) {
expectedAllocationsCnt += 2;
}
if (miMemFenceRequired) {
expectedAllocationsCnt++;
}
EXPECT_EQ(1, mockMemoryOperations->makeResidentCalledCount);
ASSERT_EQ(expectedAllocationsCnt, mockMemoryOperations->gfxAllocationsForMakeResident.size());
EXPECT_EQ(directSubmission.ringBuffers[0].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[0]);
EXPECT_EQ(directSubmission.ringBuffers[1].ringBuffer, mockMemoryOperations->gfxAllocationsForMakeResident[1]);
EXPECT_EQ(directSubmission.semaphores, mockMemoryOperations->gfxAllocationsForMakeResident[2]);
EXPECT_EQ(directSubmission.completionFenceAllocation, mockMemoryOperations->gfxAllocationsForMakeResident[3]);
if (miMemFenceRequired) {
EXPECT_EQ(directSubmission.globalFenceAllocation, mockMemoryOperations->gfxAllocationsForMakeResident[4]);
}
pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release();
}

View File

@@ -839,6 +839,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(directSubmission.partitionConfigSet);
auto miMemFenceRequired = directSubmission.globalFenceAllocation && directSubmission.miMemFenceRequired && !directSubmission.systemMemoryFenceAddressSet;
directSubmission.activeTiles = 2;
directSubmission.partitionedMode = true;
directSubmission.partitionConfigSet = false;
@@ -864,6 +865,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
submitSize += RelaxedOrderingHelper::getSizeRegistersInit<FamilyType>();
}
if (miMemFenceRequired) {
expectedAllocationsCount += 1;
}
EXPECT_EQ(submitSize, directSubmission.submitSize);
EXPECT_EQ(1u, directSubmission.handleResidencyCount);
EXPECT_EQ(expectedAllocationsCount, directSubmission.makeResourcesResidentVectorSize);

View File

@@ -136,7 +136,6 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedWithMiMemFenc
auto &compilerProductHelper = device->getCompilerProductHelper();
auto isHeaplessStateInit = compilerProductHelper.isHeaplessStateInitEnabled(compilerProductHelper.isHeaplessModeEnabled());
if (isFenceRequired && !isHeaplessStateInit) {
EXPECT_EQ(1u, wddm->makeResidentResult.handleCount);
EXPECT_TRUE(device->getDefaultEngine().commandStreamReceiver->getGlobalFenceAllocation()->isExplicitlyMadeResident());
}
*wddmDirectSubmission->ringFence.cpuAddress = 1ull;
@@ -151,7 +150,7 @@ using WddmDirectSubmissionNoPreemptionTest = WddmDirectSubmissionFixture<Preempt
HWTEST_F(WddmDirectSubmissionNoPreemptionTest, givenWddmWhenDirectIsInitializedAndNotStartedThenExpectNoCommandsDispatched) {
std::unique_ptr<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>> wddmDirectSubmission =
std::make_unique<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>>(*device->getDefaultEngine().commandStreamReceiver);
auto miMemFenceRequired = wddmDirectSubmission->globalFenceAllocation && wddmDirectSubmission->miMemFenceRequired && !wddmDirectSubmission->systemMemoryFenceAddressSet;
EXPECT_EQ(0u, wddmDirectSubmission->commandBufferHeader->NeedsMidBatchPreEmptionSupport);
bool ret = wddmDirectSubmission->initialize(false);
@@ -166,6 +165,9 @@ HWTEST_F(WddmDirectSubmissionNoPreemptionTest, givenWddmWhenDirectIsInitializedA
if (gfxCoreHelper.isRelaxedOrderingSupported()) {
expectedAllocationsCnt += 2;
}
if (miMemFenceRequired) {
expectedAllocationsCnt++;
}
EXPECT_EQ(expectedAllocationsCnt, wddm->makeResidentResult.handleCount);
EXPECT_EQ(1u, wddmMockInterface->createMonitoredFenceCalled);
@@ -199,7 +201,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSubmitingCmdBufferThenExpectPass
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesThenExpectRingMonitorFenceCreatedAndAllocationsResident) {
MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
auto miMemFenceRequired = wddmDirectSubmission.globalFenceAllocation && wddmDirectSubmission.miMemFenceRequired && !wddmDirectSubmission.systemMemoryFenceAddressSet;
bool ret = wddmDirectSubmission.allocateResources();
EXPECT_TRUE(ret);
auto &gfxCoreHelper = device->getGfxCoreHelper();
@@ -207,7 +209,9 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesThenExpectRin
if (gfxCoreHelper.isRelaxedOrderingSupported()) {
expectedAllocationsCnt += 2;
}
if (miMemFenceRequired) {
expectedAllocationsCnt++;
}
EXPECT_EQ(1u, wddmMockInterface->createMonitoredFenceCalled);
EXPECT_EQ(expectedAllocationsCnt, wddm->makeResidentResult.handleCount);
}
@@ -237,7 +241,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesFenceCreation
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesResidencyFailsThenExpectRingMonitorFenceCreatedAndAllocationsNotResident) {
MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
auto miMemFenceRequired = wddmDirectSubmission.globalFenceAllocation && wddmDirectSubmission.miMemFenceRequired && !wddmDirectSubmission.systemMemoryFenceAddressSet;
wddm->callBaseMakeResident = false;
wddm->makeResidentStatus = false;
@@ -248,7 +252,9 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesResidencyFail
if (gfxCoreHelper.isRelaxedOrderingSupported()) {
expectedAllocationsCnt += 2;
}
if (miMemFenceRequired) {
expectedAllocationsCnt++;
}
EXPECT_EQ(0u, wddmMockInterface->createMonitoredFenceCalled);
EXPECT_EQ(expectedAllocationsCnt, wddm->makeResidentResult.handleCount);
}

View File

@@ -28,6 +28,7 @@ struct MockWddmDirectSubmission : public WddmDirectSubmission<GfxFamily, Dispatc
using BaseClass::getSizeSwitchRingBufferSection;
using BaseClass::getSizeSystemMemoryFenceAddress;
using BaseClass::getTagAddressValue;
using BaseClass::globalFenceAllocation;
using BaseClass::gpuVaForAdditionalSynchronizationWA;
using BaseClass::handleCompletionFence;
using BaseClass::handleNewResourcesSubmission;
@@ -49,6 +50,7 @@ struct MockWddmDirectSubmission : public WddmDirectSubmission<GfxFamily, Dispatc
using BaseClass::semaphores;
using BaseClass::submit;
using BaseClass::switchRingBuffers;
using BaseClass::systemMemoryFenceAddressSet;
using BaseClass::updateMonitorFenceValueForResidencyList;
using BaseClass::updateTagValue;
using BaseClass::wddm;