fix: check reset status after completion

Added logic to check the reset status after completion, to make sure
the check is performed at least once.

Related-To: GSD-8902
Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
This commit is contained in:
Young Jin Yoon
2024-05-07 16:58:28 +00:00
committed by Compute-Runtime-Automation
parent aa0441bc63
commit 2c488d9e84
10 changed files with 150 additions and 10 deletions

View File

@@ -275,7 +275,7 @@ int DrmMock::ioctl(DrmIoctl request, void *arg) {
return storedRetValForGemClose;
}
if (request == DrmIoctl::getResetStats && arg != nullptr) {
ioctlCount.gemResetStats++;
ioctlCount.getResetStats++;
auto outResetStats = static_cast<ResetStats *>(arg);
for (const auto &resetStats : resetStatsToReturn) {
if (resetStats.contextId == outResetStats->contextId) {

View File

@@ -33,7 +33,7 @@ void Ioctls::reset() {
gemSetDomain = 0;
gemWait = 0;
gemClose = 0;
gemResetStats = 0;
getResetStats = 0;
regRead = 0;
getParam = 0;
contextGetParam = 0;
@@ -62,6 +62,7 @@ void DrmMockCustom::testIoctls() {
NEO_IOCTL_EXPECT_EQ(gemSetDomain);
NEO_IOCTL_EXPECT_EQ(gemWait);
NEO_IOCTL_EXPECT_EQ(gemClose);
NEO_IOCTL_EXPECT_EQ(getResetStats);
NEO_IOCTL_EXPECT_EQ(regRead);
NEO_IOCTL_EXPECT_EQ(getParam);
NEO_IOCTL_EXPECT_EQ(contextGetParam);
@@ -206,6 +207,10 @@ int DrmMockCustom::ioctl(DrmIoctl request, void *arg) {
vmCreate->vmId = vmIdToCreate;
break;
}
case DrmIoctl::getResetStats: {
ioctlCnt.getResetStats++;
break;
}
default:
int res = ioctlExtra(request, arg);
if (returnIoctlExtraErrorValue) {
@@ -245,6 +250,11 @@ int DrmMockCustom::waitUserFence(uint32_t ctxId, uint64_t address, uint64_t valu
if (waitUserFenceCall.called == waitUserFenceCall.failSpecificCall) {
return 123;
}
if (waitUserFenceCall.failOnWaitUserFence == true) {
return -1;
}
return Drm::waitUserFence(ctxId, address, value, dataWidth, timeout, flags, userInterrupt, externalInterruptId, allocForInterruptWait);
}

View File

@@ -47,7 +47,7 @@ class Ioctls {
std::atomic<int32_t> gemSetDomain;
std::atomic<int32_t> gemWait;
std::atomic<int32_t> gemClose;
std::atomic<int32_t> gemResetStats;
std::atomic<int32_t> getResetStats;
std::atomic<int32_t> regRead;
std::atomic<int32_t> getParam;
std::atomic<int32_t> contextGetParam;
@@ -99,7 +99,9 @@ class DrmMockCustom : public Drm {
public:
using Drm::bindAvailable;
using Drm::cacheInfo;
using Drm::checkToDisableScratchPage;
using Drm::completionFenceSupported;
using Drm::disableScratch;
using Drm::ioctlHelper;
using Drm::memoryInfo;
using Drm::pageFaultSupported;
@@ -123,6 +125,8 @@ class DrmMockCustom : public Drm {
uint32_t called = 0u;
uint32_t failSpecificCall = 0;
bool failOnWaitUserFence = false;
int errnoForFailedWaitUserFence = 0;
};
struct IsVmBindAvailableCall {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -28,5 +28,9 @@ class DrmMockCustomPrelim : public DrmMockCustom {
return context.execBufferExtensions(arg);
}
// Mock override: unconditionally returns false and never inspects osContext.
// NOTE(review): presumably false means "no GPU reset detected" — confirm
// against the base-class contract.
bool checkResetStatus(OsContext &osContext) override {
return false;
}
DrmMockCustomPrelimContext context{};
};

View File

@@ -1142,6 +1142,64 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
EXPECT_EQ(Drm::ValueWidth::u64, mock->waitUserFenceCall.dataWidth);
}
// Sweeps every combination of reported errno, scratch-page setting and
// GPU-fault-check threshold while the mocked user-fence wait is forced to fail,
// and verifies that checkResetStatus is invoked exactly once only for
// EIO + scratch pages disabled + non-zero threshold, and never otherwise.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
                   givenWaitUserFenceFlagSetAndVmBindAvailableAndUseDrmCtxWhenDrmCsrWaitsForFlushStampiAndDifferentScratchPageOptionsThenCallResetStatusOnlyScratchPageDisabled) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableUserFenceForCompletionWait.set(1);

    // Report VM bind as available without delegating to the parent implementation.
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = true;

    for (int waitErrno : {EIO, ETIME}) {
        for (bool scratchPageDisabled : {false, true}) {
            for (int faultCheckThreshold : {0, 10}) {
                debugManager.flags.DisableScratchPages.set(scratchPageDisabled);
                debugManager.flags.GpuFaultCheckThreshold.set(faultCheckThreshold);
                mock->disableScratch = scratchPageDisabled;

                // A fresh CSR per combination; it is handed over to the device below.
                auto *csr = new TestedDrmCommandStreamReceiver<FamilyType>(GemCloseWorkerMode::gemCloseWorkerInactive,
                                                                           *this->executionEnvironment,
                                                                           1);
                EXPECT_TRUE(csr->useUserFenceWait);
                EXPECT_TRUE(csr->isUsedNotifyEnableForPostSync());
                device->resetCommandStreamReceiver(csr);

                // Clear all counters and arm the mock so the fence wait fails with waitErrno.
                mock->ioctlCnt.reset();
                mock->waitUserFenceCall.called = 0u;
                mock->checkResetStatusCalled = 0u;
                mock->waitUserFenceCall.failOnWaitUserFence = true;
                mock->errnoValue = waitErrno;
                csr->waitUserFenceResult.callParent = true;

                // Overwrite the DRM context ids with consecutive values starting at 5.
                auto linuxContext = static_cast<const OsContextLinux *>(device->getDefaultEngine().osContext);
                auto &contextIds = const_cast<std::vector<uint32_t> &>(linuxContext->getDrmContextIds());
                uint32_t nextContextId = 5u;
                for (auto &contextId : contextIds) {
                    contextId = nextContextId++;
                }

                TaskCountType fenceValue = 1;
                TaskCountType awaitedValue = 2;
                uint64_t fenceAddress = castToUint64(&fenceValue);
                csr->waitUserFence(awaitedValue, fenceAddress, -1, false, NEO::InterruptId::notUsed, nullptr);

                EXPECT_EQ(0, mock->ioctlCnt.gemWait);
                EXPECT_EQ(1u, csr->waitUserFenceResult.called);
                EXPECT_EQ(2u, csr->waitUserFenceResult.waitValue);
                EXPECT_EQ(1u, mock->waitUserFenceCall.called);

                const bool resetCheckExpected = (waitErrno == EIO) && scratchPageDisabled && (faultCheckThreshold != 0);
                EXPECT_EQ(resetCheckExpected ? 1u : 0u, mock->checkResetStatusCalled);
            }
        }
    }
}
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
givenWaitUserFenceFlagSetAndVmBindNotAvailableWhenDrmCsrWaitsForFlushStampThenExpectUseDrmGemWaitCall) {
DebugManagerStateRestore restorer;

View File

@@ -327,7 +327,8 @@ TEST_F(DrmMemoryManagerTest, GivenAllocatePhysicalDeviceMemoryThenSuccessReturne
memoryManager->freeGraphicsMemory(allocation);
}
TEST_F(DrmMemoryManagerTest, whenCallingChekcUnexpectedGpuPagedfaultThenAllEnginesWereChecked) {
TEST_F(DrmMemoryManagerTest, whenCallingCheckUnexpectedGpuPagedfaultThenAllEnginesWereChecked) {
mock->ioctlExpected.total = -1; // don't care
memoryManager->checkUnexpectedGpuPageFault();
size_t allEnginesSize = 0u;
for (auto &engineContainer : memoryManager->allRegisteredEngines) {
@@ -7357,6 +7358,53 @@ TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOf
memoryManager->freeGraphicsMemory(allocation);
}
// Runs handleFenceCompletion for every combination of reported errno,
// scratch-page setting and GPU-fault-check threshold with the mocked user-fence
// wait forced to fail. The wait must always be issued once with the expected
// address and value; checkResetStatus must be called once only for
// EIO + scratch pages disabled + non-zero threshold.
TEST_F(DrmMemoryManagerTest, givenDrmAllocationWithDifferentScratchPageOptionsWhenHandleFenceCompletionThenCallResetStatsOnlyWithScratchPageDisabledAndProperFaultCheckThreshold) {
    mock->ioctlExpected.total = -1;
    DebugManagerStateRestore dbgStateRestore;
    VariableBackup<bool> backupFenceSupported{&mock->completionFenceSupported, true};
    VariableBackup<bool> backupVmBindCallParent{&mock->isVmBindAvailableCall.callParent, false};
    VariableBackup<bool> backupVmBindReturnValue{&mock->isVmBindAvailableCall.returnValue, true};

    for (int waitErrno : {EIO, ETIME}) {
        for (bool scratchPageDisabled : {false, true}) {
            for (int faultCheckThreshold : {0, 10}) {
                debugManager.flags.DisableScratchPages.set(scratchPageDisabled);
                debugManager.flags.GpuFaultCheckThreshold.set(faultCheckThreshold);
                mock->disableScratch = scratchPageDisabled;

                // Clear all counters and arm the mock so the fence wait fails with waitErrno.
                mock->ioctlCnt.reset();
                mock->waitUserFenceCall.called = 0u;
                mock->checkResetStatusCalled = 0u;
                mock->waitUserFenceCall.failOnWaitUserFence = true;
                mock->errnoValue = waitErrno;

                auto commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1024, AllocationType::commandBuffer});
                auto firstEngine = memoryManager->getRegisteredEngines(rootDeviceIndex)[0];
                commandBuffer->updateTaskCount(2, firstEngine.osContext->getContextId());

                // The fence is expected at completionFenceOffset from the engine's tag address.
                auto tagAddress = const_cast<TagAddressType *>(firstEngine.commandStreamReceiver->getTagAddress());
                uint64_t expectedFenceAddress = castToUint64(tagAddress) + TagAllocationLayout::completionFenceOffset;
                constexpr uint64_t expectedValue = 2;

                memoryManager->handleFenceCompletion(commandBuffer);

                EXPECT_EQ(1u, mock->waitUserFenceCall.called);
                EXPECT_EQ(expectedFenceAddress, mock->waitUserFenceCall.address);
                EXPECT_EQ(expectedValue, mock->waitUserFenceCall.value);

                const bool resetCheckExpected = (waitErrno == EIO) && scratchPageDisabled && (faultCheckThreshold != 0);
                EXPECT_EQ(resetCheckExpected ? 1u : 0u, mock->checkResetStatusCalled);

                memoryManager->freeGraphicsMemory(commandBuffer);
            }
        }
    }
}
TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOfNotUsedAndEligbleAllocationThenDoNotCallWaitUserFence) {
mock->ioctlExpected.total = -1;