Detect GPU hang in evictUnusedAllocations()

This change checks the return value of the wait function
in the blocking variant of evictUnusedAllocations() and
propagates any error to the callers. It also adds ULTs.

Related-To: NEO-6681
Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
Patryk Wrobel
2022-04-13 10:42:27 +00:00
committed by Compute-Runtime-Automation
parent 641851cc48
commit 352583b9d9
11 changed files with 86 additions and 16 deletions

View File

@ -39,6 +39,19 @@ TEST_F(DrmBufferObjectTest, GivenInvalidParamsWhenCallingExecThenEfaultIsReturne
EXPECT_EQ(EFAULT, bo->exec(0, 0, 0, false, osContext.get(), 0, 1, nullptr, 0u, &execObjectsStorage, 0, 0));
}
// Verifies that BufferObject::exec() reports BufferObject::GPU_HANG_DETECTED when the
// ioctl fails with EFAULT and the base evictUnusedAllocations() path is disabled on the
// buffer object under test.
// NOTE(review): the test name contains the typo "Retrurned"; it is kept unchanged here so
// existing test filters keep matching.
TEST_F(DrmBufferObjectTest, GivenDetectedGpuHangDuringEvictUnusedAllocationsWhenCallingExecGpuHangErrorCodeIsRetrurned) {
    // Do not run the base eviction routine (presumably replaced by a stubbed result in the fixture's BO - confirm).
    bo->callBaseEvictUnusedAllocations = false;

    // Two ioctl calls are expected in total; each one fails with errno == EFAULT.
    mock->errnoValue = EFAULT;
    mock->ioctl_res = -1;
    mock->ioctl_expected.total = 2;

    drm_i915_gem_exec_object2 execObject{};
    const auto execStatus = bo->exec(0, 0, 0, false, osContext.get(), 0, 1, nullptr, 0u, &execObject, 0, 0);

    EXPECT_EQ(BufferObject::GPU_HANG_DETECTED, execStatus);
}
TEST_F(DrmBufferObjectTest, WhenSettingTilingThenCallSucceeds) {
mock->ioctl_expected.total = 1; //set_tiling
auto ret = bo->setTiling(I915_TILING_X, 0);

View File

@ -39,11 +39,13 @@ struct MockDrmMemoryOperationsHandlerBind : public DrmMemoryOperationsHandlerBin
bool useBaseEvictUnused = true;
uint32_t evictUnusedCalled = 0;
// Mock override of evictUnusedAllocations(): counts every call in evictUnusedCalled, then
// either forwards to DrmMemoryOperationsHandlerBind::evictUnusedAllocations() (when
// useBaseEvictUnused is true) or returns MemoryOperationsStatus::SUCCESS without doing
// any further work.
// NOTE(review): this span looks like a rendered diff that shows both the pre-change lines
// (void return type, base call without `return`) and the post-change lines
// (MemoryOperationsStatus return type, `return` of the base result) - confirm against
// the actual source file.
void evictUnusedAllocations(bool waitForCompletion, bool isLockNeeded) override {
MemoryOperationsStatus evictUnusedAllocations(bool waitForCompletion, bool isLockNeeded) override {
evictUnusedCalled++;
if (useBaseEvictUnused) {
DrmMemoryOperationsHandlerBind::evictUnusedAllocations(waitForCompletion, isLockNeeded);
return DrmMemoryOperationsHandlerBind::evictUnusedAllocations(waitForCompletion, isLockNeeded);
}
// Base implementation bypassed: report success to the caller.
return MemoryOperationsStatus::SUCCESS;
}
};
@ -169,7 +171,6 @@ TEST_F(DrmMemoryOperationsHandlerBindTest, givenObjectAlwaysResidentAndNotUsedWh
}
EXPECT_EQ(mock->context.vmBindCalled, 2u);
operationHandler->evictUnusedAllocations(false, true);
EXPECT_EQ(mock->context.vmBindCalled, 2u);
@ -239,7 +240,8 @@ HWTEST_F(DrmMemoryOperationsHandlerBindTest, whenEvictUnusedResourcesWithWaitFor
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.latestWaitForCompletionWithTimeoutTaskCount.store(123u);
operationHandler->evictUnusedAllocations(true, true);
const auto status = operationHandler->evictUnusedAllocations(true, true);
EXPECT_EQ(MemoryOperationsStatus::SUCCESS, status);
auto latestWaitTaskCount = csr.latestWaitForCompletionWithTimeoutTaskCount.load();
EXPECT_NE(latestWaitTaskCount, 123u);
@ -247,6 +249,19 @@ HWTEST_F(DrmMemoryOperationsHandlerBindTest, whenEvictUnusedResourcesWithWaitFor
memoryManager->freeGraphicsMemory(allocation);
}
// Verifies that the blocking variant of evictUnusedAllocations() returns
// MemoryOperationsStatus::GPU_HANG_DETECTED_DURING_OPERATION when the command stream
// receiver's wait is stubbed to return WaitStatus::GpuHang.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenGpuHangWhenEvictUnusedResourcesWithWaitForCompletionThenGpuHangIsReturned) {
    const auto rootDeviceIndex = device->getRootDeviceIndex();
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});

    // Bypass the real wait and make the stubbed wait report a GPU hang.
    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.callBaseWaitForCompletionWithTimeout = false;
    ultCsr.returnWaitForCompletionWithTimeout = WaitStatus::GpuHang;

    // waitForCompletion = true, isLockNeeded = true
    EXPECT_EQ(MemoryOperationsStatus::GPU_HANG_DETECTED_DURING_OPERATION, operationHandler->evictUnusedAllocations(true, true));

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}
TEST_F(DrmMemoryOperationsHandlerBindTest, whenRunningOutOfMemoryThenUnusedAllocationsAreUnbound) {
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize});