fix: add debug key to verify device state before submit

- new debug key EnableDeviceStateVerification to check device state not
only in debug mode

Related-To: NEO-7669
Signed-off-by: Cencelewska, Katarzyna <katarzyna.cencelewska@intel.com>
This commit is contained in:
Cencelewska, Katarzyna 2023-05-29 14:06:01 +00:00 committed by Compute-Runtime-Automation
parent 27c2352f41
commit 115d6de350
10 changed files with 94 additions and 15 deletions

View File

@ -240,6 +240,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1:
DECLARE_DEBUG_VARIABLE(int64_t, OverrideEventSynchronizeTimeout, -1, "-1: default - user provided timeout value, >0: timeout in nanoseconds")
DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled")
DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default, >=0: delay time in minutes necessary for completion of Memory diagnostics")
/* -1 leaves the decision to the build type: the Wddm constructor then enables the check only when _DEBUG is defined */
DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerification, -1, "-1: default, 0: disable, 1: enable check of device state before submit on Windows")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

View File

@ -71,6 +71,11 @@ Wddm::Wddm(std::unique_ptr<HwDeviceIdWddm> &&hwDeviceIdIn, RootDeviceEnvironment
kmDafListener = std::unique_ptr<KmDafListener>(new KmDafListener);
temporaryResources = std::make_unique<WddmResidentAllocationsContainer>(this);
osMemory = OSMemory::create();
bool forceCheck = false;
#if _DEBUG
forceCheck = true;
#endif
checkDeviceState = (DebugManager.flags.EnableDeviceStateVerification.get() != -1) ? DebugManager.flags.EnableDeviceStateVerification.get() : forceCheck;
}
Wddm::~Wddm() {
@ -1009,30 +1014,31 @@ bool Wddm::submit(uint64_t commandBuffer, size_t size, void *commandHeader, Wddm
printf("%u: Wddm Submission with context handle %u and HwQueue handle %u\n", SysCalls::getProcessId(), submitArguments.contextHandle, submitArguments.hwQueueHandle);
}
getDeviceState();
status = wddmInterface->submit(commandBuffer, size, commandHeader, submitArguments);
if (status) {
submitArguments.monitorFence->lastSubmittedFence = submitArguments.monitorFence->currentFenceValue;
submitArguments.monitorFence->currentFenceValue++;
}
getDeviceState();
return status;
}
void Wddm::getDeviceState() {
#ifdef _DEBUG
D3DKMT_GETDEVICESTATE GetDevState = {};
NTSTATUS status = STATUS_SUCCESS;
if (checkDeviceState) {
D3DKMT_GETDEVICESTATE getDevState = {};
NTSTATUS status = STATUS_SUCCESS;
GetDevState.hDevice = device;
GetDevState.StateType = D3DKMT_DEVICESTATE_EXECUTION;
getDevState.hDevice = device;
getDevState.StateType = D3DKMT_DEVICESTATE_EXECUTION;
status = getGdi()->getDeviceState(&GetDevState);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
if (status == STATUS_SUCCESS) {
DEBUG_BREAK_IF(GetDevState.ExecutionState != D3DKMT_DEVICEEXECUTION_ACTIVE);
status = getGdi()->getDeviceState(&getDevState);
DEBUG_BREAK_IF(status != STATUS_SUCCESS);
PRINT_DEBUG_STRING(getDevState.ExecutionState == D3DKMT_DEVICEEXECUTION_ERROR_OUTOFMEMORY, stderr, "Device execution error, out of memory %d\n", getDevState.ExecutionState);
if (status == STATUS_SUCCESS) {
DEBUG_BREAK_IF(getDevState.ExecutionState != D3DKMT_DEVICEEXECUTION_ACTIVE);
}
}
#endif
}
unsigned int Wddm::getEnablePreemptionRegValue() {

View File

@ -276,5 +276,6 @@ class Wddm : public DriverModel {
bool platformSupportsEvictIfNecessary = false;
bool instrumentationEnabled = false;
// When true, getDeviceState() queries the device execution state (it is called before and after each submit).
// Set in the constructor from the EnableDeviceStateVerification debug key; with the key at -1 it is true only when _DEBUG is defined.
bool checkDeviceState = false;
};
} // namespace NEO

View File

@ -18,6 +18,7 @@ uint32_t gMapGpuVaFailConfigMax = 0;
uint64_t gGpuAddressSpace = 0ull;
uint32_t gLastPriority = 0ull;
ADAPTER_BDF gAdapterBDF{};
// Execution state reported by mockD3DKMTGetDeviceState; tests change it through setMockDeviceExecutionState().
D3DKMT_DEVICEEXECUTION_STATE gExecutionState = D3DKMT_DEVICEEXECUTION_ACTIVE;
NTSTATUS __stdcall mockD3DKMTEscape(IN CONST D3DKMT_ESCAPE *pData) {
static int perfTicks = 0;
@ -590,7 +591,8 @@ NTSTATUS __stdcall mockD3DKMTEvict(IN OUT D3DKMT_EVICT *) {
return STATUS_SUCCESS;
}
// Mock of D3DKMTGetDeviceState: writes the execution state currently stored in gExecutionState
// into the caller's structure and always reports success.
NTSTATUS __stdcall mockD3DKMTGetDeviceState(IN OUT D3DKMT_GETDEVICESTATE *getDevState) {
    getDevState->ExecutionState = gExecutionState;
    return STATUS_SUCCESS;
}
@ -696,3 +698,7 @@ bool *getFailOnSetContextSchedulingPriorityCall() {
D3DKMT_SETCONTEXTSCHEDULINGPRIORITY *getSetContextSchedulingPriorityDataCall() {
return &setContextSchedulingPriorityData;
}
// Test hook: selects the execution state that subsequent mockD3DKMTGetDeviceState calls report.
void setMockDeviceExecutionState(D3DKMT_DEVICEEXECUTION_STATE newState) {
gExecutionState = newState;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -103,5 +103,6 @@ D3DKMT_SETCONTEXTSCHEDULINGPRIORITY *getSetContextSchedulingPriorityDataCall();
bool *getRegisterTrimNotificationFailCall();
uint32_t getLastPriority();
void setAdapterBDF(ADAPTER_BDF &adapterBDF);
void setMockDeviceExecutionState(D3DKMT_DEVICEEXECUTION_STATE newState);
void initGfxPartition();

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -126,6 +126,9 @@ void *MockOsLibrary::getProcAddress(const std::string &procName) {
if (procName == "setMockCreateDeviceParams") {
return reinterpret_cast<void *>(setMockCreateDeviceParams);
}
if (procName == "setMockDeviceExecutionState") {
return reinterpret_cast<void *>(setMockDeviceExecutionState);
}
if (procName == "getMockAllocation") {
return reinterpret_cast<void *>(getMockAllocation);
}

View File

@ -30,6 +30,7 @@ class WddmMock : public Wddm {
using Wddm::adapterBDF;
using Wddm::additionalAdapterInfoOptions;
using Wddm::adjustEvictNeededParameter;
using Wddm::checkDeviceState;
using Wddm::createPagingFenceLogger;
using Wddm::currentPagingFenceValue;
using Wddm::dedicatedVideoMemory;
@ -38,6 +39,7 @@ class WddmMock : public Wddm {
using Wddm::enablePreemptionRegValue;
using Wddm::featureTable;
using Wddm::forceEvictOnlyIfNecessary;
using Wddm::getDeviceState;
using Wddm::getSystemInfo;
using Wddm::gfxPlatform;
using Wddm::gmmMemory;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -57,6 +57,7 @@ struct GdiDllFixture {
reinterpret_cast<decltype(&getLastPriority)>(mockGdiDll->getProcAddress("getLastPriority"));
setAdapterBDFFcn =
reinterpret_cast<decltype(&setAdapterBDF)>(mockGdiDll->getProcAddress("setAdapterBDF"));
setMockDeviceExecutionStateFcn = reinterpret_cast<decltype(&setMockDeviceExecutionState)>(mockGdiDll->getProcAddress("setMockDeviceExecutionState"));
setMockLastDestroyedResHandleFcn((D3DKMT_HANDLE)0);
*getDestroySynchronizationObjectDataFcn() = {};
*getCreateSynchronizationObject2FailCallFcn() = false;
@ -102,4 +103,5 @@ struct GdiDllFixture {
decltype(&getRegisterTrimNotificationFailCall) getRegisterTrimNotificationFailCallFcn = nullptr;
decltype(&getLastPriority) getLastPriorityFcn = nullptr;
decltype(&setAdapterBDF) setAdapterBDFFcn = nullptr;
decltype(&setMockDeviceExecutionState) setMockDeviceExecutionStateFcn = nullptr;
};

View File

@ -526,4 +526,5 @@ DebugSetMemoryDiagnosticsDelay = -1
EnableCpuCacheForResources = 1
OverrideHwIpVersion = -1
PrintGlobalTimestampInNs = 0
EnableDeviceStateVerification = -1
# Please don't edit below this line

View File

@ -409,6 +409,62 @@ TEST_F(WddmTests, GivenWddmWhenMapGpuVaCalledThenGmmClientCallsMapGpuVa) {
memoryManager->freeGraphicsMemory(allocation);
}
// With verification enabled, an out-of-memory execution state must be reported on stderr,
// while an active device must produce no output at all.
TEST_F(WddmTests, givenCheckDeviceStateSetToTrueWhenCallGetDeviceStateAndForceExecutionStateThenProperMessageIsVisible) {
    DebugManagerStateRestore restorer{};
    // Keep DEBUG_BREAK_IF from stopping the test when the mocked state is not ACTIVE.
    DebugManager.flags.EnableDebugBreak.set(false);
    wddm->checkDeviceState = true;

    // Makes the mock report `state`, runs the query and returns everything written to stderr.
    auto queryAndCaptureStderr = [&](D3DKMT_DEVICEEXECUTION_STATE state) {
        setMockDeviceExecutionStateFcn(state);
        ::testing::internal::CaptureStderr();
        wddm->getDeviceState();
        return ::testing::internal::GetCapturedStderr();
    };

    const auto outOfMemory = D3DKMT_DEVICEEXECUTION_ERROR_OUTOFMEMORY;
    const std::string expectedMessage = "Device execution error, out of memory " + std::to_string(outOfMemory) + "\n";

    EXPECT_EQ(expectedMessage, queryAndCaptureStderr(outOfMemory));
    // The second call also restores the mock to the ACTIVE state.
    EXPECT_EQ(std::string(""), queryAndCaptureStderr(D3DKMT_DEVICEEXECUTION_ACTIVE));
}
// With verification disabled, nothing may be printed regardless of the state the mock reports.
TEST_F(WddmTests, givenCheckDeviceStateSetToFalseWhenCallGetDeviceStateAndForceExecutionStateThenNoMessageIsVisible) {
    DebugManagerStateRestore restorer{};
    DebugManager.flags.EnableDebugBreak.set(false);
    wddm->checkDeviceState = false;

    // Makes the mock report `state`, runs the query and returns everything written to stderr.
    auto queryAndCaptureStderr = [&](D3DKMT_DEVICEEXECUTION_STATE state) {
        setMockDeviceExecutionStateFcn(state);
        ::testing::internal::CaptureStderr();
        wddm->getDeviceState();
        return ::testing::internal::GetCapturedStderr();
    };

    const std::string noOutput{""};

    EXPECT_EQ(noOutput, queryAndCaptureStderr(D3DKMT_DEVICEEXECUTION_ERROR_OUTOFMEMORY));
    // The second call also restores the mock to the ACTIVE state.
    EXPECT_EQ(noOutput, queryAndCaptureStderr(D3DKMT_DEVICEEXECUTION_ACTIVE));
}
// Setting the debug key to 1 must switch the device state check on at construction time.
TEST(WddmConstructorTest, givenEnableDeviceStateVerificationSetTrueWhenCreateWddmThenCheckDeviceStateIsTrue) {
    DebugManagerStateRestore restorer{};
    DebugManager.flags.EnableDeviceStateVerification.set(1);

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    const auto wddmUnderTest = std::make_unique<WddmMock>(rootDeviceEnvironment);

    EXPECT_TRUE(wddmUnderTest->checkDeviceState);
}
// Setting the debug key to 0 must switch the device state check off at construction time.
TEST(WddmConstructorTest, givenEnableDeviceStateVerificationSetFalseWhenCreateWddmThenCheckDeviceStateIsFalse) {
    DebugManagerStateRestore restorer{};
    DebugManager.flags.EnableDeviceStateVerification.set(0);

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    const auto wddmUnderTest = std::make_unique<WddmMock>(rootDeviceEnvironment);

    EXPECT_FALSE(wddmUnderTest->checkDeviceState);
}
uint64_t waitForSynchronizationObjectFromCpuCounter = 0u;
NTSTATUS __stdcall waitForSynchronizationObjectFromCpuNoOpMock(const D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU *waitStruct) {