From c0121eb824d4c29f97876c89496ea2beafac5f7b Mon Sep 17 00:00:00 2001 From: "Vilvaraj, T J Vivek" Date: Fri, 10 Jun 2022 10:52:34 +0000 Subject: [PATCH] SysMan: fix issues in execution environment restoration. The scope of the restorer lasts until the LinuxSysmanImp is deleted. Ideally the scope of the restorer needs to be function level. Signed-off-by: Vilvaraj, T J Vivek --- .../diagnostics/linux/os_diagnostics_imp.cpp | 4 +++ .../linux/os_global_operations_imp.cpp | 3 +++ .../source/sysman/linux/os_sysman_imp.cpp | 1 - .../tools/source/sysman/linux/os_sysman_imp.h | 3 +-- .../linux/test_zes_sysman_diagnostics.cpp | 26 +++++++++++++++++++ .../linux/test_zes_global_operations.cpp | 1 + 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.cpp b/level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.cpp index b6e2149d3e..91d994ccef 100644 --- a/level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.cpp +++ b/level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.cpp @@ -105,6 +105,10 @@ ze_result_t LinuxDiagnosticsImp::waitForQuiescentCompletion() { ze_result_t LinuxDiagnosticsImp::osRunDiagTestsinFW(zes_diag_result_t *pResult) { pLinuxSysmanImp->diagnosticsReset = true; + auto pDevice = pLinuxSysmanImp->getDeviceHandle(); + auto devicePtr = static_cast(pDevice); + NEO::ExecutionEnvironment *executionEnvironment = devicePtr->getNEODevice()->getExecutionEnvironment(); + auto restorer = std::make_unique(executionEnvironment); pLinuxSysmanImp->releaseDeviceResources(); ze_result_t result = gpuProcessCleanup(); if (ZE_RESULT_SUCCESS != result) { diff --git a/level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.cpp b/level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.cpp index 2f1a4e8ab0..691c94b24d 100644 --- a/level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.cpp +++ 
b/level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.cpp @@ -107,6 +107,9 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) { } ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; pDevice->getProperties(&deviceProperties); + auto devicePtr = static_cast(pDevice); + NEO::ExecutionEnvironment *executionEnvironment = devicePtr->getNEODevice()->getExecutionEnvironment(); + auto restorer = std::make_unique(executionEnvironment); pLinuxSysmanImp->releaseDeviceResources(); std::string resetPath; std::string resetName; diff --git a/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp b/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp index bcf84575dd..36a0c36c09 100644 --- a/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp +++ b/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp @@ -273,7 +273,6 @@ void LinuxSysmanImp::releaseDeviceResources() { devicePciBdf = devicePtr->getNEODevice()->getRootDeviceEnvironment().osInterface->getDriverModel()->as()->getPciPath(); rootDeviceIndex = devicePtr->getNEODevice()->getRootDeviceIndex(); - restorer = std::make_unique(executionEnvironment); releaseSysmanDeviceResources(); auto device = static_cast(getDeviceHandle()); executionEnvironment = device->getNEODevice()->getExecutionEnvironment(); diff --git a/level_zero/tools/source/sysman/linux/os_sysman_imp.h b/level_zero/tools/source/sysman/linux/os_sysman_imp.h index 99edfb6c7d..9456f26950 100644 --- a/level_zero/tools/source/sysman/linux/os_sysman_imp.h +++ b/level_zero/tools/source/sysman/linux/os_sysman_imp.h @@ -79,13 +79,12 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass { uint32_t rootDeviceIndex = 0u; NEO::ExecutionEnvironment *executionEnvironment = nullptr; bool diagnosticsReset = false; - std::unique_ptr restorer; + Device *pDevice = nullptr; protected: FsAccess *pFsAccess = nullptr; ProcfsAccess *pProcfsAccess = nullptr; SysfsAccess *pSysfsAccess = nullptr; - 
Device *pDevice = nullptr; NEO::Drm *pDrm = nullptr; PmuInterface *pPmuInterface = nullptr; FirmwareUtil *pFwUtilInterface = nullptr; diff --git a/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/test_zes_sysman_diagnostics.cpp b/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/test_zes_sysman_diagnostics.cpp index 5191267036..ffc92efc13 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/test_zes_sysman_diagnostics.cpp +++ b/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/test_zes_sysman_diagnostics.cpp @@ -226,6 +226,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenRunningDiagnosticsT pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); std::unique_ptr pOsDiagnosticsPrev = std::move(ptestDiagnosticsImp->pOsDiagnostics); ptestDiagnosticsImp->pOsDiagnostics = std::move(pPublicLinuxDiagnosticsImp); @@ -250,6 +252,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenRunningDiagnosticsT pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockDiagFwInterface->setDiagResult(ZES_DIAG_RESULT_FORCE_UINT32); pMockDiagFwInterface->mockFwRunDiagTestsResult = ZE_RESULT_ERROR_NOT_AVAILABLE; DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); @@ -276,6 +280,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenListProcessFailsThe 
pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockDiagProcfsAccess->setMockError(ZE_RESULT_ERROR_NOT_AVAILABLE); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); std::unique_ptr pOsDiagnosticsPrev = std::move(ptestDiagnosticsImp->pOsDiagnostics); @@ -302,6 +308,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenQuiescentingFailsTh pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockSysfsAccess->setMockError(ZE_RESULT_ERROR_NOT_AVAILABLE); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); std::unique_ptr pOsDiagnosticsPrev = std::move(ptestDiagnosticsImp->pOsDiagnostics); @@ -328,6 +336,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenInvalidateLmemFails pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockSysfsAccess->setMockError(ZE_RESULT_ERROR_NOT_AVAILABLE); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); std::unique_ptr pOsDiagnosticsPrev = std::move(ptestDiagnosticsImp->pOsDiagnostics); @@ -354,6 +364,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenColdResetFailsThenC pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); 
pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockDiagFwInterface->setDiagResult(ZES_DIAG_RESULT_REBOOT_FOR_REPAIR); pMockDiagLinuxSysmanImp->setMockError(ZE_RESULT_ERROR_NOT_AVAILABLE); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); @@ -381,6 +393,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenWarmResetFailsThenC pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockDiagLinuxSysmanImp->setMockError(ZE_RESULT_ERROR_NOT_AVAILABLE); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); std::unique_ptr pOsDiagnosticsPrev = std::move(ptestDiagnosticsImp->pOsDiagnostics); @@ -407,6 +421,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenWarmResetSucceedsAn pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockDiagLinuxSysmanImp->setMockInitDeviceError(ZE_RESULT_ERROR_NOT_AVAILABLE); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); std::unique_ptr pOsDiagnosticsPrev = std::move(ptestDiagnosticsImp->pOsDiagnostics); @@ -433,6 +449,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenColdResetSucceedsAn pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); 
+ pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockDiagFwInterface->setDiagResult(ZES_DIAG_RESULT_REBOOT_FOR_REPAIR); pMockDiagLinuxSysmanImp->setMockInitDeviceError(ZE_RESULT_ERROR_NOT_AVAILABLE); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0]); @@ -460,6 +478,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenGPUProcessCleanupSu pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockDiagProcfsAccess->ourDevicePid = getpid(); pMockDiagLinuxSysmanImp->ourDevicePid = getpid(); pMockDiagLinuxSysmanImp->ourDeviceFd = ::open("/dev/null", 0); @@ -476,6 +496,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenGPUProcessCleanupFa pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockSysfsAccess->setMockError(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE); pMockDiagProcfsAccess->setMockError(ZE_RESULT_ERROR_NOT_AVAILABLE); EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, pPublicLinuxDiagnosticsImp->waitForQuiescentCompletion()); @@ -490,6 +512,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenQuiescentFailsConti pPublicLinuxDiagnosticsImp->pFwInterface = pMockDiagFwInterface.get(); pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pPublicLinuxDiagnosticsImp->pSleepFunctionSecs = mockSleepFunctionSecs; 
pMockSysfsAccess->setErrorAfterCount(12, ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE); @@ -506,6 +530,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenInvalidateLmemFails pPublicLinuxDiagnosticsImp->pProcfsAccess = pMockDiagProcfsAccess.get(); pPublicLinuxDiagnosticsImp->pLinuxSysmanImp = pMockDiagLinuxSysmanImp.get(); + pPublicLinuxDiagnosticsImp->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); + pMockSysfsAccess->setErrorAfterCount(1, ZE_RESULT_ERROR_NOT_AVAILABLE); EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, pPublicLinuxDiagnosticsImp->waitForQuiescentCompletion()); } diff --git a/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/test_zes_global_operations.cpp b/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/test_zes_global_operations.cpp index b53b2c3ba2..03ec23a8f6 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/test_zes_global_operations.cpp +++ b/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/test_zes_global_operations.cpp @@ -409,6 +409,7 @@ TEST_F(SysmanGlobalOperationsFixture, GivenDeviceIsNotWedgedWhenCallingGetDevice TEST_F(SysmanGlobalOperationsFixture, GivenForceTrueWhenCallingResetThenSuccessIsReturned) { pGlobalOperationsImp->init(); static_cast(pGlobalOperationsImp->pOsGlobalOperations)->pLinuxSysmanImp = pMockGlobalOpsLinuxSysmanImp.get(); + static_cast(pGlobalOperationsImp->pOsGlobalOperations)->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); ze_result_t result = zesDeviceReset(device, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); }