From 1002cb9f349c05cd62ff9d5a0b8251c7e931b149 Mon Sep 17 00:00:00 2001 From: Bellekallu Rajkiran Date: Fri, 19 Jan 2024 11:59:41 +0000 Subject: [PATCH] fix: Fix abort on device get state Related-To: NEO-9980 Signed-off-by: Bellekallu Rajkiran --- .../linux/sysman_os_global_operations_imp.cpp | 21 ++--------- .../linux/sysman_os_global_operations_imp.h | 3 +- .../sysman_os_global_operations.h | 3 +- .../sysman_os_global_operations_imp.cpp | 4 +- .../windows/sysman_os_global_operations_imp.h | 3 +- .../shared/linux/sysman_kmd_interface.cpp | 18 ++++++++- .../shared/linux/sysman_kmd_interface.h | 9 ++++- .../sysman_kmd_interface_i915_prelim.cpp | 4 ++ .../sysman_kmd_interface_i915_upstream.cpp | 6 ++- .../global_operations/linux/CMakeLists.txt | 3 +- .../linux/test_zes_global_operations_xe.cpp | 37 +++++++++++++++++++ ...sysman_kmd_interface_tests_i915_prelim.cpp | 31 +++++++++++++++- 12 files changed, 109 insertions(+), 33 deletions(-) create mode 100644 level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations_xe.cpp diff --git a/level_zero/sysman/source/api/global_operations/linux/sysman_os_global_operations_imp.cpp b/level_zero/sysman/source/api/global_operations/linux/sysman_os_global_operations_imp.cpp index ab8b11fe5f..9aef9d27d4 100644 --- a/level_zero/sysman/source/api/global_operations/linux/sysman_os_global_operations_imp.cpp +++ b/level_zero/sysman/source/api/global_operations/linux/sysman_os_global_operations_imp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -707,22 +707,6 @@ ze_result_t LinuxGlobalOperationsImp::scanProcessesState(std::vectorgetSysmanHwDeviceIdInstance(); - auto pDrm = pLinuxSysmanImp->getDrm(); - // Device is said to be in wedged if context creation returns EIO. - auto ret = pDrm->getIoctlHelper()->ioctl(NEO::DrmIoctl::gemContextCreateExt, &gcc); - if (ret == 0) { - pDrm->destroyDrmContext(gcc.contextId); - return; - } - - if (pDrm->getErrno() == EIO) { - pState->reset |= ZES_RESET_REASON_FLAG_WEDGED; - } -} - void LinuxGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) { SysmanProductHelper *pSysmanProductHelper = pLinuxSysmanImp->getSysmanProductHelper(); if (pSysmanProductHelper->isRepairStatusSupported()) { @@ -744,7 +728,8 @@ void LinuxGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) { ze_result_t LinuxGlobalOperationsImp::deviceGetState(zes_device_state_t *pState) { memset(pState, 0, sizeof(zes_device_state_t)); pState->repaired = ZES_REPAIR_STATUS_UNSUPPORTED; - getWedgedStatus(pState); + auto pSysmanKmdInterface = pLinuxSysmanImp->getSysmanKmdInterface(); + pSysmanKmdInterface->getWedgedStatus(pLinuxSysmanImp, pState); getRepairStatus(pState); return ZE_RESULT_SUCCESS; } diff --git a/level_zero/sysman/source/api/global_operations/linux/sysman_os_global_operations_imp.h b/level_zero/sysman/source/api/global_operations/linux/sysman_os_global_operations_imp.h index 752e9dd9ac..1167d516b5 100644 --- a/level_zero/sysman/source/api/global_operations/linux/sysman_os_global_operations_imp.h +++ b/level_zero/sysman/source/api/global_operations/linux/sysman_os_global_operations_imp.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -30,7 +30,6 @@ class LinuxGlobalOperationsImp : public OsGlobalOperations, NEO::NonCopyableOrMo void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) override; void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) override; void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) override; - void getWedgedStatus(zes_device_state_t *pState) override; void getRepairStatus(zes_device_state_t *pState) override; ze_result_t reset(ze_bool_t force) override; ze_result_t scanProcessesState(std::vector &pProcessList) override; diff --git a/level_zero/sysman/source/api/global_operations/sysman_os_global_operations.h b/level_zero/sysman/source/api/global_operations/sysman_os_global_operations.h index 749602ad52..18f76107d3 100644 --- a/level_zero/sysman/source/api/global_operations/sysman_os_global_operations.h +++ b/level_zero/sysman/source/api/global_operations/sysman_os_global_operations.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -28,7 +28,6 @@ class OsGlobalOperations { virtual void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) = 0; virtual void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) = 0; virtual void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) = 0; - virtual void getWedgedStatus(zes_device_state_t *pState) = 0; virtual void getRepairStatus(zes_device_state_t *pState) = 0; virtual bool getUuid(std::array &uuid) = 0; virtual bool generateUuidFromPciBusInfo(const NEO::PhysicalDevicePciBusInfo &pciBusInfo, std::array &uuid) = 0; diff --git a/level_zero/sysman/source/api/global_operations/windows/sysman_os_global_operations_imp.cpp b/level_zero/sysman/source/api/global_operations/windows/sysman_os_global_operations_imp.cpp index edbfe420d2..d5d78ec6d5 100644 --- a/level_zero/sysman/source/api/global_operations/windows/sysman_os_global_operations_imp.cpp +++ b/level_zero/sysman/source/api/global_operations/windows/sysman_os_global_operations_imp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -33,8 +33,6 @@ void WddmGlobalOperationsImp::getVendorName(char (&vendorName)[ZES_STRING_PROPER void WddmGlobalOperationsImp::getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) { } -void WddmGlobalOperationsImp::getWedgedStatus(zes_device_state_t *pState) { -} void WddmGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) { } bool WddmGlobalOperationsImp::getUuid(std::array &uuid) { diff --git a/level_zero/sysman/source/api/global_operations/windows/sysman_os_global_operations_imp.h b/level_zero/sysman/source/api/global_operations/windows/sysman_os_global_operations_imp.h index e23302f3d2..7a1159a44d 100644 --- a/level_zero/sysman/source/api/global_operations/windows/sysman_os_global_operations_imp.h +++ b/level_zero/sysman/source/api/global_operations/windows/sysman_os_global_operations_imp.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ class WddmGlobalOperationsImp : public OsGlobalOperations, NEO::NonCopyableOrMov void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) override; void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) override; void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) override; - void getWedgedStatus(zes_device_state_t *pState) override; void getRepairStatus(zes_device_state_t *pState) override; ze_result_t reset(ze_bool_t force) override; ze_result_t scanProcessesState(std::vector &pProcessList) override; diff --git a/level_zero/sysman/source/shared/linux/sysman_kmd_interface.cpp b/level_zero/sysman/source/shared/linux/sysman_kmd_interface.cpp index d24573a6fe..6739b4ce6f 100644 --- a/level_zero/sysman/source/shared/linux/sysman_kmd_interface.cpp +++ b/level_zero/sysman/source/shared/linux/sysman_kmd_interface.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -165,6 +165,22 @@ uint32_t SysmanKmdInterface::getEventTypeImpl(std::string &dirName, const bool i return eventTypeVal; } +void SysmanKmdInterface::getWedgedStatusImpl(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) { + NEO::GemContextCreateExt gcc{}; + auto hwDeviceId = pLinuxSysmanImp->getSysmanHwDeviceIdInstance(); + auto pDrm = pLinuxSysmanImp->getDrm(); + // Device is said to be in wedged if context creation returns EIO. + auto ret = pDrm->getIoctlHelper()->ioctl(NEO::DrmIoctl::gemContextCreateExt, &gcc); + if (ret == 0) { + pDrm->destroyDrmContext(gcc.contextId); + return; + } + + if (pDrm->getErrno() == EIO) { + pState->reset |= ZES_RESET_REASON_FLAG_WEDGED; + } +} + std::string SysmanKmdInterfaceI915::getBasePathI915(uint32_t subDeviceId) { return "gt/gt" + std::to_string(subDeviceId) + "/"; } diff --git a/level_zero/sysman/source/shared/linux/sysman_kmd_interface.h b/level_zero/sysman/source/shared/linux/sysman_kmd_interface.h index d69ceaea83..64afd8198b 100644 --- a/level_zero/sysman/source/shared/linux/sysman_kmd_interface.h +++ b/level_zero/sysman/source/shared/linux/sysman_kmd_interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -137,6 +137,7 @@ class SysmanKmdInterface { virtual bool isBoostFrequencyAvailable() const = 0; virtual bool isTdpFrequencyAvailable() const = 0; virtual bool isPhysicalMemorySizeSupported() const = 0; + virtual void getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) = 0; protected: std::unique_ptr pFsAccess; @@ -144,6 +145,7 @@ class SysmanKmdInterface { std::unique_ptr pSysfsAccess; virtual const std::map &getSysfsNameToNativeUnitMap() = 0; uint32_t getEventTypeImpl(std::string &dirName, const bool isIntegratedDevice); + void getWedgedStatusImpl(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState); }; class SysmanKmdInterfaceI915 { @@ -184,6 +186,7 @@ class SysmanKmdInterfaceI915Upstream : public SysmanKmdInterface, SysmanKmdInter bool isBoostFrequencyAvailable() const override { return true; } bool isTdpFrequencyAvailable() const override { return true; } bool isPhysicalMemorySizeSupported() const override { return false; } + void getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) override; protected: std::map sysfsNameToFileMap; @@ -226,6 +229,7 @@ class SysmanKmdInterfaceI915Prelim : public SysmanKmdInterface, SysmanKmdInterfa bool isBoostFrequencyAvailable() const override { return true; } bool isTdpFrequencyAvailable() const override { return true; } bool isPhysicalMemorySizeSupported() const override { return true; } + void getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) override; protected: std::map sysfsNameToFileMap; @@ -269,6 +273,9 @@ class SysmanKmdInterfaceXe : public SysmanKmdInterface { bool isTdpFrequencyAvailable() const override { return false; } bool isPhysicalMemorySizeSupported() const override { return true; } + // Wedged state is not supported in XE. + void getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) override{}; + protected: std::map sysfsNameToFileMap; void initSysfsNameToFileMap(const PRODUCT_FAMILY productFamily); diff --git a/level_zero/sysman/source/shared/linux/sysman_kmd_interface_i915_prelim.cpp b/level_zero/sysman/source/shared/linux/sysman_kmd_interface_i915_prelim.cpp index 0133cffe54..e9ab3e5e58 100644 --- a/level_zero/sysman/source/shared/linux/sysman_kmd_interface_i915_prelim.cpp +++ b/level_zero/sysman/source/shared/linux/sysman_kmd_interface_i915_prelim.cpp @@ -94,5 +94,9 @@ uint32_t SysmanKmdInterfaceI915Prelim::getEventType(const bool isIntegratedDevic return 0; } +void SysmanKmdInterfaceI915Prelim::getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) { + getWedgedStatusImpl(pLinuxSysmanImp, pState); +} + } // namespace Sysman } // namespace L0 \ No newline at end of file diff --git a/level_zero/sysman/source/shared/linux/sysman_kmd_interface_i915_upstream.cpp b/level_zero/sysman/source/shared/linux/sysman_kmd_interface_i915_upstream.cpp index da511b6e7a..d94e6195a0 100644 --- a/level_zero/sysman/source/shared/linux/sysman_kmd_interface_i915_upstream.cpp +++ b/level_zero/sysman/source/shared/linux/sysman_kmd_interface_i915_upstream.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -108,5 +108,9 @@ uint32_t SysmanKmdInterfaceI915Upstream::getEventType(const bool isIntegratedDev return getEventTypeImpl(i915DirName, isIntegratedDevice); } +void SysmanKmdInterfaceI915Upstream::getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) { + getWedgedStatusImpl(pLinuxSysmanImp, pState); +} + } // namespace Sysman } // namespace L0 \ No newline at end of file diff --git a/level_zero/sysman/test/unit_tests/sources/global_operations/linux/CMakeLists.txt b/level_zero/sysman/test/unit_tests/sources/global_operations/linux/CMakeLists.txt index 49b1ba4003..372e5f77e9 100644 --- a/level_zero/sysman/test/unit_tests/sources/global_operations/linux/CMakeLists.txt +++ b/level_zero/sysman/test/unit_tests/sources/global_operations/linux/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -7,6 +7,7 @@ set(L0_TESTS_SYSMAN_GLOBAL_OPERATIONS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations_xe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_global_operations.h ) diff --git a/level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations_xe.cpp b/level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations_xe.cpp new file mode 100644 index 0000000000..08cc944c4f --- /dev/null +++ b/level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations_xe.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/common/mocks/mock_execution_environment.h" + +#include "level_zero/sysman/test/unit_tests/sources/global_operations/linux/mock_global_operations.h" +#include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_fixture.h" + +namespace L0 { + +namespace Sysman { + +namespace ult { + +using SysmanGlobalOperationsXeFixture = SysmanDeviceFixture; + +TEST_F(SysmanGlobalOperationsXeFixture, GivenValidDeviceHandleWhenCallingDeviceGetStateThenVerifyDeviceIsNotWedged) { + pLinuxSysmanImp->pSysmanKmdInterface = std::make_unique(pLinuxSysmanImp->getProductFamily()); + auto executionEnvironment = std::make_unique(); + auto pDrm = new DrmGlobalOpsMock(const_cast(pSysmanDeviceImp->getRootDeviceEnvironment())); + pDrm->setupIoctlHelper(pSysmanDeviceImp->getRootDeviceEnvironment().getHardwareInfo()->platform.eProductFamily); + auto &osInterface = pSysmanDeviceImp->getRootDeviceEnvironment().osInterface; + osInterface->setDriverModel(std::unique_ptr(pDrm)); + + zes_device_state_t deviceState; + ze_result_t result = zesDeviceGetState(pSysmanDevice, &deviceState); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, deviceState.reset & ZES_RESET_REASON_FLAG_WEDGED); +} + +} // namespace ult +} // namespace Sysman +} // namespace L0 diff --git a/level_zero/sysman/test/unit_tests/sources/shared/linux/sysman_kmd_interface_tests_i915_prelim.cpp b/level_zero/sysman/test/unit_tests/sources/shared/linux/sysman_kmd_interface_tests_i915_prelim.cpp index 09ec523033..ed5d816033 100644 --- a/level_zero/sysman/test/unit_tests/sources/shared/linux/sysman_kmd_interface_tests_i915_prelim.cpp +++ b/level_zero/sysman/test/unit_tests/sources/shared/linux/sysman_kmd_interface_tests_i915_prelim.cpp @@ -7,11 +7,14 @@ #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/string.h" +#include "shared/source/os_interface/linux/drm_neo.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/variable_backup.h" +#include "shared/test/common/mocks/mock_execution_environment.h" #include "level_zero/sysman/source/shared/linux/sysman_kmd_interface.h" #include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_fixture.h" +#include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_hw_device_id.h" #include "level_zero/sysman/test/unit_tests/sources/shared/linux/sysman_kmd_interface_tests.h" #include "gtest/gtest.h" @@ -125,7 +128,7 @@ TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCalling EXPECT_EQ(std::nullopt, pSysmanKmdInterface->getEngineClassString(EngineClass::ENGINE_CLASS_COMPUTE)); } -TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCallingGetNumEngineTypeAndInstancesTenErrorIsReturned) { +TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCallingGetNumEngineTypeAndInstancesThenErrorIsReturned) { std::vector mockVecString = {"rcs"}; std::map> mockMapofEngine = {{ZES_ENGINE_TYPE_FLAG_RENDER, mockVecString}}; auto pSysmanKmdInterface = pLinuxSysmanImp->getSysmanKmdInterface(); @@ -134,6 +137,30 @@ TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCalling mockMapofEngine, pLinuxSysmanImp, nullptr, true, 0)); } +TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCallingGetDeviceWedgedStatusThenVerifyDeviceIsNotWedged) { + class DrmMock : public Drm { + public: + DrmMock(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(mockFd, ""), rootDeviceEnvironment) {} + using Drm::setupIoctlHelper; + int ioctlRetVal = 0; + int ioctlErrno = 0; + int mockFd = 33; + int ioctl(DrmIoctl request, void *arg) override { + return ioctlRetVal; + } + int getErrno() override { return ioctlErrno; } + }; + auto executionEnvironment = std::make_unique(); + auto pDrm = new DrmMock(const_cast(pSysmanDeviceImp->getRootDeviceEnvironment())); + pDrm->setupIoctlHelper(pSysmanDeviceImp->getRootDeviceEnvironment().getHardwareInfo()->platform.eProductFamily); + auto &osInterface = pSysmanDeviceImp->getRootDeviceEnvironment().osInterface; + osInterface->setDriverModel(std::unique_ptr(pDrm)); + auto pSysmanKmdInterface = pLinuxSysmanImp->getSysmanKmdInterface(); + zes_device_state_t deviceState = {}; + pSysmanKmdInterface->getWedgedStatus(pLinuxSysmanImp, &deviceState); + EXPECT_EQ(0u, deviceState.reset & ZES_RESET_REASON_FLAG_WEDGED); +} + } // namespace ult } // namespace Sysman -} // namespace L0 \ No newline at end of file +} // namespace L0