fix: Fix abort on device get state

Related-To: NEO-9980

Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>
This commit is contained in:
Bellekallu Rajkiran 2024-01-19 11:59:41 +00:00 committed by Compute-Runtime-Automation
parent c073106907
commit 1002cb9f34
12 changed files with 109 additions and 33 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -707,22 +707,6 @@ ze_result_t LinuxGlobalOperationsImp::scanProcessesState(std::vector<zes_process
return result;
}
// Probes the device by creating a GEM context through the DRM ioctl helper.
// When creation fails and the DRM errno is EIO, ZES_RESET_REASON_FLAG_WEDGED is
// OR-ed into pState->reset; any other outcome leaves pState untouched.
void LinuxGlobalOperationsImp::getWedgedStatus(zes_device_state_t *pState) {
    NEO::GemContextCreateExt contextCreate{};
    // NOTE(review): hwDeviceId is not referenced again in this function; presumably it is
    // held as a scoped handle for the duration of the ioctl - confirm before removing.
    auto hwDeviceId = pLinuxSysmanImp->getSysmanHwDeviceIdInstance();
    auto pDrm = pLinuxSysmanImp->getDrm();

    const auto ioctlResult = pDrm->getIoctlHelper()->ioctl(NEO::DrmIoctl::gemContextCreateExt, &contextCreate);
    if (ioctlResult != 0) {
        // Only an EIO failure of context creation is treated as "wedged".
        if (pDrm->getErrno() == EIO) {
            pState->reset |= ZES_RESET_REASON_FLAG_WEDGED;
        }
        return;
    }

    // Creation succeeded, so the probe context is no longer needed.
    pDrm->destroyDrmContext(contextCreate.contextId);
}
void LinuxGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) {
SysmanProductHelper *pSysmanProductHelper = pLinuxSysmanImp->getSysmanProductHelper();
if (pSysmanProductHelper->isRepairStatusSupported()) {
@ -744,7 +728,8 @@ void LinuxGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) {
// Fills *pState with the current device state.
// The structure is zeroed first and `repaired` defaults to ZES_REPAIR_STATUS_UNSUPPORTED;
// the wedged and repair queries below may then update it.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t LinuxGlobalOperationsImp::deviceGetState(zes_device_state_t *pState) {
    memset(pState, 0, sizeof(zes_device_state_t));
    pState->repaired = ZES_REPAIR_STATUS_UNSUPPORTED;
    // The wedged query is issued exactly once and only through the KMD interface, so that
    // each KMD flavour decides how (or whether) the device is probed.
    auto pSysmanKmdInterface = pLinuxSysmanImp->getSysmanKmdInterface();
    pSysmanKmdInterface->getWedgedStatus(pLinuxSysmanImp, pState);
    getRepairStatus(pState);
    return ZE_RESULT_SUCCESS;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -30,7 +30,6 @@ class LinuxGlobalOperationsImp : public OsGlobalOperations, NEO::NonCopyableOrMo
void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) override;
void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) override;
void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) override;
void getWedgedStatus(zes_device_state_t *pState) override;
void getRepairStatus(zes_device_state_t *pState) override;
ze_result_t reset(ze_bool_t force) override;
ze_result_t scanProcessesState(std::vector<zes_process_state_t> &pProcessList) override;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -28,7 +28,6 @@ class OsGlobalOperations {
virtual void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) = 0;
virtual void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) = 0;
virtual void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) = 0;
virtual void getWedgedStatus(zes_device_state_t *pState) = 0;
virtual void getRepairStatus(zes_device_state_t *pState) = 0;
virtual bool getUuid(std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid) = 0;
virtual bool generateUuidFromPciBusInfo(const NEO::PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid) = 0;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -33,8 +33,6 @@ void WddmGlobalOperationsImp::getVendorName(char (&vendorName)[ZES_STRING_PROPER
// Empty implementation: driverVersion is left unmodified.
void WddmGlobalOperationsImp::getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) {
}
// Empty implementation: pState is left unmodified.
void WddmGlobalOperationsImp::getWedgedStatus(zes_device_state_t *pState) {
}
// Empty implementation: pState is left unmodified.
void WddmGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) {
}
bool WddmGlobalOperationsImp::getUuid(std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -23,7 +23,6 @@ class WddmGlobalOperationsImp : public OsGlobalOperations, NEO::NonCopyableOrMov
void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) override;
void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) override;
void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) override;
void getWedgedStatus(zes_device_state_t *pState) override;
void getRepairStatus(zes_device_state_t *pState) override;
ze_result_t reset(ze_bool_t force) override;
ze_result_t scanProcessesState(std::vector<zes_process_state_t> &pProcessList) override;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -165,6 +165,22 @@ uint32_t SysmanKmdInterface::getEventTypeImpl(std::string &dirName, const bool i
return eventTypeVal;
}
// Shared wedged-state probe: tries to create a GEM context through the DRM ioctl helper
// of the given LinuxSysmanImp. When creation fails and the DRM errno is EIO,
// ZES_RESET_REASON_FLAG_WEDGED is OR-ed into pState->reset; otherwise pState is untouched.
void SysmanKmdInterface::getWedgedStatusImpl(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) {
    NEO::GemContextCreateExt contextCreate{};
    // NOTE(review): hwDeviceId is not referenced again in this function; presumably it is
    // held as a scoped handle for the duration of the ioctl - confirm before removing.
    auto hwDeviceId = pLinuxSysmanImp->getSysmanHwDeviceIdInstance();
    auto pDrm = pLinuxSysmanImp->getDrm();

    const auto ioctlResult = pDrm->getIoctlHelper()->ioctl(NEO::DrmIoctl::gemContextCreateExt, &contextCreate);
    if (ioctlResult != 0) {
        // Only an EIO failure of context creation is treated as "wedged".
        if (pDrm->getErrno() == EIO) {
            pState->reset |= ZES_RESET_REASON_FLAG_WEDGED;
        }
        return;
    }

    // Creation succeeded, so the probe context is no longer needed.
    pDrm->destroyDrmContext(contextCreate.contextId);
}
// Builds the relative path "gt/gt<subDeviceId>/" for the given sub-device index.
std::string SysmanKmdInterfaceI915::getBasePathI915(uint32_t subDeviceId) {
    std::string basePath{"gt/gt"};
    basePath += std::to_string(subDeviceId);
    basePath += "/";
    return basePath;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -137,6 +137,7 @@ class SysmanKmdInterface {
virtual bool isBoostFrequencyAvailable() const = 0;
virtual bool isTdpFrequencyAvailable() const = 0;
virtual bool isPhysicalMemorySizeSupported() const = 0;
// Records the device's wedged status in pState. Pure virtual: each KMD interface
// supplies its own implementation.
virtual void getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) = 0;
protected:
std::unique_ptr<FsAccessInterface> pFsAccess;
@ -144,6 +145,7 @@ class SysmanKmdInterface {
std::unique_ptr<SysFsAccessInterface> pSysfsAccess;
virtual const std::map<SysfsName, SysfsValueUnit> &getSysfsNameToNativeUnitMap() = 0;
uint32_t getEventTypeImpl(std::string &dirName, const bool isIntegratedDevice);
// Helper available to derived classes: probes the device by creating a GEM context and sets
// ZES_RESET_REASON_FLAG_WEDGED in pState->reset when that creation fails with EIO.
void getWedgedStatusImpl(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState);
};
class SysmanKmdInterfaceI915 {
@ -184,6 +186,7 @@ class SysmanKmdInterfaceI915Upstream : public SysmanKmdInterface, SysmanKmdInter
bool isBoostFrequencyAvailable() const override { return true; }
bool isTdpFrequencyAvailable() const override { return true; }
bool isPhysicalMemorySizeSupported() const override { return false; }
void getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) override;
protected:
std::map<SysfsName, valuePair> sysfsNameToFileMap;
@ -226,6 +229,7 @@ class SysmanKmdInterfaceI915Prelim : public SysmanKmdInterface, SysmanKmdInterfa
bool isBoostFrequencyAvailable() const override { return true; }
bool isTdpFrequencyAvailable() const override { return true; }
bool isPhysicalMemorySizeSupported() const override { return true; }
void getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) override;
protected:
std::map<SysfsName, valuePair> sysfsNameToFileMap;
@ -269,6 +273,9 @@ class SysmanKmdInterfaceXe : public SysmanKmdInterface {
bool isTdpFrequencyAvailable() const override { return false; }
bool isPhysicalMemorySizeSupported() const override { return true; }
// Wedged state is not supported in XE, so this override does nothing and
// leaves pState untouched.
void getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) override {}
protected:
std::map<SysfsName, valuePair> sysfsNameToFileMap;
void initSysfsNameToFileMap(const PRODUCT_FAMILY productFamily);

View File

@ -94,5 +94,9 @@ uint32_t SysmanKmdInterfaceI915Prelim::getEventType(const bool isIntegratedDevic
return 0;
}
// I915 prelim: the wedged check is delegated unchanged to getWedgedStatusImpl().
void SysmanKmdInterfaceI915Prelim::getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) {
    this->getWedgedStatusImpl(pLinuxSysmanImp, pState);
}
} // namespace Sysman
} // namespace L0

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -108,5 +108,9 @@ uint32_t SysmanKmdInterfaceI915Upstream::getEventType(const bool isIntegratedDev
return getEventTypeImpl(i915DirName, isIntegratedDevice);
}
// I915 upstream: the wedged check is delegated unchanged to getWedgedStatusImpl().
void SysmanKmdInterfaceI915Upstream::getWedgedStatus(LinuxSysmanImp *pLinuxSysmanImp, zes_device_state_t *pState) {
    this->getWedgedStatusImpl(pLinuxSysmanImp, pState);
}
} // namespace Sysman
} // namespace L0

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2023 Intel Corporation
# Copyright (C) 2023-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -7,6 +7,7 @@
# Files collected into L0_TESTS_SYSMAN_GLOBAL_OPERATIONS_LINUX, all resolved relative to
# this directory. How the list is consumed is outside this excerpt.
set(L0_TESTS_SYSMAN_GLOBAL_OPERATIONS_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations_xe.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_global_operations.h
)

View File

@ -0,0 +1,37 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "level_zero/sysman/test/unit_tests/sources/global_operations/linux/mock_global_operations.h"
#include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_fixture.h"
namespace L0 {
namespace Sysman {
namespace ult {
using SysmanGlobalOperationsXeFixture = SysmanDeviceFixture;
// With the XE KMD interface installed, zesDeviceGetState must succeed and must not
// report the wedged reset reason.
TEST_F(SysmanGlobalOperationsXeFixture, GivenValidDeviceHandleWhenCallingDeviceGetStateThenVerifyDeviceIsNotWedged) {
    // Swap in the XE KMD interface so the device-state query is routed through it.
    pLinuxSysmanImp->pSysmanKmdInterface = std::make_unique<SysmanKmdInterfaceXe>(pLinuxSysmanImp->getProductFamily());
    // NOTE(review): executionEnvironment is not referenced again in this test - confirm it is needed.
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();

    // Install a mock DRM as the driver model; ownership passes to the OS interface.
    auto pDrm = new DrmGlobalOpsMock(const_cast<NEO::RootDeviceEnvironment &>(pSysmanDeviceImp->getRootDeviceEnvironment()));
    pDrm->setupIoctlHelper(pSysmanDeviceImp->getRootDeviceEnvironment().getHardwareInfo()->platform.eProductFamily);
    auto &osInterface = pSysmanDeviceImp->getRootDeviceEnvironment().osInterface;
    osInterface->setDriverModel(std::unique_ptr<DrmGlobalOpsMock>(pDrm));

    // Value-initialized so the (non-fatal) expectations below never read indeterminate
    // memory, even if the call fails before it fills the structure.
    zes_device_state_t deviceState = {};
    ze_result_t result = zesDeviceGetState(pSysmanDevice, &deviceState);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(0u, deviceState.reset & ZES_RESET_REASON_FLAG_WEDGED);
}
} // namespace ult
} // namespace Sysman
} // namespace L0

View File

@ -7,11 +7,14 @@
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/linux/drm_neo.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "level_zero/sysman/source/shared/linux/sysman_kmd_interface.h"
#include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_fixture.h"
#include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_hw_device_id.h"
#include "level_zero/sysman/test/unit_tests/sources/shared/linux/sysman_kmd_interface_tests.h"
#include "gtest/gtest.h"
@ -125,7 +128,7 @@ TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCalling
EXPECT_EQ(std::nullopt, pSysmanKmdInterface->getEngineClassString(EngineClass::ENGINE_CLASS_COMPUTE));
}
TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCallingGetNumEngineTypeAndInstancesTenErrorIsReturned) {
TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCallingGetNumEngineTypeAndInstancesThenErrorIsReturned) {
std::vector<std::string> mockVecString = {"rcs"};
std::map<zes_engine_type_flag_t, std::vector<std::string>> mockMapofEngine = {{ZES_ENGINE_TYPE_FLAG_RENDER, mockVecString}};
auto pSysmanKmdInterface = pLinuxSysmanImp->getSysmanKmdInterface();
@ -134,6 +137,30 @@ TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCalling
mockMapofEngine, pLinuxSysmanImp, nullptr, true, 0));
}
// With a mock DRM whose ioctl always succeeds, the KMD interface's wedged query must not
// set ZES_RESET_REASON_FLAG_WEDGED.
TEST_F(SysmanFixtureDeviceI915Prelim, GivenSysmanKmdInterfaceInstanceWhenCallingGetDeviceWedgedStatusThenVerifyDeviceIsNotWedged) {
    // Function-scope static so the local class below can use it in its base-class
    // initializer. Base classes are constructed before non-static data members are
    // initialized, so reading the mockFd member there would yield an indeterminate value.
    static constexpr int defaultMockFd = 33;

    class DrmMock : public Drm {
      public:
        DrmMock(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique<MockSysmanHwDeviceIdDrm>(defaultMockFd, ""), rootDeviceEnvironment) {}
        using Drm::setupIoctlHelper;
        int ioctlRetVal = 0;
        int ioctlErrno = 0;
        int mockFd = defaultMockFd;
        // Every ioctl reports the configured return value (0 by default, i.e. success).
        int ioctl(DrmIoctl request, void *arg) override {
            return ioctlRetVal;
        }
        int getErrno() override { return ioctlErrno; }
    };

    // NOTE(review): executionEnvironment is not referenced again in this test - confirm it is needed.
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();

    // Install the mock DRM as the driver model; ownership passes to the OS interface.
    auto pDrm = new DrmMock(const_cast<NEO::RootDeviceEnvironment &>(pSysmanDeviceImp->getRootDeviceEnvironment()));
    pDrm->setupIoctlHelper(pSysmanDeviceImp->getRootDeviceEnvironment().getHardwareInfo()->platform.eProductFamily);
    auto &osInterface = pSysmanDeviceImp->getRootDeviceEnvironment().osInterface;
    osInterface->setDriverModel(std::unique_ptr<DrmMock>(pDrm));

    auto pSysmanKmdInterface = pLinuxSysmanImp->getSysmanKmdInterface();
    zes_device_state_t deviceState = {};
    pSysmanKmdInterface->getWedgedStatus(pLinuxSysmanImp, &deviceState);
    EXPECT_EQ(0u, deviceState.reset & ZES_RESET_REASON_FLAG_WEDGED);
}
} // namespace ult
} // namespace Sysman
} // namespace L0
} // namespace L0