diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp index a5a462bc70..9aad10e419 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp @@ -11,7 +11,7 @@ namespace L0 { -ze_result_t OsRas::getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman) { +ze_result_t OsRas::getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -22,15 +22,15 @@ ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state) { ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) { properties.pNext = nullptr; properties.type = osRasErrorType; - properties.onSubdevice = false; - properties.subdeviceId = 0; + properties.onSubdevice = isSubdevice; + properties.subdeviceId = subdeviceId; return ZE_RESULT_SUCCESS; } -LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type) : osRasErrorType(type) { +LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) { } -OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type) { - LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman, type); +OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) { + LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId); return static_cast(pLinuxRasImp); } diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h index dde3000bbf..3a7b0746db 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h @@ -16,12 +16,16 @@ class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass { public: ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override; ze_result_t osRasGetState(zes_ras_state_t &state) override; - LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type); + LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxRasImp() = default; ~LinuxRasImp() override = default; protected: zes_ras_error_type_t osRasErrorType = {}; + + private: + bool isSubdevice = false; + uint32_t subdeviceId = 0; }; } // namespace L0 diff --git a/level_zero/tools/source/sysman/ras/os_ras.h b/level_zero/tools/source/sysman/ras/os_ras.h index ec12e77cc0..9205b0814c 100644 --- a/level_zero/tools/source/sysman/ras/os_ras.h +++ b/level_zero/tools/source/sysman/ras/os_ras.h @@ -18,8 +18,8 @@ class OsRas { public: virtual ze_result_t osRasGetProperties(zes_ras_properties_t &properties) = 0; virtual ze_result_t osRasGetState(zes_ras_state_t &state) = 0; - static OsRas *create(OsSysman *pOsSysman, zes_ras_error_type_t type); - static ze_result_t getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman); + static OsRas *create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId); + static ze_result_t getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle); virtual ~OsRas() = default; }; diff --git a/level_zero/tools/source/sysman/ras/ras.cpp b/level_zero/tools/source/sysman/ras/ras.cpp index 20d0848660..3995b37281 100644 --- a/level_zero/tools/source/sysman/ras/ras.cpp +++ b/level_zero/tools/source/sysman/ras/ras.cpp @@ -16,16 +16,18 @@ RasHandleContext::~RasHandleContext() { delete pRas; } } -void RasHandleContext::createHandle(zes_ras_error_type_t type) { - Ras *pRas = new RasImp(pOsSysman, type); +void RasHandleContext::createHandle(zes_ras_error_type_t type, ze_device_handle_t deviceHandle) { + Ras *pRas = new RasImp(pOsSysman, type, deviceHandle); handleList.push_back(pRas); } -void RasHandleContext::init() { - std::vector errorType = {}; - OsRas::getSupportedRasErrorTypes(errorType, pOsSysman); - for (const auto &type : errorType) { - createHandle(type); +void RasHandleContext::init(std::vector &deviceHandles) { + for (const auto &deviceHandle : deviceHandles) { + std::vector errorType = {}; + OsRas::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle); + for (const auto &type : errorType) { + createHandle(type, deviceHandle); + } } } ze_result_t RasHandleContext::rasGet(uint32_t *pCount, diff --git a/level_zero/tools/source/sysman/ras/ras.h b/level_zero/tools/source/sysman/ras/ras.h index 102ba0b9cf..23cb7535d9 100644 --- a/level_zero/tools/source/sysman/ras/ras.h +++ b/level_zero/tools/source/sysman/ras/ras.h @@ -6,6 +6,7 @@ */ #pragma once +#include "level_zero/core/source/device/device.h" #include #include @@ -37,7 +38,7 @@ struct RasHandleContext { RasHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~RasHandleContext(); - void init(); + void init(std::vector &deviceHandles); ze_result_t rasGet(uint32_t *pCount, zes_ras_handle_t *phRas); @@ -45,7 +46,7 @@ struct RasHandleContext { std::vector handleList = {}; private: - void createHandle(zes_ras_error_type_t type); + void createHandle(zes_ras_error_type_t type, ze_device_handle_t deviceHandle); }; } // namespace L0 diff --git a/level_zero/tools/source/sysman/ras/ras_imp.cpp b/level_zero/tools/source/sysman/ras/ras_imp.cpp index 583e70c21e..f92092ddf9 100644 --- a/level_zero/tools/source/sysman/ras/ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/ras_imp.cpp @@ -35,8 +35,10 @@ void RasImp::init() { pOsRas->osRasGetProperties(rasProperties); } -RasImp::RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type) { - pOsRas = OsRas::create(pOsSysman, type); +RasImp::RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_device_handle_t handle) : deviceHandle(handle) { + ze_device_properties_t deviceProperties = {}; + Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); + pOsRas = OsRas::create(pOsSysman, type, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); init(); } diff --git a/level_zero/tools/source/sysman/ras/ras_imp.h b/level_zero/tools/source/sysman/ras/ras_imp.h index 2c5fe35666..92672278cc 100644 --- a/level_zero/tools/source/sysman/ras/ras_imp.h +++ b/level_zero/tools/source/sysman/ras/ras_imp.h @@ -22,7 +22,7 @@ class RasImp : public Ras, NEO::NonCopyableOrMovableClass { ze_result_t rasGetState(zes_ras_state_t *pConfig) override; RasImp() = default; - RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type); + RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_device_handle_t deviceHandle); ~RasImp() override; OsRas *pOsRas = nullptr; @@ -30,6 +30,7 @@ class RasImp : public Ras, NEO::NonCopyableOrMovableClass { private: zes_ras_properties_t rasProperties = {}; + ze_device_handle_t deviceHandle = {}; }; } // namespace L0 diff --git a/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp b/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp index 615c622650..60630e9d6c 100644 --- a/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp @@ -14,7 +14,7 @@ class WddmRasImp : public OsRas { ze_result_t osRasGetState(zes_ras_state_t &state) override; }; -ze_result_t OsRas::getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman) { +ze_result_t OsRas::getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -26,7 +26,7 @@ ze_result_t WddmRasImp::osRasGetState(zes_ras_state_t &state) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } -OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type) { +OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) { WddmRasImp *pWddmRasImp = new WddmRasImp(); return static_cast(pWddmRasImp); } diff --git a/level_zero/tools/source/sysman/sysman_imp.cpp b/level_zero/tools/source/sysman/sysman_imp.cpp index d6e51e5ed1..61aa7332c3 100644 --- a/level_zero/tools/source/sysman/sysman_imp.cpp +++ b/level_zero/tools/source/sysman/sysman_imp.cpp @@ -94,7 +94,7 @@ void SysmanDeviceImp::init() { pSchedulerHandleContext->init(deviceHandles); } if (pRasHandleContext) { - pRasHandleContext->init(); + pRasHandleContext->init(deviceHandles); } if (pMemoryHandleContext) { pMemoryHandleContext->init(deviceHandles); diff --git a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp index 98788d70a1..70cb4629ae 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp +++ b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp @@ -19,10 +19,19 @@ namespace ult { constexpr uint32_t mockHandleCount = 0; struct SysmanRasFixture : public SysmanDeviceFixture { protected: + std::vector deviceHandles; void SetUp() override { SysmanDeviceFixture::SetUp(); pSysmanDeviceImp->pRasHandleContext->handleList.clear(); - pSysmanDeviceImp->pRasHandleContext->init(); + uint32_t subDeviceCount = 0; + Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); + if (subDeviceCount == 0) { + deviceHandles.resize(1, device->toHandle()); + } else { + deviceHandles.resize(subDeviceCount, nullptr); + Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); + } + pSysmanDeviceImp->pRasHandleContext->init(deviceHandles); } void TearDown() override { SysmanDeviceFixture::TearDown(); @@ -51,7 +60,7 @@ TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasZeroHandlesInRet EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(count, mockHandleCount); - RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE); + RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, mockHandleCount + 1); @@ -68,7 +77,7 @@ TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasZeroHandlesInRet } TEST_F(SysmanRasFixture, GivenValidRasHandleWhenGettingRasPropertiesThenSuccessIsReturned) { - RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE); + RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); auto handles = get_ras_handles(mockHandleCount + 1); @@ -86,7 +95,7 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenGettingRasPropertiesThenSuccessI } TEST_F(SysmanRasFixture, GivenValidRasHandleWhileCallingzesRasGetStateThenFailureIsReturned) { - RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE); + RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); auto handles = get_ras_handles(mockHandleCount + 1);