Add check for memory type before calculating ras hbm errors

Related-To: LOCI-3500

Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
Mayank Raghuwanshi
2023-02-14 12:30:28 +00:00
committed by Compute-Runtime-Automation
parent bfb59cc573
commit 3816b85fa0
10 changed files with 192 additions and 19 deletions

View File

@@ -12,6 +12,7 @@
#include "shared/source/helpers/sleep.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/linux/system_info.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
@@ -51,6 +52,7 @@ ze_result_t LinuxSysmanImp::init() {
DEBUG_BREAK_IF(nullptr == pPmuInterface);
getMemoryType();
return createPmtHandles();
}
@@ -472,6 +474,20 @@ ze_result_t LinuxSysmanImp::osColdReset() {
return ZE_RESULT_ERROR_DEVICE_LOST; // incase the reset fails inform upper layers.
}
// Returns the memory type reported by the DRM system info, querying it on the
// first successful call and serving the cached value afterwards. When the query
// fails or no system info object is available, the current value of memType is
// returned unchanged and the query is attempted again on the next call.
uint32_t LinuxSysmanImp::getMemoryType() {
    if (isMemTypeRetrieved) {
        return memType;
    }

    auto &drm = getDrm();
    if (!drm.querySystemInfo()) {
        return memType;
    }

    auto systemInfo = drm.getSystemInfo();
    if (systemInfo == nullptr) {
        return memType;
    }

    memType = systemInfo->getMemoryType();
    isMemTypeRetrieved = true;
    return memType;
}
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {
LinuxSysmanImp *pLinuxSysmanImp = new LinuxSysmanImp(pParentSysmanDeviceImp);
return static_cast<OsSysman *>(pLinuxSysmanImp);

View File

@@ -16,6 +16,7 @@
#include "level_zero/tools/source/sysman/linux/pmt/pmt.h"
#include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h"
#include "level_zero/tools/source/sysman/linux/udev/udev_lib.h"
#include "level_zero/tools/source/sysman/sysman_const.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
#include <linux/pci_regs.h>
@@ -58,6 +59,7 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
ze_device_handle_t getCoreDeviceHandle() override;
SysmanDeviceImp *getSysmanDeviceImp();
std::string getPciCardBusDirectoryPath(std::string realPciPath);
uint32_t getMemoryType();
static std::string getPciRootPortDirectoryPath(std::string realPciPath);
void releasePmtObject();
ze_result_t createPmtHandles();
@@ -94,8 +96,10 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
L0::UdevLib *pUdevLib = nullptr;
std::map<uint32_t, L0::PlatformMonitoringTech *> mapOfSubDeviceIdToPmtObject;
ze_result_t initLocalDeviceAndDrmHandles();
uint32_t memType = unknownMemoryType;
private:
bool isMemTypeRetrieved = false;
LinuxSysmanImp() = delete;
SysmanDeviceImp *pParentSysmanDeviceImp = nullptr;
static const std::string deviceDir;

View File

@@ -8,17 +8,31 @@
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/linux/system_info.h"
#include "level_zero/tools/source/sysman/linux/os_sysman_imp.h"
#include "drm/intel_hwconfig_types.h"
namespace L0 {
// Tells whether the memory type reported by the given sysman implementation is
// one of the HBM variants checked here (HBM2 or HBM2e).
static bool isMemoryTypeHbm(LinuxSysmanImp *pLinuxSysmanImp) {
    const uint32_t reportedType = pLinuxSysmanImp->getMemoryType();
    return reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2e ||
           reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2;
}
// Collects the RAS error types supported for the device into errorType.
// The GT source is always consulted. The HBM source is consulted only when the
// GT source left the set below maxErrorTypes entries AND the device memory is
// HBM; querying HBM unconditionally would defeat the memory-type check.
// NOTE(review): maxErrorTypes == 2 presumably corresponds to the correctable and
// uncorrectable error types - confirm against zes_ras_error_type_t.
void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {
    constexpr auto maxErrorTypes = 2;
    LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
    if (errorType.size() < maxErrorTypes) {
        auto pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
        if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
            LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
        }
    }
}
@@ -69,7 +83,9 @@ ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear)
// Populates rasSources with the error sources backing this RAS handle.
// The GT source is always registered; the HBM source is registered exactly once
// and only when the device memory type is HBM, so non-HBM devices never get an
// HBM source and HBM devices do not get a duplicate one.
void LinuxRasImp::initSources() {
    rasSources.push_back(std::make_unique<L0::LinuxRasSourceGt>(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId));
    if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
        rasSources.push_back(std::make_unique<L0::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
    }
}
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {

View File

@@ -59,4 +59,5 @@ constexpr uint64_t gigaUnitTransferToUnitTransfer = 1000 * 1000 * 1000;
constexpr int32_t memoryBusWidth = 128; // bus width in bytes
constexpr int32_t numMemoryChannels = 8;
// Sentinel value for a memory type that has not been determined.
constexpr uint32_t unknownMemoryType = UINT32_MAX;
// Extracts `width` bits of `x` starting at bit position `at`.
// The mask is built from an unsigned 64-bit one: with a plain `int` literal,
// `1 << width` is undefined for width >= 31, which silently broke wide fields.
// `width` must be less than 64; the result has at least unsigned long long type.
#define BITS(x, at, width) (((x) >> (at)) & ((1ULL << (width)) - 1))

View File

@@ -6,6 +6,10 @@
*/
#pragma once
#include "shared/source/os_interface/linux/drm_neo.h"
#include "shared/source/os_interface/linux/ioctl_helper.h"
#include "shared/source/os_interface/linux/system_info.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/test/common/test_macros/mock_method_macros.h"
#include "level_zero/tools/source/sysman/events/events_imp.h"
@@ -13,6 +17,9 @@
#include "level_zero/tools/source/sysman/firmware_util/firmware_util.h"
#include "level_zero/tools/source/sysman/linux/os_sysman_driver_imp.h"
#include "drm/intel_hwconfig_types.h"
using namespace NEO;
namespace L0 {
namespace ult {
@@ -227,6 +234,40 @@ struct MockEventsFwInterface : public FirmwareUtil {
ADDMETHOD_NOBASE_VOIDRETURN(fwGetMemoryHealthIndicator, (zes_mem_health_t * health));
};
// Drm test double used by the sysman events tests. querySystemInfo() either
// replays queued results or fabricates a SystemInfo from a small hardware-config
// blob that carries mockMemoryType.
struct MockEventNeoDrm : public Drm {
    using Drm::ioctlHelper;
    uint32_t mockMemoryType = INTEL_HWCONFIG_MEMORY_TYPE_HBM2e;
    // Static on purpose: the value is passed to the Drm base-class constructor,
    // which runs before any non-static data member of this struct is initialized.
    static constexpr int mockFd = 33;
    // Queued results for querySystemInfo(); when non-empty the front entry is returned.
    std::vector<bool> mockQuerySystemInfoReturnValue{};
    // When true, the front queued result is reused instead of being consumed.
    bool isRepeated = false;
    bool mockReturnEmptyRegions = false;

    MockEventNeoDrm(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique<HwDeviceIdDrm>(mockFd, ""), rootDeviceEnvironment) {}

    // Selects the memory type written into the fabricated hardware-config blob.
    void setMemoryType(uint32_t memory) {
        mockMemoryType = memory;
    }

    std::vector<uint8_t> getMemoryRegionsReturnsEmpty() {
        return {};
    }

    // With queued results: returns the front one (consuming it unless isRepeated)
    // and leaves systemInfo untouched. Otherwise builds systemInfo from the blob
    // and reports success.
    bool querySystemInfo() override {
        bool returnValue = true;
        if (!mockQuerySystemInfoReturnValue.empty()) {
            returnValue = mockQuerySystemInfoReturnValue.front();
            if (!isRepeated) {
                mockQuerySystemInfoReturnValue.erase(mockQuerySystemInfoReturnValue.begin());
            }
            return returnValue;
        }

        uint32_t hwBlob[] = {INTEL_HWCONFIG_MAX_MEMORY_CHANNELS, 1, 8, INTEL_HWCONFIG_MEMORY_TYPE, 0, mockMemoryType};
        std::vector<uint8_t> inputBlobData(reinterpret_cast<uint8_t *>(hwBlob), reinterpret_cast<uint8_t *>(hwBlob) + sizeof(hwBlob));
        this->systemInfo.reset(new SystemInfo(inputBlobData));
        return returnValue;
    }
};
class PublicLinuxEventsImp : public L0::LinuxEventsImp {
public:
PublicLinuxEventsImp(OsSysman *pOsSysman) : LinuxEventsImp(pOsSysman) {}

View File

@@ -20,6 +20,8 @@ constexpr int drmDeviceFd = 0;
class SysmanEventsFixture : public SysmanDeviceFixture {
protected:
std::unique_ptr<MockEventsFsAccess> pFsAccess;
std::unique_ptr<MockEventNeoDrm> pDrm;
Drm *pOriginalDrm = nullptr;
FsAccess *pFsAccessOriginal = nullptr;
OsEvents *pOsEventsPrev = nullptr;
L0::EventsImp *pEventsImp;
@@ -38,6 +40,10 @@ class SysmanEventsFixture : public SysmanDeviceFixture {
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pFsAccess = std::make_unique<MockEventsFsAccess>();
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
pDrm = std::make_unique<MockEventNeoDrm>(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment()));
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<IoctlHelperPrelim20>(*pDrm));
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
pLinuxSysmanImp->pDrm = pDrm.get();
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
pSysfsAccess = std::make_unique<MockEventsSysfsAccess>();
@@ -81,6 +87,7 @@ class SysmanEventsFixture : public SysmanDeviceFixture {
pEventsImp = nullptr;
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
pLinuxSysmanImp->pDrm = pOriginalDrm;
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
SysmanDeviceFixture::TearDown();

View File

@@ -38,6 +38,7 @@ class SysmanMockDrm : public Drm {
class PublicLinuxSysmanImp : public L0::LinuxSysmanImp {
public:
using LinuxSysmanImp::mapOfSubDeviceIdToPmtObject;
using LinuxSysmanImp::memType;
using LinuxSysmanImp::pDrm;
using LinuxSysmanImp::pFsAccess;
using LinuxSysmanImp::pFwUtilInterface;

View File

@@ -15,6 +15,8 @@
#include "level_zero/tools/source/sysman/ras/ras_imp.h"
#include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h"
#include "drm/intel_hwconfig_types.h"
namespace NEO {
namespace SysCalls {
extern bool allowFakeDevicePath;
@@ -633,10 +635,11 @@ TEST_F(SysmanDeviceFixture, GivenValidEnumeratedHandlesWhenReleaseIsCalledThenHa
count = 0;
RasImp *pRas = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle());
pLinuxSysmanImp->memType = INTEL_HWCONFIG_MEMORY_TYPE_LPDDR4;
pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pRas);
result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, NULL);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(count, 3u);
EXPECT_EQ(count, 1u);
pLinuxSysmanImp->releaseSysmanDeviceResources();

View File

@@ -6,6 +6,9 @@
*/
#pragma once
#include "shared/source/os_interface/linux/ioctl_helper.h"
#include "shared/source/os_interface/linux/system_info.h"
#include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h"
#include "level_zero/tools/source/sysman/linux/fs_access.h"
#include "level_zero/tools/source/sysman/linux/os_sysman_imp.h"
@@ -14,6 +17,8 @@
#include "level_zero/tools/source/sysman/ras/ras.h"
#include "level_zero/tools/source/sysman/ras/ras_imp.h"
#include "drm/intel_hwconfig_types.h"
using namespace NEO;
namespace L0 {
namespace ult {
@@ -651,6 +656,40 @@ struct MockRasFwInterface : public FirmwareUtil {
ADDMETHOD_NOBASE_VOIDRETURN(fwGetMemoryHealthIndicator, (zes_mem_health_t * health));
};
// Drm test double used by the sysman RAS tests. querySystemInfo() either replays
// queued results or fabricates a SystemInfo from a small hardware-config blob
// that carries mockMemoryType.
struct MockRasNeoDrm : public Drm {
    using Drm::ioctlHelper;
    uint32_t mockMemoryType = INTEL_HWCONFIG_MEMORY_TYPE_HBM2e;
    // Static on purpose: the value is passed to the Drm base-class constructor,
    // which runs before any non-static data member of this struct is initialized.
    static constexpr int mockFd = 33;
    // Queued results for querySystemInfo(); when non-empty the front entry is returned.
    std::vector<bool> mockQuerySystemInfoReturnValue{};
    // When true, the front queued result is reused instead of being consumed.
    bool isRepeated = false;
    bool mockReturnEmptyRegions = false;

    MockRasNeoDrm(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique<HwDeviceIdDrm>(mockFd, ""), rootDeviceEnvironment) {}

    // Selects the memory type written into the fabricated hardware-config blob.
    void setMemoryType(uint32_t memory) {
        mockMemoryType = memory;
    }

    std::vector<uint8_t> getMemoryRegionsReturnsEmpty() {
        return {};
    }

    // With queued results: returns the front one (consuming it unless isRepeated)
    // and leaves systemInfo untouched. Otherwise builds systemInfo from the blob
    // and reports success.
    bool querySystemInfo() override {
        bool returnValue = true;
        if (!mockQuerySystemInfoReturnValue.empty()) {
            returnValue = mockQuerySystemInfoReturnValue.front();
            if (!isRepeated) {
                mockQuerySystemInfoReturnValue.erase(mockQuerySystemInfoReturnValue.begin());
            }
            return returnValue;
        }

        uint32_t hwBlob[] = {INTEL_HWCONFIG_MAX_MEMORY_CHANNELS, 1, 8, INTEL_HWCONFIG_MEMORY_TYPE, 0, mockMemoryType};
        std::vector<uint8_t> inputBlobData(reinterpret_cast<uint8_t *>(hwBlob), reinterpret_cast<uint8_t *>(hwBlob) + sizeof(hwBlob));
        this->systemInfo.reset(new SystemInfo(inputBlobData));
        return returnValue;
    }
};
class PublicLinuxRasImp : public L0::LinuxRasImp {
public:
PublicLinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId) {}

View File

@@ -21,9 +21,11 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
std::unique_ptr<MockRasSysfsAccess> pSysfsAccess;
std::unique_ptr<MockRasPmuInterfaceImp> pPmuInterface;
std::unique_ptr<MockRasFwInterface> pRasFwUtilInterface;
std::unique_ptr<MockRasNeoDrm> pDrm;
MemoryManager *pMemoryManagerOriginal = nullptr;
std::unique_ptr<MockMemoryManagerInRasSysman> pMemoryManager;
FsAccess *pFsAccessOriginal = nullptr;
Drm *pOriginalDrm = nullptr;
SysfsAccess *pSysfsAccessOriginal = nullptr;
PmuInterface *pOriginalPmuInterface = nullptr;
FirmwareUtil *pFwUtilOriginal = nullptr;
@@ -41,15 +43,20 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
pFsAccess = std::make_unique<MockRasFsAccess>();
pSysfsAccess = std::make_unique<MockRasSysfsAccess>();
pRasFwUtilInterface = std::make_unique<MockRasFwInterface>();
pDrm = std::make_unique<MockRasNeoDrm>(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment()));
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<IoctlHelperPrelim20>(*pDrm));
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
pFwUtilOriginal = pLinuxSysmanImp->pFwUtilInterface;
pOriginalDrm = pLinuxSysmanImp->pDrm;
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
pLinuxSysmanImp->pFwUtilInterface = pRasFwUtilInterface.get();
pPmuInterface = std::make_unique<MockRasPmuInterfaceImp>(pLinuxSysmanImp);
pLinuxSysmanImp->pPmuInterface = pPmuInterface.get();
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
pLinuxSysmanImp->pDrm = pDrm.get();
for (const auto &handle : pSysmanDeviceImp->pRasHandleContext->handleList) {
delete handle;
@@ -74,6 +81,7 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
pLinuxSysmanImp->pFwUtilInterface = pFwUtilOriginal;
pLinuxSysmanImp->pDrm = pOriginalDrm;
SysmanDeviceFixture::TearDown();
}
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
@@ -145,7 +153,6 @@ TEST_F(SysmanRasFixture, GivenValidOsSysmanPointerWhenRetrievingSupportedRasErro
}
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAreAbsentThenZeroHandlesAreCreated) {
pFsAccess->mockReadDirectoryWithoutRasEvents = true;
pLinuxSysmanImp->pFwUtilInterface = nullptr;
@@ -153,7 +160,6 @@ TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEven
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
uint32_t count = 0;
ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, NULL);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -164,6 +170,50 @@ TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEven
EXPECT_EQ(testcount, 0u);
}
// With a non-HBM (LPDDR4) memory type and no RAS event directories, enumerating
// RAS error sets must succeed and report zero handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAndHbmAreAbsentThenZeroHandlesAreCreated) {
    pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_LPDDR4);
    pRasFwUtilInterface->mockMemorySuccess = true;
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;

    // Drop the handles created so far so the enumeration starts from an empty list.
    auto &rasHandleList = pSysmanDeviceImp->pRasHandleContext->handleList;
    for (const auto &rasHandle : rasHandleList) {
        delete rasHandle;
    }
    rasHandleList.clear();

    uint32_t handleCount = 0;
    const ze_result_t enumResult = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, enumResult);
    EXPECT_EQ(handleCount, 0u);
}
// With an HBM2 memory type and a working firmware interface, enumerating RAS
// error sets must succeed and report mockHandleCount handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfHbmAndFwInterfaceArePresentThenSuccessIsReturned) {
    pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2);
    pRasFwUtilInterface->mockMemorySuccess = true;

    // Drop the handles created so far so the enumeration starts from an empty list.
    auto &rasHandleList = pSysmanDeviceImp->pRasHandleContext->handleList;
    for (const auto &rasHandle : rasHandleList) {
        delete rasHandle;
    }
    rasHandleList.clear();

    uint32_t handleCount = 0;
    const ze_result_t enumResult = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, enumResult);
    EXPECT_EQ(handleCount, mockHandleCount);
}
// querySystemInfo() is made to report success through a queued result, a path on
// which the mock does not populate its system info. With no RAS event
// directories and no firmware interface, enumeration must succeed with zero handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAreAbsentAndQuerySystemInfoSucceedsButMemSysInfoIsNullThenZeroHandlesAreCreated) {
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;
    pDrm->mockQuerySystemInfoReturnValue.push_back(true);
    pLinuxSysmanImp->pFwUtilInterface = nullptr;

    // Drop the handles created so far so the enumeration starts from an empty list.
    auto &rasHandleList = pSysmanDeviceImp->pRasHandleContext->handleList;
    for (const auto &rasHandle : rasHandleList) {
        delete rasHandle;
    }
    rasHandleList.clear();

    uint32_t handleCount = 0;
    const ze_result_t enumResult = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, enumResult);
    EXPECT_EQ(handleCount, 0u);
}
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForGtThenSuccessIsReturned) {
pPmuInterface->mockPmuReadCorrectable = true;
@@ -172,7 +222,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForGtThenSuc
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
bool correctable = true;
for (auto handle : handles) {
@@ -209,7 +258,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForGtAfterCl
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
bool correctable = true;
ze_bool_t clear = 0;
@@ -272,7 +320,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForHbmThenSu
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
bool correctable = true;
@@ -297,7 +344,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForHbmWithCl
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
bool correctable = true;
ze_bool_t clear = 0;
@@ -346,7 +392,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
zes_ras_state_t state = {};
@@ -362,7 +407,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
zes_ras_state_t state = {};
@@ -400,7 +444,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesGetRasStateForGtInterf
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
zes_ras_state_t state = {};
@@ -417,7 +460,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesGetRasStateAndFirmware
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
zes_ras_state_t state = {};
@@ -463,7 +505,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
zes_ras_state_t state = {};
@@ -479,7 +520,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
zes_ras_state_t state = {};
@@ -495,7 +535,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
zes_ras_state_t state = {};
@@ -530,10 +569,12 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
MemoryManager *pMemoryManagerOriginal = nullptr;
std::unique_ptr<MockMemoryManagerInRasSysman> pMemoryManager;
std::unique_ptr<MockRasFwInterface> pRasFwUtilInterface;
std::unique_ptr<MockRasNeoDrm> pDrm;
FsAccess *pFsAccessOriginal = nullptr;
SysfsAccess *pSysfsAccessOriginal = nullptr;
PmuInterface *pOriginalPmuInterface = nullptr;
FirmwareUtil *pFwUtilOriginal = nullptr;
Drm *pOriginalDrm = nullptr;
std::vector<ze_device_handle_t> deviceHandles;
void SetUp() override {
@@ -545,6 +586,8 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
pMemoryManager = std::make_unique<MockMemoryManagerInRasSysman>(*neoDevice->getExecutionEnvironment());
pMemoryManager->localMemorySupported[0] = true;
device->getDriverHandle()->setMemoryManager(pMemoryManager.get());
pDrm = std::make_unique<MockRasNeoDrm>(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment()));
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<IoctlHelperPrelim20>(*pDrm));
pFsAccess = std::make_unique<MockRasFsAccess>();
pSysfsAccess = std::make_unique<MockRasSysfsAccess>();
pRasFwUtilInterface = std::make_unique<MockRasFwInterface>();
@@ -552,11 +595,14 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
pFwUtilOriginal = pLinuxSysmanImp->pFwUtilInterface;
pOriginalDrm = pLinuxSysmanImp->pDrm;
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
pLinuxSysmanImp->pFwUtilInterface = pRasFwUtilInterface.get();
pPmuInterface = std::make_unique<MockRasPmuInterfaceImp>(pLinuxSysmanImp);
pLinuxSysmanImp->pPmuInterface = pPmuInterface.get();
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
pLinuxSysmanImp->pDrm = pDrm.get();
pFsAccess->mockReadDirectoryForMultiDevice = true;
@@ -583,6 +629,7 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
pLinuxSysmanImp->pFwUtilInterface = pFwUtilOriginal;
pLinuxSysmanImp->pDrm = pOriginalDrm;
SysmanMultiDeviceFixture::TearDown();
}
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
@@ -591,7 +638,7 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
return handles;
}
};
TEST_F(SysmanMultiDeviceFixture, GivenValidSysmanHandleWithMultiDeviceWhenRetrievingRasHandlesThenSuccessIsReturned) {
TEST_F(SysmanRasMultiDeviceFixture, GivenValidSysmanHandleWithMultiDeviceWhenRetrievingRasHandlesThenSuccessIsReturned) {
RasHandleContext *pRasHandleContext = new RasHandleContext(pSysmanDeviceImp->pOsSysman);
uint32_t count = 0;
ze_result_t result = pRasHandleContext->rasGet(&count, nullptr);
@@ -638,7 +685,6 @@ TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGeStateF
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCountForSubDevice);
uint32_t handleIndex = 0u;
for (auto handle : handles) {
@@ -692,7 +738,6 @@ TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGeStateF
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
auto handles = getRasHandles(mockHandleCountForSubDevice);
uint32_t handleIndex = 0u;