mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 09:03:14 +08:00
Add check for memory type before calculating ras hbm errors
Related-To: LOCI-3500 Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
bfb59cc573
commit
3816b85fa0
@@ -12,6 +12,7 @@
|
||||
#include "shared/source/helpers/sleep.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
#include "shared/source/os_interface/linux/system_info.h"
|
||||
|
||||
#include "level_zero/core/source/device/device_imp.h"
|
||||
#include "level_zero/core/source/driver/driver_handle_imp.h"
|
||||
@@ -51,6 +52,7 @@ ze_result_t LinuxSysmanImp::init() {
|
||||
|
||||
DEBUG_BREAK_IF(nullptr == pPmuInterface);
|
||||
|
||||
getMemoryType();
|
||||
return createPmtHandles();
|
||||
}
|
||||
|
||||
@@ -472,6 +474,20 @@ ze_result_t LinuxSysmanImp::osColdReset() {
|
||||
return ZE_RESULT_ERROR_DEVICE_LOST; // incase the reset fails inform upper layers.
|
||||
}
|
||||
|
||||
uint32_t LinuxSysmanImp::getMemoryType() {
|
||||
if (isMemTypeRetrieved == false) {
|
||||
auto pDrm = &getDrm();
|
||||
if (pDrm->querySystemInfo()) {
|
||||
auto memSystemInfo = pDrm->getSystemInfo();
|
||||
if (memSystemInfo != nullptr) {
|
||||
memType = memSystemInfo->getMemoryType();
|
||||
isMemTypeRetrieved = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return memType;
|
||||
}
|
||||
|
||||
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {
|
||||
LinuxSysmanImp *pLinuxSysmanImp = new LinuxSysmanImp(pParentSysmanDeviceImp);
|
||||
return static_cast<OsSysman *>(pLinuxSysmanImp);
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "level_zero/tools/source/sysman/linux/pmt/pmt.h"
|
||||
#include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h"
|
||||
#include "level_zero/tools/source/sysman/linux/udev/udev_lib.h"
|
||||
#include "level_zero/tools/source/sysman/sysman_const.h"
|
||||
#include "level_zero/tools/source/sysman/sysman_imp.h"
|
||||
|
||||
#include <linux/pci_regs.h>
|
||||
@@ -58,6 +59,7 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
|
||||
ze_device_handle_t getCoreDeviceHandle() override;
|
||||
SysmanDeviceImp *getSysmanDeviceImp();
|
||||
std::string getPciCardBusDirectoryPath(std::string realPciPath);
|
||||
uint32_t getMemoryType();
|
||||
static std::string getPciRootPortDirectoryPath(std::string realPciPath);
|
||||
void releasePmtObject();
|
||||
ze_result_t createPmtHandles();
|
||||
@@ -94,8 +96,10 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
|
||||
L0::UdevLib *pUdevLib = nullptr;
|
||||
std::map<uint32_t, L0::PlatformMonitoringTech *> mapOfSubDeviceIdToPmtObject;
|
||||
ze_result_t initLocalDeviceAndDrmHandles();
|
||||
uint32_t memType = unknownMemoryType;
|
||||
|
||||
private:
|
||||
bool isMemTypeRetrieved = false;
|
||||
LinuxSysmanImp() = delete;
|
||||
SysmanDeviceImp *pParentSysmanDeviceImp = nullptr;
|
||||
static const std::string deviceDir;
|
||||
|
||||
@@ -8,17 +8,31 @@
|
||||
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
|
||||
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/os_interface/linux/system_info.h"
|
||||
|
||||
#include "level_zero/tools/source/sysman/linux/os_sysman_imp.h"
|
||||
|
||||
#include "drm/intel_hwconfig_types.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
// Returns true when the memory type reported by pLinuxSysmanImp is HBM2 or HBM2e.
static bool isMemoryTypeHbm(LinuxSysmanImp *pLinuxSysmanImp) {
    const uint32_t reportedType = pLinuxSysmanImp->getMemoryType();
    return (reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2e) || (reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2);
}
|
||||
|
||||
void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {
|
||||
|
||||
constexpr auto maxErrorTypes = 2;
|
||||
LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
|
||||
if (errorType.size() < maxErrorTypes) {
|
||||
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
|
||||
auto pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
|
||||
if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
|
||||
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,7 +83,9 @@ ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear)
|
||||
|
||||
void LinuxRasImp::initSources() {
|
||||
rasSources.push_back(std::make_unique<L0::LinuxRasSourceGt>(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId));
|
||||
rasSources.push_back(std::make_unique<L0::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
|
||||
if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
|
||||
rasSources.push_back(std::make_unique<L0::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
|
||||
}
|
||||
}
|
||||
|
||||
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {
|
||||
|
||||
@@ -59,4 +59,5 @@ constexpr uint64_t gigaUnitTransferToUnitTransfer = 1000 * 1000 * 1000;
|
||||
|
||||
constexpr int32_t memoryBusWidth = 128; // bus width in bytes
|
||||
constexpr int32_t numMemoryChannels = 8;
|
||||
// Placeholder for a memory type that is not known; LinuxSysmanImp::memType is initialised to this value.
constexpr uint32_t unknownMemoryType = UINT32_MAX;
|
||||
#define BITS(x, at, width) (((x) >> (at)) & ((1 << (width)) - 1))
|
||||
|
||||
@@ -6,6 +6,10 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/os_interface/linux/drm_neo.h"
|
||||
#include "shared/source/os_interface/linux/ioctl_helper.h"
|
||||
#include "shared/source/os_interface/linux/system_info.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
#include "shared/test/common/test_macros/mock_method_macros.h"
|
||||
|
||||
#include "level_zero/tools/source/sysman/events/events_imp.h"
|
||||
@@ -13,6 +17,9 @@
|
||||
#include "level_zero/tools/source/sysman/firmware_util/firmware_util.h"
|
||||
#include "level_zero/tools/source/sysman/linux/os_sysman_driver_imp.h"
|
||||
|
||||
#include "drm/intel_hwconfig_types.h"
|
||||
|
||||
using namespace NEO;
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
@@ -227,6 +234,40 @@ struct MockEventsFwInterface : public FirmwareUtil {
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(fwGetMemoryHealthIndicator, (zes_mem_health_t * health));
|
||||
};
|
||||
|
||||
struct MockEventNeoDrm : public Drm {
|
||||
using Drm::ioctlHelper;
|
||||
uint32_t mockMemoryType = INTEL_HWCONFIG_MEMORY_TYPE_HBM2e;
|
||||
const int mockFd = 33;
|
||||
std::vector<bool> mockQuerySystemInfoReturnValue{};
|
||||
bool isRepeated = false;
|
||||
bool mockReturnEmptyRegions = false;
|
||||
MockEventNeoDrm(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique<HwDeviceIdDrm>(mockFd, ""), rootDeviceEnvironment) {}
|
||||
|
||||
void setMemoryType(uint32_t memory) {
|
||||
mockMemoryType = memory;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> getMemoryRegionsReturnsEmpty() {
|
||||
return {};
|
||||
}
|
||||
|
||||
bool querySystemInfo() override {
|
||||
bool returnValue = true;
|
||||
if (!mockQuerySystemInfoReturnValue.empty()) {
|
||||
returnValue = mockQuerySystemInfoReturnValue.front();
|
||||
if (isRepeated != true) {
|
||||
mockQuerySystemInfoReturnValue.erase(mockQuerySystemInfoReturnValue.begin());
|
||||
}
|
||||
return returnValue;
|
||||
}
|
||||
|
||||
uint32_t hwBlob[] = {INTEL_HWCONFIG_MAX_MEMORY_CHANNELS, 1, 8, INTEL_HWCONFIG_MEMORY_TYPE, 0, mockMemoryType};
|
||||
std::vector<uint8_t> inputBlobData(reinterpret_cast<uint8_t *>(hwBlob), reinterpret_cast<uint8_t *>(hwBlob) + sizeof(hwBlob));
|
||||
this->systemInfo.reset(new SystemInfo(inputBlobData));
|
||||
return returnValue;
|
||||
}
|
||||
};
|
||||
|
||||
class PublicLinuxEventsImp : public L0::LinuxEventsImp {
|
||||
public:
|
||||
PublicLinuxEventsImp(OsSysman *pOsSysman) : LinuxEventsImp(pOsSysman) {}
|
||||
|
||||
@@ -20,6 +20,8 @@ constexpr int drmDeviceFd = 0;
|
||||
class SysmanEventsFixture : public SysmanDeviceFixture {
|
||||
protected:
|
||||
std::unique_ptr<MockEventsFsAccess> pFsAccess;
|
||||
std::unique_ptr<MockEventNeoDrm> pDrm;
|
||||
Drm *pOriginalDrm = nullptr;
|
||||
FsAccess *pFsAccessOriginal = nullptr;
|
||||
OsEvents *pOsEventsPrev = nullptr;
|
||||
L0::EventsImp *pEventsImp;
|
||||
@@ -38,6 +40,10 @@ class SysmanEventsFixture : public SysmanDeviceFixture {
|
||||
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
|
||||
pFsAccess = std::make_unique<MockEventsFsAccess>();
|
||||
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
|
||||
pDrm = std::make_unique<MockEventNeoDrm>(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment()));
|
||||
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<IoctlHelperPrelim20>(*pDrm));
|
||||
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
|
||||
pLinuxSysmanImp->pDrm = pDrm.get();
|
||||
|
||||
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
|
||||
pSysfsAccess = std::make_unique<MockEventsSysfsAccess>();
|
||||
@@ -81,6 +87,7 @@ class SysmanEventsFixture : public SysmanDeviceFixture {
|
||||
pEventsImp = nullptr;
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
|
||||
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
|
||||
pLinuxSysmanImp->pDrm = pOriginalDrm;
|
||||
|
||||
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
|
||||
SysmanDeviceFixture::TearDown();
|
||||
|
||||
@@ -38,6 +38,7 @@ class SysmanMockDrm : public Drm {
|
||||
class PublicLinuxSysmanImp : public L0::LinuxSysmanImp {
|
||||
public:
|
||||
using LinuxSysmanImp::mapOfSubDeviceIdToPmtObject;
|
||||
using LinuxSysmanImp::memType;
|
||||
using LinuxSysmanImp::pDrm;
|
||||
using LinuxSysmanImp::pFsAccess;
|
||||
using LinuxSysmanImp::pFwUtilInterface;
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
#include "level_zero/tools/source/sysman/ras/ras_imp.h"
|
||||
#include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h"
|
||||
|
||||
#include "drm/intel_hwconfig_types.h"
|
||||
|
||||
namespace NEO {
|
||||
namespace SysCalls {
|
||||
extern bool allowFakeDevicePath;
|
||||
@@ -633,10 +635,11 @@ TEST_F(SysmanDeviceFixture, GivenValidEnumeratedHandlesWhenReleaseIsCalledThenHa
|
||||
|
||||
count = 0;
|
||||
RasImp *pRas = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle());
|
||||
pLinuxSysmanImp->memType = INTEL_HWCONFIG_MEMORY_TYPE_LPDDR4;
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pRas);
|
||||
result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, NULL);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(count, 3u);
|
||||
EXPECT_EQ(count, 1u);
|
||||
|
||||
pLinuxSysmanImp->releaseSysmanDeviceResources();
|
||||
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/os_interface/linux/ioctl_helper.h"
|
||||
#include "shared/source/os_interface/linux/system_info.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h"
|
||||
#include "level_zero/tools/source/sysman/linux/fs_access.h"
|
||||
#include "level_zero/tools/source/sysman/linux/os_sysman_imp.h"
|
||||
@@ -14,6 +17,8 @@
|
||||
#include "level_zero/tools/source/sysman/ras/ras.h"
|
||||
#include "level_zero/tools/source/sysman/ras/ras_imp.h"
|
||||
|
||||
#include "drm/intel_hwconfig_types.h"
|
||||
|
||||
using namespace NEO;
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
@@ -651,6 +656,40 @@ struct MockRasFwInterface : public FirmwareUtil {
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(fwGetMemoryHealthIndicator, (zes_mem_health_t * health));
|
||||
};
|
||||
|
||||
struct MockRasNeoDrm : public Drm {
|
||||
using Drm::ioctlHelper;
|
||||
uint32_t mockMemoryType = INTEL_HWCONFIG_MEMORY_TYPE_HBM2e;
|
||||
const int mockFd = 33;
|
||||
std::vector<bool> mockQuerySystemInfoReturnValue{};
|
||||
bool isRepeated = false;
|
||||
bool mockReturnEmptyRegions = false;
|
||||
MockRasNeoDrm(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique<HwDeviceIdDrm>(mockFd, ""), rootDeviceEnvironment) {}
|
||||
|
||||
void setMemoryType(uint32_t memory) {
|
||||
mockMemoryType = memory;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> getMemoryRegionsReturnsEmpty() {
|
||||
return {};
|
||||
}
|
||||
|
||||
bool querySystemInfo() override {
|
||||
bool returnValue = true;
|
||||
if (!mockQuerySystemInfoReturnValue.empty()) {
|
||||
returnValue = mockQuerySystemInfoReturnValue.front();
|
||||
if (isRepeated != true) {
|
||||
mockQuerySystemInfoReturnValue.erase(mockQuerySystemInfoReturnValue.begin());
|
||||
}
|
||||
return returnValue;
|
||||
}
|
||||
|
||||
uint32_t hwBlob[] = {INTEL_HWCONFIG_MAX_MEMORY_CHANNELS, 1, 8, INTEL_HWCONFIG_MEMORY_TYPE, 0, mockMemoryType};
|
||||
std::vector<uint8_t> inputBlobData(reinterpret_cast<uint8_t *>(hwBlob), reinterpret_cast<uint8_t *>(hwBlob) + sizeof(hwBlob));
|
||||
this->systemInfo.reset(new SystemInfo(inputBlobData));
|
||||
return returnValue;
|
||||
}
|
||||
};
|
||||
|
||||
class PublicLinuxRasImp : public L0::LinuxRasImp {
|
||||
public:
|
||||
PublicLinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId) {}
|
||||
|
||||
@@ -21,9 +21,11 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
|
||||
std::unique_ptr<MockRasSysfsAccess> pSysfsAccess;
|
||||
std::unique_ptr<MockRasPmuInterfaceImp> pPmuInterface;
|
||||
std::unique_ptr<MockRasFwInterface> pRasFwUtilInterface;
|
||||
std::unique_ptr<MockRasNeoDrm> pDrm;
|
||||
MemoryManager *pMemoryManagerOriginal = nullptr;
|
||||
std::unique_ptr<MockMemoryManagerInRasSysman> pMemoryManager;
|
||||
FsAccess *pFsAccessOriginal = nullptr;
|
||||
Drm *pOriginalDrm = nullptr;
|
||||
SysfsAccess *pSysfsAccessOriginal = nullptr;
|
||||
PmuInterface *pOriginalPmuInterface = nullptr;
|
||||
FirmwareUtil *pFwUtilOriginal = nullptr;
|
||||
@@ -41,15 +43,20 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
|
||||
pFsAccess = std::make_unique<MockRasFsAccess>();
|
||||
pSysfsAccess = std::make_unique<MockRasSysfsAccess>();
|
||||
pRasFwUtilInterface = std::make_unique<MockRasFwInterface>();
|
||||
pDrm = std::make_unique<MockRasNeoDrm>(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment()));
|
||||
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<IoctlHelperPrelim20>(*pDrm));
|
||||
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
|
||||
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
|
||||
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
|
||||
pFwUtilOriginal = pLinuxSysmanImp->pFwUtilInterface;
|
||||
pOriginalDrm = pLinuxSysmanImp->pDrm;
|
||||
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
|
||||
pLinuxSysmanImp->pFwUtilInterface = pRasFwUtilInterface.get();
|
||||
pPmuInterface = std::make_unique<MockRasPmuInterfaceImp>(pLinuxSysmanImp);
|
||||
pLinuxSysmanImp->pPmuInterface = pPmuInterface.get();
|
||||
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
|
||||
pLinuxSysmanImp->pDrm = pDrm.get();
|
||||
|
||||
for (const auto &handle : pSysmanDeviceImp->pRasHandleContext->handleList) {
|
||||
delete handle;
|
||||
@@ -74,6 +81,7 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
|
||||
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
|
||||
pLinuxSysmanImp->pFwUtilInterface = pFwUtilOriginal;
|
||||
pLinuxSysmanImp->pDrm = pOriginalDrm;
|
||||
SysmanDeviceFixture::TearDown();
|
||||
}
|
||||
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
|
||||
@@ -145,7 +153,6 @@ TEST_F(SysmanRasFixture, GivenValidOsSysmanPointerWhenRetrievingSupportedRasErro
|
||||
}
|
||||
|
||||
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAreAbsentThenZeroHandlesAreCreated) {
|
||||
|
||||
pFsAccess->mockReadDirectoryWithoutRasEvents = true;
|
||||
|
||||
pLinuxSysmanImp->pFwUtilInterface = nullptr;
|
||||
@@ -153,7 +160,6 @@ TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEven
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
uint32_t count = 0;
|
||||
ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, NULL);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
@@ -164,6 +170,50 @@ TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEven
|
||||
EXPECT_EQ(testcount, 0u);
|
||||
}
|
||||
|
||||
// Memory type is set to LPDDR4 (non-HBM) and the fs mock is told to report no
// RAS event directories; enumerating RAS error sets must then report zero handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAndHbmAreAbsentThenZeroHandlesAreCreated) {
    pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_LPDDR4);
    pRasFwUtilInterface->mockMemorySuccess = true;
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;

    // Delete every existing RAS handle and empty the list before enumerating.
    auto &rasHandles = pSysmanDeviceImp->pRasHandleContext->handleList;
    for (auto *pRasHandle : rasHandles) {
        delete pRasHandle;
    }
    rasHandles.clear();

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, 0u);
}
|
||||
|
||||
// Memory type is set to HBM2 with the firmware mock's mockMemorySuccess enabled;
// enumerating RAS error sets must succeed and report mockHandleCount handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfHbmAndFwInterfaceArePresentThenSuccessIsReturned) {
    pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2);
    pRasFwUtilInterface->mockMemorySuccess = true;

    // Delete every existing RAS handle and empty the list before enumerating.
    auto &rasHandles = pSysmanDeviceImp->pRasHandleContext->handleList;
    for (auto *pRasHandle : rasHandles) {
        delete pRasHandle;
    }
    rasHandles.clear();

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, mockHandleCount);
}
|
||||
|
||||
// No RAS event directories, no firmware interface, and a queued `true` result
// for the mock's querySystemInfo(); enumerating RAS error sets must report zero
// handles. Per the test name, getSystemInfo() is expected to yield null on this
// path - NOTE(review): confirm against the Drm mock.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAreAbsentAndQuerySystemInfoSucceedsButMemSysInfoIsNullThenZeroHandlesAreCreated) {
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;
    pDrm->mockQuerySystemInfoReturnValue.push_back(true);
    pLinuxSysmanImp->pFwUtilInterface = nullptr;

    // Delete every existing RAS handle and empty the list before enumerating.
    auto &rasHandles = pSysmanDeviceImp->pRasHandleContext->handleList;
    for (auto *pRasHandle : rasHandles) {
        delete pRasHandle;
    }
    rasHandles.clear();

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, 0u);
}
|
||||
|
||||
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForGtThenSuccessIsReturned) {
|
||||
|
||||
pPmuInterface->mockPmuReadCorrectable = true;
|
||||
@@ -172,7 +222,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForGtThenSuc
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
bool correctable = true;
|
||||
for (auto handle : handles) {
|
||||
@@ -209,7 +258,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForGtAfterCl
|
||||
}
|
||||
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
bool correctable = true;
|
||||
ze_bool_t clear = 0;
|
||||
@@ -272,7 +320,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForHbmThenSu
|
||||
}
|
||||
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
bool correctable = true;
|
||||
@@ -297,7 +344,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForHbmWithCl
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
bool correctable = true;
|
||||
ze_bool_t clear = 0;
|
||||
@@ -346,7 +392,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
for (auto handle : handles) {
|
||||
zes_ras_state_t state = {};
|
||||
@@ -362,7 +407,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
for (auto handle : handles) {
|
||||
zes_ras_state_t state = {};
|
||||
@@ -400,7 +444,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesGetRasStateForGtInterf
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
for (auto handle : handles) {
|
||||
zes_ras_state_t state = {};
|
||||
@@ -417,7 +460,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesGetRasStateAndFirmware
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
for (auto handle : handles) {
|
||||
zes_ras_state_t state = {};
|
||||
@@ -463,7 +505,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
for (auto handle : handles) {
|
||||
zes_ras_state_t state = {};
|
||||
@@ -479,7 +520,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
for (auto handle : handles) {
|
||||
zes_ras_state_t state = {};
|
||||
@@ -495,7 +535,6 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterf
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCount);
|
||||
for (auto handle : handles) {
|
||||
zes_ras_state_t state = {};
|
||||
@@ -530,10 +569,12 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
|
||||
MemoryManager *pMemoryManagerOriginal = nullptr;
|
||||
std::unique_ptr<MockMemoryManagerInRasSysman> pMemoryManager;
|
||||
std::unique_ptr<MockRasFwInterface> pRasFwUtilInterface;
|
||||
std::unique_ptr<MockRasNeoDrm> pDrm;
|
||||
FsAccess *pFsAccessOriginal = nullptr;
|
||||
SysfsAccess *pSysfsAccessOriginal = nullptr;
|
||||
PmuInterface *pOriginalPmuInterface = nullptr;
|
||||
FirmwareUtil *pFwUtilOriginal = nullptr;
|
||||
Drm *pOriginalDrm = nullptr;
|
||||
std::vector<ze_device_handle_t> deviceHandles;
|
||||
|
||||
void SetUp() override {
|
||||
@@ -545,6 +586,8 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
|
||||
pMemoryManager = std::make_unique<MockMemoryManagerInRasSysman>(*neoDevice->getExecutionEnvironment());
|
||||
pMemoryManager->localMemorySupported[0] = true;
|
||||
device->getDriverHandle()->setMemoryManager(pMemoryManager.get());
|
||||
pDrm = std::make_unique<MockRasNeoDrm>(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment()));
|
||||
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<IoctlHelperPrelim20>(*pDrm));
|
||||
pFsAccess = std::make_unique<MockRasFsAccess>();
|
||||
pSysfsAccess = std::make_unique<MockRasSysfsAccess>();
|
||||
pRasFwUtilInterface = std::make_unique<MockRasFwInterface>();
|
||||
@@ -552,11 +595,14 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
|
||||
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
|
||||
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
|
||||
pFwUtilOriginal = pLinuxSysmanImp->pFwUtilInterface;
|
||||
pOriginalDrm = pLinuxSysmanImp->pDrm;
|
||||
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
|
||||
pLinuxSysmanImp->pFwUtilInterface = pRasFwUtilInterface.get();
|
||||
pPmuInterface = std::make_unique<MockRasPmuInterfaceImp>(pLinuxSysmanImp);
|
||||
pLinuxSysmanImp->pPmuInterface = pPmuInterface.get();
|
||||
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
|
||||
pLinuxSysmanImp->pDrm = pDrm.get();
|
||||
|
||||
pFsAccess->mockReadDirectoryForMultiDevice = true;
|
||||
|
||||
@@ -583,6 +629,7 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
|
||||
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
|
||||
pLinuxSysmanImp->pFwUtilInterface = pFwUtilOriginal;
|
||||
pLinuxSysmanImp->pDrm = pOriginalDrm;
|
||||
SysmanMultiDeviceFixture::TearDown();
|
||||
}
|
||||
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
|
||||
@@ -591,7 +638,7 @@ struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
|
||||
return handles;
|
||||
}
|
||||
};
|
||||
TEST_F(SysmanMultiDeviceFixture, GivenValidSysmanHandleWithMultiDeviceWhenRetrievingRasHandlesThenSuccessIsReturned) {
|
||||
TEST_F(SysmanRasMultiDeviceFixture, GivenValidSysmanHandleWithMultiDeviceWhenRetrievingRasHandlesThenSuccessIsReturned) {
|
||||
RasHandleContext *pRasHandleContext = new RasHandleContext(pSysmanDeviceImp->pOsSysman);
|
||||
uint32_t count = 0;
|
||||
ze_result_t result = pRasHandleContext->rasGet(&count, nullptr);
|
||||
@@ -638,7 +685,6 @@ TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGeStateF
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCountForSubDevice);
|
||||
uint32_t handleIndex = 0u;
|
||||
for (auto handle : handles) {
|
||||
@@ -692,7 +738,6 @@ TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGeStateF
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
pSysmanDeviceImp->pRasHandleContext->init(deviceHandles);
|
||||
auto handles = getRasHandles(mockHandleCountForSubDevice);
|
||||
uint32_t handleIndex = 0u;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user