Use physical subdeviceId for sysman ras, freq and standby modules

Related-To: LOCI-2925, LOCI-2926, LOCI-3236
Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
Mayank Raghuwanshi
2022-11-01 18:49:41 +00:00
committed by Compute-Runtime-Automation
parent aac8754e67
commit ffcca3ba53
8 changed files with 194 additions and 19 deletions

View File

@@ -9,6 +9,8 @@
#include "shared/source/helpers/debug_helpers.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
#include <cmath>
namespace L0 {
@@ -114,11 +116,11 @@ void FrequencyImp::init() {
}
// Constructs the frequency object for device `handle` and frequency domain
// `frequencyDomainNumber`: creates the OS-specific backend through OsFrequency::create,
// stops via UNRECOVERABLE_IF when no backend was created, then runs init().
// NOTE(review): this listing looks like a diff rendered without +/- markers, so both the
// getProperties()-based lookup and the getSysmanDeviceInfo()-based lookup are present and
// pOsFrequency is assigned twice. Presumably only the second variant is the intended
// code - confirm against the real file.
FrequencyImp::FrequencyImp(OsSysman *pOsSysman, ze_device_handle_t handle, zes_freq_domain_t frequencyDomainNumber) : deviceHandle(handle) {
// Variant 1: sub-device flag and id read from the core device properties.
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
pOsFrequency = OsFrequency::create(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId, frequencyDomainNumber);
// Variant 2: sub-device flag and id obtained from SysmanDeviceImp::getSysmanDeviceInfo
// (presumably fills subdeviceId and onSubdevice as out-arguments - TODO confirm).
uint32_t subdeviceId = 0;
ze_bool_t onSubdevice = false;
SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subdeviceId, onSubdevice);
pOsFrequency = OsFrequency::create(pOsSysman, onSubdevice, subdeviceId, frequencyDomainNumber);
UNRECOVERABLE_IF(nullptr == pOsFrequency);
init();
}

View File

@@ -8,6 +8,7 @@
#include "shared/source/device/sub_device.h"
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
#include "sysman/linux/fs_access.h"
#include "sysman/linux/os_sysman_imp.h"
@@ -68,9 +69,9 @@ void LinuxRasSourceFabric::getNodes(std::vector<std::string> &nodes, uint32_t su
ze_result_t LinuxRasSourceFabric::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType,
OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {
LinuxSysmanImp *pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
NEO::Device *neoDevice = static_cast<Device *>(deviceHandle)->getNEODevice();
uint32_t subDeviceIndex = neoDevice->isSubDevice() ? static_cast<NEO::SubDevice *>(neoDevice)->getSubDeviceIndex() : 0;
ze_bool_t onSubDevice = false;
uint32_t subDeviceIndex = 0;
SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subDeviceIndex, onSubDevice);
std::vector<std::string> nodes;
getNodes(nodes, subDeviceIndex, &pLinuxSysmanImp->getFsAccess(), ZES_RAS_ERROR_TYPE_UNCORRECTABLE);
if (nodes.size()) {

View File

@@ -6,6 +6,7 @@
*/
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
#include "sysman/linux/os_sysman_imp.h"
@@ -129,10 +130,9 @@ static uint64_t convertHexToUint64(std::string strVal) {
}
static bool getErrorType(std::map<zes_ras_error_cat_t, std::vector<std::string>> categoryToListOfEvents, std::vector<std::string> &eventList, ze_device_handle_t deviceHandle) {
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
bool onSubDevice = deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE;
uint32_t subDeviceId = deviceProperties.subdeviceId;
ze_bool_t onSubDevice = false;
uint32_t subDeviceId = 0;
SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subDeviceId, onSubDevice);
// Naming convention of files containing config values for errors
// error--<Name of error> Ex:- error--engine-reset (config file with no subdevice)
// error-gt<N>--<Name of error> Ex:- error-gt0--engine-reset (config file with subdevices)

View File

@@ -9,6 +9,8 @@
#include "shared/source/helpers/string.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
#include <cstring>
namespace L0 {
@@ -36,9 +38,10 @@ void RasImp::init() {
}
// Constructs the RAS object for device `handle` and error type `type`: creates the
// OS-specific backend through OsRas::create and then runs init().
// NOTE(review): this listing looks like a diff rendered without +/- markers, so both the
// getProperties()-based lookup and the getSysmanDeviceInfo()-based lookup are present and
// pOsRas is assigned twice. Presumably only the second variant is the intended code -
// confirm against the real file.
RasImp::RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_device_handle_t handle) : deviceHandle(handle) {
// Variant 1: sub-device flag and id read from the core device properties.
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
pOsRas = OsRas::create(pOsSysman, type, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId);
// Variant 2: sub-device flag and id obtained from SysmanDeviceImp::getSysmanDeviceInfo
// (presumably fills subdeviceId and onSubdevice as out-arguments - TODO confirm).
uint32_t subdeviceId = 0;
ze_bool_t onSubdevice = false;
SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subdeviceId, onSubdevice);
pOsRas = OsRas::create(pOsSysman, type, onSubdevice, subdeviceId);
init();
}

View File

@@ -9,6 +9,8 @@
#include "shared/source/helpers/debug_helpers.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
namespace L0 {
ze_result_t StandbyImp::standbyGetProperties(zes_standby_properties_t *pProperties) {
@@ -30,9 +32,10 @@ void StandbyImp::init() {
}
// Constructs the standby object for device `handle`: creates the OS-specific backend
// through OsStandby::create, stops via UNRECOVERABLE_IF when no backend was created,
// then runs init().
// NOTE(review): this listing looks like a diff rendered without +/- markers, so both the
// getProperties()-based lookup and the getSysmanDeviceInfo()-based lookup are present and
// pOsStandby is assigned twice. Presumably only the second variant is the intended code -
// confirm against the real file.
StandbyImp::StandbyImp(OsSysman *pOsSysman, ze_device_handle_t handle) : deviceHandle(handle) {
// Variant 1: sub-device flag and id read from the core device properties.
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
pOsStandby = OsStandby::create(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId);
// Variant 2: sub-device flag and id obtained from SysmanDeviceImp::getSysmanDeviceInfo
// (presumably fills subdeviceId and onSubdevice as out-arguments - TODO confirm).
uint32_t subdeviceId = 0;
ze_bool_t onSubdevice = false;
SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subdeviceId, onSubdevice);
pOsStandby = OsStandby::create(pOsSysman, onSubdevice, subdeviceId);
UNRECOVERABLE_IF(nullptr == pOsStandby);
init();
}

View File

@@ -857,7 +857,7 @@ TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFreq
}
}
TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidSchedPropertiesRetrieved) {
TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidFreqPropertiesRetrieved) {
zes_freq_properties_t properties = {};
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
Device::fromHandle(device)->getProperties(&deviceProperties);
@@ -869,5 +869,70 @@ TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyProp
delete pLinuxFrequencyImp;
}
class FreqMultiDeviceFixture : public SysmanMultiDeviceFixture {
protected:
DebugManagerStateRestore restorer;
std::unique_ptr<Mock<FrequencySysfsAccess>> pSysfsAccess;
SysfsAccess *pSysfsAccessOld = nullptr;
std::vector<ze_device_handle_t> deviceHandles;
void SetUp() override {
if (!sysmanUltsEnable) {
GTEST_SKIP();
}
NEO::DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");
SysmanMultiDeviceFixture::SetUp();
pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess;
pSysfsAccess = std::make_unique<NiceMock<Mock<FrequencySysfsAccess>>>();
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
// delete handles created in initial SysmanDeviceHandleContext::init() call
for (auto handle : pSysmanDeviceImp->pFrequencyHandleContext->handleList) {
delete handle;
}
pSysmanDeviceImp->pFrequencyHandleContext->handleList.clear();
uint32_t subDeviceCount = 0;
// We received a device handle. Check for subdevices in this device
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr);
if (subDeviceCount == 0) {
deviceHandles.resize(1, device->toHandle());
} else {
deviceHandles.resize(subDeviceCount, nullptr);
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data());
}
getFreqHandles(0);
}
void TearDown() override {
if (!sysmanUltsEnable) {
GTEST_SKIP();
}
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld;
SysmanMultiDeviceFixture::TearDown();
}
std::vector<zes_freq_handle_t> getFreqHandles(uint32_t count) {
std::vector<zes_freq_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
return handles;
}
};
// With the fixture's ZE_AFFINITY_MASK of "0.1", every enumerated frequency domain is expected
// to be a GPU domain that reports onSubdevice == true and subdeviceId == 1.
TEST_F(FreqMultiDeviceFixture, GivenAffinityMaskIsSetWhenCallingFrequencyPropertiesThenPropertiesAreReturnedForTheSubDevicesAccordingToAffinityMask) {
    uint32_t count = 0U;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr));
    EXPECT_EQ(count, handleComponentCount);

    auto handles = getFreqHandles(handleComponentCount);
    for (auto handle : handles) {
        // Fatal assertion: a null handle must not be passed on to zesFrequencyGetProperties.
        ASSERT_NE(handle, nullptr);
        // Value-initialised (as in the sibling properties test) so no field is ever read
        // while indeterminate.
        zes_freq_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetProperties(handle, &properties));
        EXPECT_EQ(nullptr, properties.pNext);
        EXPECT_EQ(ZES_FREQ_DOMAIN_GPU, properties.type);
        EXPECT_TRUE(properties.onSubdevice);
        EXPECT_EQ(1u, properties.subdeviceId); // Affinity mask 0.1 is set which means only subdevice 1 is exposed
    }
}
} // namespace ult
} // namespace L0

View File

@@ -798,7 +798,7 @@ TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFreq
}
}
TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidSchedPropertiesRetrieved) {
TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidFreqPropertiesRetrieved) {
zes_freq_properties_t properties = {};
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
Device::fromHandle(device)->getProperties(&deviceProperties);
@@ -810,5 +810,70 @@ TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyProp
delete pLinuxFrequencyImp;
}
class FreqMultiDeviceFixture : public SysmanMultiDeviceFixture {
protected:
DebugManagerStateRestore restorer;
std::unique_ptr<Mock<FrequencySysfsAccess>> pSysfsAccess;
SysfsAccess *pSysfsAccessOld = nullptr;
std::vector<ze_device_handle_t> deviceHandles;
void SetUp() override {
if (!sysmanUltsEnable) {
GTEST_SKIP();
}
NEO::DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");
SysmanMultiDeviceFixture::SetUp();
pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess;
pSysfsAccess = std::make_unique<NiceMock<Mock<FrequencySysfsAccess>>>();
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
// delete handles created in initial SysmanDeviceHandleContext::init() call
for (auto handle : pSysmanDeviceImp->pFrequencyHandleContext->handleList) {
delete handle;
}
pSysmanDeviceImp->pFrequencyHandleContext->handleList.clear();
uint32_t subDeviceCount = 0;
// We received a device handle. Check for subdevices in this device
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr);
if (subDeviceCount == 0) {
deviceHandles.resize(1, device->toHandle());
} else {
deviceHandles.resize(subDeviceCount, nullptr);
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data());
}
getFreqHandles(0);
}
void TearDown() override {
if (!sysmanUltsEnable) {
GTEST_SKIP();
}
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld;
SysmanMultiDeviceFixture::TearDown();
}
std::vector<zes_freq_handle_t> getFreqHandles(uint32_t count) {
std::vector<zes_freq_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
return handles;
}
};
// With the fixture's ZE_AFFINITY_MASK of "0.1", every enumerated frequency domain is expected
// to be a GPU domain that reports onSubdevice == true and subdeviceId == 1.
// NOTE(review): the test name reads "...ThenAreReturned..." and seems to be missing the word
// "Properties"; left unchanged here in case the name is referenced by test filters.
TEST_F(FreqMultiDeviceFixture, GivenAffinityMaskIsSetWhenCallingFrequencyPropertiesThenAreReturnedForTheSubDevicesAccordingToAffinityMask) {
    uint32_t count = 0U;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr));
    EXPECT_EQ(count, handleComponentCount);

    auto handles = getFreqHandles(handleComponentCount);
    for (auto handle : handles) {
        // Fatal assertion: a null handle must not be passed on to zesFrequencyGetProperties.
        ASSERT_NE(handle, nullptr);
        // Value-initialised (as in the sibling properties test) so no field is ever read
        // while indeterminate.
        zes_freq_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetProperties(handle, &properties));
        EXPECT_EQ(nullptr, properties.pNext);
        EXPECT_EQ(ZES_FREQ_DOMAIN_GPU, properties.type);
        EXPECT_TRUE(properties.onSubdevice);
        EXPECT_EQ(1u, properties.subdeviceId); // Affinity mask 0.1 is set which means only subdevice 1 is exposed
    }
}
} // namespace ult
} // namespace L0

View File

@@ -384,5 +384,41 @@ TEST_F(ZesStandbyMultiDeviceFixture, GivenOnSubdeviceNotSetWhenValidatingosStand
delete pLinuxStandbyImp;
}
// Standby fixture that sets ZE_AFFINITY_MASK to "0.1" before the base fixture's SetUp runs.
// Members are declared protected (instead of the implicit private of `class`) so that
// derived fixtures and TEST_F bodies can reach them, matching the usual GoogleTest fixture layout.
class StandbyAffinityMaskFixture : public ZesStandbyMultiDeviceFixture {
  protected:
    // Applies the affinity mask and then defers to the base fixture's SetUp.
    void SetUp() override {
        if (!sysmanUltsEnable) {
            GTEST_SKIP();
        }
        NEO::DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");
        ZesStandbyMultiDeviceFixture::SetUp();
    }

    // Defers to the base fixture's TearDown.
    void TearDown() override {
        if (!sysmanUltsEnable) {
            GTEST_SKIP();
        }
        ZesStandbyMultiDeviceFixture::TearDown();
    }

    // Presumably puts the debug flags back when the fixture is destroyed - TODO confirm.
    DebugManagerStateRestore restorer;
};
// With the fixture's ZE_AFFINITY_MASK of "0.1", every enumerated standby domain is expected
// to be of the global type and to report onSubdevice == true and subdeviceId == 1.
// NOTE(review): "Proerties" in the test name looks like a typo for "Properties"; the name is
// left unchanged here.
TEST_F(StandbyAffinityMaskFixture, GivenAffinityMaskIsSetWhenCallingStandbyPropertiesThenProertiesAreReturnedForTheSubDevicesAccordingToAffinityMask) {
    // First pass: query only the number of standby domains.
    uint32_t count = 0;
    ze_result_t result = zesDeviceEnumStandbyDomains(device, &count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, mockHandleCount);

    // One properties object is shared by all iterations, as in the original test.
    zes_standby_properties_t properties = {};
    auto standbyHandles = getStandbyHandles(mockHandleCount);
    for (auto standbyHandle : standbyHandles) {
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetProperties(standbyHandle, &properties));
        EXPECT_EQ(nullptr, properties.pNext);
        EXPECT_EQ(ZES_STANDBY_TYPE_GLOBAL, properties.type);
        EXPECT_TRUE(properties.onSubdevice);
        EXPECT_EQ(1u, properties.subdeviceId); // Affinity mask 0.1 is set which means only subdevice 1 is exposed
    }
}
} // namespace ult
} // namespace L0