mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Use physical subdeviceId for sysman ras, freq and standby module
Related-To: LOCI-2925, LOCI-2926, LOCI-3236
Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
aac8754e67
commit
ffcca3ba53
@@ -9,6 +9,8 @@
|
||||
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
|
||||
#include "level_zero/tools/source/sysman/sysman_imp.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace L0 {
|
||||
@@ -114,11 +116,11 @@ void FrequencyImp::init() {
|
||||
}
|
||||
|
||||
// Constructs the frequency handle for the given device and frequency domain.
//
// pOsSysman             - OS-specific sysman object forwarded to OsFrequency::create().
// handle                - device handle this frequency domain belongs to (stored in deviceHandle).
// frequencyDomainNumber - frequency domain forwarded to OsFrequency::create().
//
// The on-subdevice flag and subdevice id are obtained from
// SysmanDeviceImp::getSysmanDeviceInfo() instead of ze_device_properties_t
// (per the commit description this yields the physical subdevice id).
FrequencyImp::FrequencyImp(OsSysman *pOsSysman, ze_device_handle_t handle, zes_freq_domain_t frequencyDomainNumber) : deviceHandle(handle) {
    uint32_t subdeviceId = 0;
    ze_bool_t onSubdevice = false;
    SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subdeviceId, onSubdevice);

    // Create the OS object exactly once; a second create() call would overwrite
    // (and leak) the first instance.
    pOsFrequency = OsFrequency::create(pOsSysman, onSubdevice, subdeviceId, frequencyDomainNumber);
    UNRECOVERABLE_IF(nullptr == pOsFrequency);

    init();
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "shared/source/device/sub_device.h"
|
||||
|
||||
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
|
||||
#include "level_zero/tools/source/sysman/sysman_imp.h"
|
||||
|
||||
#include "sysman/linux/fs_access.h"
|
||||
#include "sysman/linux/os_sysman_imp.h"
|
||||
@@ -68,9 +69,9 @@ void LinuxRasSourceFabric::getNodes(std::vector<std::string> &nodes, uint32_t su
|
||||
ze_result_t LinuxRasSourceFabric::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType,
|
||||
OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {
|
||||
LinuxSysmanImp *pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
|
||||
NEO::Device *neoDevice = static_cast<Device *>(deviceHandle)->getNEODevice();
|
||||
uint32_t subDeviceIndex = neoDevice->isSubDevice() ? static_cast<NEO::SubDevice *>(neoDevice)->getSubDeviceIndex() : 0;
|
||||
|
||||
ze_bool_t onSubDevice = false;
|
||||
uint32_t subDeviceIndex = 0;
|
||||
SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subDeviceIndex, onSubDevice);
|
||||
std::vector<std::string> nodes;
|
||||
getNodes(nodes, subDeviceIndex, &pLinuxSysmanImp->getFsAccess(), ZES_RAS_ERROR_TYPE_UNCORRECTABLE);
|
||||
if (nodes.size()) {
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
|
||||
#include "level_zero/tools/source/sysman/sysman_imp.h"
|
||||
|
||||
#include "sysman/linux/os_sysman_imp.h"
|
||||
|
||||
@@ -129,10 +130,9 @@ static uint64_t convertHexToUint64(std::string strVal) {
|
||||
}
|
||||
|
||||
static bool getErrorType(std::map<zes_ras_error_cat_t, std::vector<std::string>> categoryToListOfEvents, std::vector<std::string> &eventList, ze_device_handle_t deviceHandle) {
|
||||
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
|
||||
Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
|
||||
bool onSubDevice = deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE;
|
||||
uint32_t subDeviceId = deviceProperties.subdeviceId;
|
||||
ze_bool_t onSubDevice = false;
|
||||
uint32_t subDeviceId = 0;
|
||||
SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subDeviceId, onSubDevice);
|
||||
// Naming convention of files containing config values for errors
|
||||
// error--<Name of error> Ex:- error--engine-reset (config file with no subdevice)
|
||||
// error-gt<N>--<Name of error> Ex:- error-gt0--engine-reset (config file with subdevices)
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
|
||||
#include "shared/source/helpers/string.h"
|
||||
|
||||
#include "level_zero/tools/source/sysman/sysman_imp.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace L0 {
|
||||
@@ -36,9 +38,10 @@ void RasImp::init() {
|
||||
}
|
||||
|
||||
// Constructs the RAS handle of the given error type for the given device.
//
// pOsSysman - OS-specific sysman object forwarded to OsRas::create().
// type      - RAS error type forwarded to OsRas::create().
// handle    - device handle this RAS handle belongs to (stored in deviceHandle).
//
// The on-subdevice flag and subdevice id are obtained from
// SysmanDeviceImp::getSysmanDeviceInfo() instead of ze_device_properties_t
// (per the commit description this yields the physical subdevice id).
RasImp::RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_device_handle_t handle) : deviceHandle(handle) {
    uint32_t subdeviceId = 0;
    ze_bool_t onSubdevice = false;
    SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subdeviceId, onSubdevice);

    // Create the OS object exactly once; a second create() call would overwrite
    // (and leak) the first instance.
    pOsRas = OsRas::create(pOsSysman, type, onSubdevice, subdeviceId);
    init();
}
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
|
||||
#include "level_zero/tools/source/sysman/sysman_imp.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
ze_result_t StandbyImp::standbyGetProperties(zes_standby_properties_t *pProperties) {
|
||||
@@ -30,9 +32,10 @@ void StandbyImp::init() {
|
||||
}
|
||||
|
||||
// Constructs the standby handle for the given device.
//
// pOsSysman - OS-specific sysman object forwarded to OsStandby::create().
// handle    - device handle this standby domain belongs to (stored in deviceHandle).
//
// The on-subdevice flag and subdevice id are obtained from
// SysmanDeviceImp::getSysmanDeviceInfo() instead of ze_device_properties_t
// (per the commit description this yields the physical subdevice id).
StandbyImp::StandbyImp(OsSysman *pOsSysman, ze_device_handle_t handle) : deviceHandle(handle) {
    uint32_t subdeviceId = 0;
    ze_bool_t onSubdevice = false;
    SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subdeviceId, onSubdevice);

    // Create the OS object exactly once; a second create() call would overwrite
    // (and leak) the first instance.
    pOsStandby = OsStandby::create(pOsSysman, onSubdevice, subdeviceId);
    UNRECOVERABLE_IF(nullptr == pOsStandby);
    init();
}
|
||||
|
||||
@@ -857,7 +857,7 @@ TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFreq
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidSchedPropertiesRetrieved) {
|
||||
TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidFreqPropertiesRetrieved) {
|
||||
zes_freq_properties_t properties = {};
|
||||
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
|
||||
Device::fromHandle(device)->getProperties(&deviceProperties);
|
||||
@@ -869,5 +869,70 @@ TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyProp
|
||||
delete pLinuxFrequencyImp;
|
||||
}
|
||||
|
||||
class FreqMultiDeviceFixture : public SysmanMultiDeviceFixture {
|
||||
protected:
|
||||
DebugManagerStateRestore restorer;
|
||||
std::unique_ptr<Mock<FrequencySysfsAccess>> pSysfsAccess;
|
||||
SysfsAccess *pSysfsAccessOld = nullptr;
|
||||
std::vector<ze_device_handle_t> deviceHandles;
|
||||
|
||||
void SetUp() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
NEO::DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");
|
||||
SysmanMultiDeviceFixture::SetUp();
|
||||
pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess;
|
||||
pSysfsAccess = std::make_unique<NiceMock<Mock<FrequencySysfsAccess>>>();
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
|
||||
// delete handles created in initial SysmanDeviceHandleContext::init() call
|
||||
for (auto handle : pSysmanDeviceImp->pFrequencyHandleContext->handleList) {
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pFrequencyHandleContext->handleList.clear();
|
||||
uint32_t subDeviceCount = 0;
|
||||
// We received a device handle. Check for subdevices in this device
|
||||
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr);
|
||||
if (subDeviceCount == 0) {
|
||||
deviceHandles.resize(1, device->toHandle());
|
||||
} else {
|
||||
deviceHandles.resize(subDeviceCount, nullptr);
|
||||
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data());
|
||||
}
|
||||
getFreqHandles(0);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld;
|
||||
SysmanMultiDeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
std::vector<zes_freq_handle_t> getFreqHandles(uint32_t count) {
|
||||
std::vector<zes_freq_handle_t> handles(count, nullptr);
|
||||
EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
|
||||
return handles;
|
||||
}
|
||||
};
|
||||
|
||||
// With affinity mask "0.1" set by the fixture, every enumerated frequency
// domain is expected to report onSubdevice == true and subdeviceId == 1.
TEST_F(FreqMultiDeviceFixture, GivenAffinityMaskIsSetWhenCallingFrequencyPropertiesThenPropertiesAreReturnedForTheSubDevicesAccordingToAffinityMask) {
    uint32_t count = 0U;

    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr));
    EXPECT_EQ(count, handleComponentCount);
    auto handles = getFreqHandles(handleComponentCount);
    for (auto handle : handles) {
        // A null handle must stop the test instead of being passed to the API.
        ASSERT_NE(handle, nullptr);
        // Value-initialize so the checks below never read indeterminate fields.
        zes_freq_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetProperties(handle, &properties));
        EXPECT_EQ(nullptr, properties.pNext);
        EXPECT_EQ(ZES_FREQ_DOMAIN_GPU, properties.type);
        EXPECT_TRUE(properties.onSubdevice);
        EXPECT_EQ(1u, properties.subdeviceId); // Affinity mask 0.1 is set which means only subdevice 1 is exposed
    }
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -798,7 +798,7 @@ TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFreq
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidSchedPropertiesRetrieved) {
|
||||
TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidFreqPropertiesRetrieved) {
|
||||
zes_freq_properties_t properties = {};
|
||||
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
|
||||
Device::fromHandle(device)->getProperties(&deviceProperties);
|
||||
@@ -810,5 +810,70 @@ TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyProp
|
||||
delete pLinuxFrequencyImp;
|
||||
}
|
||||
|
||||
class FreqMultiDeviceFixture : public SysmanMultiDeviceFixture {
|
||||
protected:
|
||||
DebugManagerStateRestore restorer;
|
||||
std::unique_ptr<Mock<FrequencySysfsAccess>> pSysfsAccess;
|
||||
SysfsAccess *pSysfsAccessOld = nullptr;
|
||||
std::vector<ze_device_handle_t> deviceHandles;
|
||||
|
||||
void SetUp() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
NEO::DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");
|
||||
SysmanMultiDeviceFixture::SetUp();
|
||||
pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess;
|
||||
pSysfsAccess = std::make_unique<NiceMock<Mock<FrequencySysfsAccess>>>();
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
|
||||
// delete handles created in initial SysmanDeviceHandleContext::init() call
|
||||
for (auto handle : pSysmanDeviceImp->pFrequencyHandleContext->handleList) {
|
||||
delete handle;
|
||||
}
|
||||
pSysmanDeviceImp->pFrequencyHandleContext->handleList.clear();
|
||||
uint32_t subDeviceCount = 0;
|
||||
// We received a device handle. Check for subdevices in this device
|
||||
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr);
|
||||
if (subDeviceCount == 0) {
|
||||
deviceHandles.resize(1, device->toHandle());
|
||||
} else {
|
||||
deviceHandles.resize(subDeviceCount, nullptr);
|
||||
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data());
|
||||
}
|
||||
getFreqHandles(0);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld;
|
||||
SysmanMultiDeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
std::vector<zes_freq_handle_t> getFreqHandles(uint32_t count) {
|
||||
std::vector<zes_freq_handle_t> handles(count, nullptr);
|
||||
EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
|
||||
return handles;
|
||||
}
|
||||
};
|
||||
|
||||
// With affinity mask "0.1" set by the fixture, every enumerated frequency
// domain is expected to report onSubdevice == true and subdeviceId == 1.
TEST_F(FreqMultiDeviceFixture, GivenAffinityMaskIsSetWhenCallingFrequencyPropertiesThenAreReturnedForTheSubDevicesAccordingToAffinityMask) {
    uint32_t count = 0U;

    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr));
    EXPECT_EQ(count, handleComponentCount);
    auto handles = getFreqHandles(handleComponentCount);
    for (auto handle : handles) {
        // A null handle must stop the test instead of being passed to the API.
        ASSERT_NE(handle, nullptr);
        // Value-initialize so the checks below never read indeterminate fields.
        zes_freq_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetProperties(handle, &properties));
        EXPECT_EQ(nullptr, properties.pNext);
        EXPECT_EQ(ZES_FREQ_DOMAIN_GPU, properties.type);
        EXPECT_TRUE(properties.onSubdevice);
        EXPECT_EQ(1u, properties.subdeviceId); // Affinity mask 0.1 is set which means only subdevice 1 is exposed
    }
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -384,5 +384,41 @@ TEST_F(ZesStandbyMultiDeviceFixture, GivenOnSubdeviceNotSetWhenValidatingosStand
|
||||
delete pLinuxStandbyImp;
|
||||
}
|
||||
|
||||
class StandbyAffinityMaskFixture : public ZesStandbyMultiDeviceFixture {
|
||||
void SetUp() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
NEO::DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");
|
||||
ZesStandbyMultiDeviceFixture::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
ZesStandbyMultiDeviceFixture::TearDown();
|
||||
}
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
// With affinity mask "0.1" set by the fixture, every enumerated standby
// domain is expected to report onSubdevice == true and subdeviceId == 1.
TEST_F(StandbyAffinityMaskFixture, GivenAffinityMaskIsSetWhenCallingStandbyPropertiesThenProertiesAreReturnedForTheSubDevicesAccordingToAffinityMask) {
    // First query only the number of standby domains.
    uint32_t domainCount = 0;
    const ze_result_t enumStatus = zesDeviceEnumStandbyDomains(device, &domainCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, enumStatus);
    EXPECT_EQ(domainCount, mockHandleCount);

    zes_standby_properties_t properties = {};
    auto standbyHandles = getStandbyHandles(mockHandleCount);
    for (auto hStandby : standbyHandles) {
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetProperties(hStandby, &properties));
        EXPECT_EQ(nullptr, properties.pNext);
        EXPECT_EQ(ZES_STANDBY_TYPE_GLOBAL, properties.type);
        EXPECT_TRUE(properties.onSubdevice);
        EXPECT_EQ(1u, properties.subdeviceId); // Affinity mask 0.1 is set which means only subdevice 1 is exposed
    }
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user