feature: Affinity mask plus ReturnSubDevicesAsApiDevices

When ReturnSubDevicesAsApiDevices=1 is used to expose sub-devices as
root devices, the driver should interpret the values passed in the
affinity mask as indices of the physical sub-devices.

For instance, in a dual-card system with multi-tile devices, we would have:

card 0, tile 0
card 0, tile 1
card 1, tile 0
card 1, tile 1

With:
ReturnSubDevicesAsApiDevices=0
ZE_AFFINITY_MASK=0,1

Then all tiles in card 0 and card 1 need to be exposed.

With:
ReturnSubDevicesAsApiDevices=1
ZE_AFFINITY_MASK=0,3

Then card 0 tile 0, and card 1 tile 1 need to be exposed.

Related-To: NEO-7137

Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
Jaime Arteaga 2023-02-02 02:54:47 +00:00 committed by Compute-Runtime-Automation
parent 275bed2a31
commit 07598fb5e0
5 changed files with 190 additions and 20 deletions

View File

@ -4257,7 +4257,10 @@ TEST(DeviceReturnSubDevicesAsApiDevicesTest, GivenReturnSubDevicesAsApiDevicesIs
uint32_t count = 0;
std::vector<ze_device_handle_t> hDevices;
EXPECT_EQ(multiDeviceFixture.driverHandle->getDevice(&count, nullptr), ZE_RESULT_SUCCESS);
EXPECT_EQ(count, 5u);
// mask is "0,1.1,2", but with ReturnSubDevicesAsApiDevices 1.1
// is not valid, so expected count is 2.
EXPECT_EQ(count, 2u);
hDevices.resize(count);
EXPECT_EQ(multiDeviceFixture.driverHandle->getDevice(&count, hDevices.data()), ZE_RESULT_SUCCESS);

View File

@ -230,41 +230,106 @@ class FabricVertexSubdeviceAsDeviceTestFixture : public MultiDeviceFixture,
};
// Queries the fabric vertex count, then requests one vertex fewer than reported
// and inspects the devices behind the returned vertex handles.
// NOTE(review): this span appears to mix removed and added lines of a diff
// (two different count expectations, and both EXPECT_TRUE and EXPECT_FALSE on
// isSubdevice for the same handle) — confirm which lines form the final test.
TEST_F(FabricVertexSubdeviceAsDeviceTestFixture, GivenReturnSubDevicesAsApiDevicesIsSetWhenFabricVerticesGetExpIsCalledCorrectVerticesAreReturned) {
uint32_t count = 0;
std::vector<ze_fabric_vertex_handle_t> phVertices;
// First call with a null array only queries the number of vertices.
EXPECT_EQ(driverHandle->fabricVertexGetExp(&count, nullptr), ZE_RESULT_SUCCESS);
EXPECT_EQ(count, 5u);
// only 2 vertexes for mask "0,1.1,2":
// 0 and 2
// 1.1 is ignored with ReturnSubDevicesAsApiDevices
uint32_t expectedVertexes = 2u;
EXPECT_EQ(count, expectedVertexes);
// Requesting for a reduced count
// NOTE(review): if count is 2 here, phVertices ends up with a single element,
// yet phVertices[1] is read further down — presumably out of range; confirm.
count -= 1;
phVertices.resize(count);
EXPECT_EQ(driverHandle->fabricVertexGetExp(&count, phVertices.data()), ZE_RESULT_SUCCESS);
ze_device_handle_t hDevice{};
// 0.0
// Device 0 associated with value 0 in mask
EXPECT_EQ(L0::zeFabricVertexGetDeviceExp(phVertices[0], &hDevice), ZE_RESULT_SUCCESS);
DeviceImp *deviceImp = static_cast<DeviceImp *>(hDevice);
EXPECT_TRUE(deviceImp->isSubdevice);
EXPECT_EQ(deviceImp->getPhysicalSubDeviceId(), 0u);
EXPECT_FALSE(deviceImp->isSubdevice);
// 0.1
// Device 1 associated with value 2 in mask
EXPECT_EQ(L0::zeFabricVertexGetDeviceExp(phVertices[1], &hDevice), ZE_RESULT_SUCCESS);
deviceImp = static_cast<DeviceImp *>(hDevice);
EXPECT_TRUE(deviceImp->isSubdevice);
EXPECT_EQ(deviceImp->getPhysicalSubDeviceId(), 1u);
EXPECT_FALSE(deviceImp->isSubdevice);
}
// 1.1
EXPECT_EQ(L0::zeFabricVertexGetDeviceExp(phVertices[2], &hDevice), ZE_RESULT_SUCCESS);
class FabricVertexTestFixture : public MultiDeviceFixture,
public ::testing::Test {
void SetUp() override {
NEO::DebugManager.flags.ZE_AFFINITY_MASK.set("0,1.1,2");
NEO::DebugManager.flags.ReturnSubDevicesAsApiDevices.set(0);
MultiDeviceFixture::setUp();
}
void TearDown() override {
MultiDeviceFixture::tearDown();
}
DebugManagerStateRestore restorer;
};
// With ReturnSubDevicesAsApiDevices set to 0, expects three fabric vertices and
// checks, per vertex, whether the backing device is a sub-device and how many
// sub-vertices it reports.
// NOTE(review): this span appears to contain leftover removed lines from a diff
// (see the notes below) — confirm which lines form the final test.
TEST_F(FabricVertexTestFixture, GivenReturnSubDevicesAsApiDevicesIsSetToFalseWhenFabricVerticesGetExpIsCalledCorrectVerticesAreReturned) {
uint32_t count = 0;
std::vector<ze_fabric_vertex_handle_t> phVertices;
// The fixture's SetUp already sets this flag to 0; it is set again here.
NEO::DebugManager.flags.ReturnSubDevicesAsApiDevices.set(0);
// First call with a null array only queries the number of vertices.
EXPECT_EQ(driverHandle->fabricVertexGetExp(&count, nullptr), ZE_RESULT_SUCCESS);
uint32_t expectedVertexes = 3u;
EXPECT_EQ(count, expectedVertexes);
phVertices.resize(count);
EXPECT_EQ(driverHandle->fabricVertexGetExp(&count, phVertices.data()), ZE_RESULT_SUCCESS);
// Device 0 associated with value 0 in mask
ze_device_handle_t hDevice{};
EXPECT_EQ(L0::zeFabricVertexGetDeviceExp(phVertices[0], &hDevice), ZE_RESULT_SUCCESS);
DeviceImp *deviceImp = static_cast<DeviceImp *>(hDevice);
EXPECT_FALSE(deviceImp->isSubdevice);
uint32_t countSubDevices = 0;
EXPECT_EQ(L0::zeFabricVertexGetSubVerticesExp(phVertices[0], &countSubDevices, nullptr), ZE_RESULT_SUCCESS);
EXPECT_EQ(countSubDevices, numSubDevices);
std::vector<ze_fabric_vertex_handle_t> phSubvertices(countSubDevices);
EXPECT_EQ(L0::zeFabricVertexGetSubVerticesExp(phVertices[0], &countSubDevices, phSubvertices.data()), ZE_RESULT_SUCCESS);
// Every sub-vertex of the first vertex must map to a sub-device.
for (auto subVertex : phSubvertices) {
ze_device_handle_t hSubDevice{};
EXPECT_EQ(L0::zeFabricVertexGetDeviceExp(subVertex, &hSubDevice), ZE_RESULT_SUCCESS);
DeviceImp *subDeviceImp = static_cast<DeviceImp *>(hSubDevice);
EXPECT_TRUE(subDeviceImp->isSubdevice);
}
// Device 1 associated with value 1.1 in mask
EXPECT_EQ(L0::zeFabricVertexGetDeviceExp(phVertices[1], &hDevice), ZE_RESULT_SUCCESS);
deviceImp = static_cast<DeviceImp *>(hDevice);
EXPECT_FALSE(deviceImp->isSubdevice);
EXPECT_EQ(deviceImp->getPhysicalSubDeviceId(), 1u);
// 2.0
// NOTE(review): with three vertices, index 3 is past the end of phVertices;
// this looks like a removed diff line — confirm.
EXPECT_EQ(L0::zeFabricVertexGetDeviceExp(phVertices[3], &hDevice), ZE_RESULT_SUCCESS);
phSubvertices.clear();
countSubDevices = 0;
EXPECT_EQ(L0::zeFabricVertexGetSubVerticesExp(phVertices[1], &countSubDevices, nullptr), ZE_RESULT_SUCCESS);
EXPECT_EQ(countSubDevices, 0u);
// Device 2 associated with value 2 in mask
// NOTE(review): the comment refers to device 2, but index 1 is queried here
// while the sub-vertex query below uses index 2 — confirm the intended index.
EXPECT_EQ(L0::zeFabricVertexGetDeviceExp(phVertices[1], &hDevice), ZE_RESULT_SUCCESS);
deviceImp = static_cast<DeviceImp *>(hDevice);
// NOTE(review): EXPECT_TRUE and EXPECT_FALSE on the same isSubdevice value
// cannot both hold; one of them is presumably a removed diff line.
EXPECT_TRUE(deviceImp->isSubdevice);
EXPECT_EQ(deviceImp->getPhysicalSubDeviceId(), 0u);
EXPECT_FALSE(deviceImp->isSubdevice);
phSubvertices.clear();
countSubDevices = 0;
EXPECT_EQ(L0::zeFabricVertexGetSubVerticesExp(phVertices[2], &countSubDevices, nullptr), ZE_RESULT_SUCCESS);
EXPECT_EQ(countSubDevices, numSubDevices);
}
// Switches ReturnSubDevicesAsApiDevices to 1 inside the test body (the fixture's
// SetUp ran with the flag set to 0) and expects five fabric vertices, both from
// the count-only query and from the call that fills the handle array.
TEST_F(FabricVertexTestFixture, GivenReturnSubDevicesAsApiDevicesIsSetToTrueWhenFabricVerticesGetExpIsCalledCorrectVerticesAreReturned) {
    uint32_t vertexCount = 0;
    NEO::DebugManager.flags.ReturnSubDevicesAsApiDevices.set(1);

    // Count-only query: a null array asks just for the number of vertices.
    EXPECT_EQ(driverHandle->fabricVertexGetExp(&vertexCount, nullptr), ZE_RESULT_SUCCESS);
    uint32_t expectedVertexCount = 5u;
    EXPECT_EQ(vertexCount, expectedVertexCount);

    // Second query fills an array sized to the reported count.
    std::vector<ze_fabric_vertex_handle_t> vertexHandles(vertexCount);
    EXPECT_EQ(driverHandle->fabricVertexGetExp(&vertexCount, vertexHandles.data()), ZE_RESULT_SUCCESS);
}
using FabricEdgeFixture = Test<MultiDeviceFixture>;

View File

@ -147,7 +147,33 @@ void ExecutionEnvironment::parseAffinityMask() {
return;
}
const uint32_t numRootDevices = static_cast<uint32_t>(rootDeviceEnvironments.size());
bool exposeSubDevicesAsApiDevices = false;
if (NEO::DebugManager.flags.ReturnSubDevicesAsApiDevices.get() != -1) {
exposeSubDevicesAsApiDevices = NEO::DebugManager.flags.ReturnSubDevicesAsApiDevices.get();
}
uint32_t numRootDevices = static_cast<uint32_t>(rootDeviceEnvironments.size());
RootDeviceIndicesMap mapOfIndexes;
// Reserve at least for a size equal to rootDeviceEnvironments.size() times four,
// which is enough for typical configurations
size_t reservedSizeForIndices = numRootDevices * 4;
mapOfIndexes.reserve(reservedSizeForIndices);
if (exposeSubDevicesAsApiDevices) {
uint32_t currentDeviceIndex = 0;
for (uint32_t currentRootDevice = 0u; currentRootDevice < static_cast<uint32_t>(rootDeviceEnvironments.size()); currentRootDevice++) {
auto hwInfo = rootDeviceEnvironments[currentRootDevice]->getHardwareInfo();
auto subDevicesCount = GfxCoreHelper::getSubDevicesCount(hwInfo);
uint32_t currentSubDevice = 0;
mapOfIndexes[currentDeviceIndex++] = std::make_tuple(currentRootDevice, currentSubDevice);
for (currentSubDevice = 1; currentSubDevice < subDevicesCount; currentSubDevice++) {
mapOfIndexes[currentDeviceIndex++] = std::make_tuple(currentRootDevice, currentSubDevice);
}
}
numRootDevices = currentDeviceIndex;
UNRECOVERABLE_IF(numRootDevices > reservedSizeForIndices);
}
std::vector<AffinityMaskHelper> affinityMaskHelper(numRootDevices);
@ -157,6 +183,27 @@ void ExecutionEnvironment::parseAffinityMask() {
auto subEntries = StringHelpers::split(entry, ".");
uint32_t rootDeviceIndex = StringHelpers::toUint32t(subEntries[0]);
// tiles as devices
if (exposeSubDevicesAsApiDevices) {
    // In this mode numRootDevices was overwritten with the number of flattened
    // (card, tile) entries stored in mapOfIndexes, so the valid mask values are
    // [0, numRootDevices). A value equal to the count must be rejected too;
    // otherwise the lookup below reads one entry past the last populated one.
    if (rootDeviceIndex >= numRootDevices) {
        continue;
    }

    // ReturnSubDevicesAsApiDevices not supported with AllowSingleTileEngineInstancedSubDevices
    // so ignore X.Y
    if (subEntries.size() > 1) {
        continue;
    }

    // Translate the flattened index back into its (card, tile) pair and enable
    // that tile in the affinity helper of the owning card.
    const std::tuple<uint32_t, uint32_t> &indexKey = mapOfIndexes[rootDeviceIndex];
    const auto deviceIndex = std::get<0>(indexKey);
    const auto tileIndex = std::get<1>(indexKey);
    affinityMaskHelper[deviceIndex].enableGenericSubDevice(tileIndex);
    continue;
}
// cards as devices
if (rootDeviceIndex < numRootDevices) {
auto hwInfo = rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
auto subDevicesCount = GfxCoreHelper::getSubDevicesCount(hwInfo);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -12,6 +12,7 @@
#include <algorithm>
#include <cstdint>
#include <limits>
#include <tuple>
#include <vector>
template <size_t OnStackCapacity>
@ -479,4 +480,6 @@ bool operator!=(const StackVec<T, LhsStackCaps> &lhs,
return false == (lhs == rhs);
}
using RootDeviceIndicesContainer = StackVec<uint32_t, 16>;
constexpr size_t MaxRootDeviceIndices = 16;
using RootDeviceIndicesContainer = StackVec<uint32_t, MaxRootDeviceIndices>;
using RootDeviceIndicesMap = StackVec<std::tuple<uint32_t, uint32_t>, MaxRootDeviceIndices>;

View File

@ -626,6 +626,58 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenZexNumberOfCssAndZeAffinityMaskSe
}
}
// Tiles-as-devices model (ReturnSubDevicesAsApiDevices=1) with 4 root devices
// of 4 sub-devices each: after applying ZE_AFFINITY_MASK "0,3,4,1.1,9,15,25",
// DeviceFactory::createDevices is expected to return 4 devices.
// Presumably 0 and 3 land on card 0, 4 on card 1, 9 on card 2 and 15 on card 3,
// while "1.1" and the out-of-range 25 are ignored — TODO confirm against
// ExecutionEnvironment::parseAffinityMask.
HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenZeAffinityMaskSetAndTilesAsDevicesModelThenThenProperNumberDevicesIsExposed) {
    VariableBackup<UltHwConfig> hwConfigBackup(&ultHwConfig);
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
    DebugManagerStateRestore stateRestore;

    uint32_t rootDeviceCount = 4;
    uint32_t subDeviceCount = 4;
    uint32_t expectedDeviceCount = 4;

    DebugManager.flags.CreateMultipleRootDevices.set(rootDeviceCount);
    DebugManager.flags.CreateMultipleSubDevices.set(subDeviceCount);
    DebugManager.flags.ZE_AFFINITY_MASK.set("0,3,4,1.1,9,15,25");
    DebugManager.flags.SetCommandStreamReceiver.set(1);
    DebugManager.flags.ReturnSubDevicesAsApiDevices.set(1);

    auto hardwareInfo = *defaultHwInfo;
    MockExecutionEnvironment executionEnvironment(&hardwareInfo, false, rootDeviceCount);
    executionEnvironment.incRefInternal();

    auto createdDevices = DeviceFactory::createDevices(executionEnvironment);
    EXPECT_EQ(createdDevices.size(), expectedDeviceCount);
}
// Cards-as-devices model (ReturnSubDevicesAsApiDevices=0) with 4 root devices
// of 4 sub-devices each: after applying ZE_AFFINITY_MASK "0,3,1.1,15,25",
// DeviceFactory::createDevices is expected to return 3 root devices.
// Presumably entries 0, 3 and 1.1 select cards 0, 3 and 1, while 15 and 25 are
// beyond the 4 available cards and are dropped — TODO confirm against
// ExecutionEnvironment::parseAffinityMask.
HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenZeAffinityMaskSetAndRootDevicesAsDevicesModelThenThenProperNumberRootDevicesIsExposed) {
    VariableBackup<UltHwConfig> hwConfigBackup(&ultHwConfig);
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
    DebugManagerStateRestore stateRestore;

    uint32_t rootDeviceCount = 4;
    uint32_t subDeviceCount = 4;
    uint32_t expectedRootDeviceCount = 3;

    DebugManager.flags.CreateMultipleRootDevices.set(rootDeviceCount);
    DebugManager.flags.CreateMultipleSubDevices.set(subDeviceCount);
    DebugManager.flags.ZE_AFFINITY_MASK.set("0,3,1.1,15,25");
    DebugManager.flags.SetCommandStreamReceiver.set(1);
    DebugManager.flags.ReturnSubDevicesAsApiDevices.set(0);

    auto hardwareInfo = *defaultHwInfo;
    MockExecutionEnvironment executionEnvironment(&hardwareInfo, false, rootDeviceCount);
    executionEnvironment.incRefInternal();

    auto createdDevices = DeviceFactory::createDevices(executionEnvironment);
    EXPECT_EQ(createdDevices.size(), expectedRootDeviceCount);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenZexNumberOfCssEnvVariableIsLargerThanNumberOfAvailableCcsCountWhenDeviceIsCreatedThenCreateDevicesWithAvailableCcsCount) {
VariableBackup<UltHwConfig> backup(&ultHwConfig);
ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;