feature: report multi-hop fabric connections

In addition to physical connections, we should also report multi-hop
logical connections (MDFI + XeLink), as they have positive bandwidth.

Use a modified BFS algorithm to try to find a path between fabric
vertices that are not directly connected. The BFS is modified because
the KMD always tries to use the MDFI link first and only then goes to XeLink.

Multi-hop connections are bi-directional but might not be symmetric, so
for every pair of vertices A & B that are not directly connected, we
need to try to find both `A -> B` and `B -> A`.

Related-To: GSD-7126

Signed-off-by: Wenbin Lu <wenbin.lu@intel.com>
This commit is contained in:
Wenbin Lu
2024-03-04 06:00:20 +00:00
committed by Compute-Runtime-Automation
parent a04c67ec52
commit a0faad6558
15 changed files with 1623 additions and 118 deletions

View File

@@ -552,7 +552,7 @@ void DeviceImp::getP2PPropertiesDirectFabricConnection(DeviceImp *peerDeviceImp,
ze_fabric_edge_exp_properties_t edgeProperties{};
fabricEdge->getProperties(&edgeProperties);
if (strcmp(edgeProperties.model, "XeLink") == 0) {
if (strstr(edgeProperties.model, "XeLink") != nullptr) {
bandwidthPropertiesDesc->logicalBandwidth = edgeProperties.bandwidth;
bandwidthPropertiesDesc->physicalBandwidth = edgeProperties.bandwidth;
bandwidthPropertiesDesc->bandwidthUnit = edgeProperties.bandwidthUnit;

View File

@@ -193,6 +193,11 @@ DriverHandleImp::~DriverHandleImp() {
}
this->fabricEdges.clear();
for (auto &edge : this->fabricIndirectEdges) {
delete edge;
}
this->fabricIndirectEdges.clear();
if (this->svmAllocsManager) {
this->svmAllocsManager->trimUSMDeviceAllocCache();
delete this->svmAllocsManager;
@@ -903,7 +908,7 @@ void DriverHandleImp::initializeVertexes() {
this->fabricVertices.push_back(fabricVertex);
}
FabricEdge::createEdgesFromVertices(this->fabricVertices, this->fabricEdges);
FabricEdge::createEdgesFromVertices(this->fabricVertices, this->fabricEdges, this->fabricIndirectEdges);
}
ze_result_t DriverHandleImp::fabricVertexGetExp(uint32_t *pCount, ze_fabric_vertex_handle_t *phVertices) {
@@ -957,9 +962,9 @@ ze_result_t DriverHandleImp::fabricEdgeGetExp(ze_fabric_vertex_handle_t hVertexA
bool updateEdges = false;
if (*pCount == 0) {
maxEdges = static_cast<uint32_t>(fabricEdges.size());
maxEdges = static_cast<uint32_t>(fabricEdges.size() + fabricIndirectEdges.size());
} else {
maxEdges = std::min<uint32_t>(*pCount, static_cast<uint32_t>(fabricEdges.size()));
maxEdges = std::min<uint32_t>(*pCount, static_cast<uint32_t>(fabricEdges.size() + fabricIndirectEdges.size()));
}
if (phEdges != nullptr) {
@@ -967,7 +972,10 @@ ze_result_t DriverHandleImp::fabricEdgeGetExp(ze_fabric_vertex_handle_t hVertexA
}
for (const auto &edge : fabricEdges) {
// Fabric Connections are bi-directional
if (edgeUpdateIndex >= maxEdges) {
break;
}
// Direct physical fabric connections are bi-directional
if ((edge->vertexA == queryVertexA && edge->vertexB == queryVertexB) ||
(edge->vertexA == queryVertexB && edge->vertexB == queryVertexA)) {
@@ -976,11 +984,19 @@ ze_result_t DriverHandleImp::fabricEdgeGetExp(ze_fabric_vertex_handle_t hVertexA
}
++edgeUpdateIndex;
}
}
// Stop if the edges overflow the count
for (const auto &edge : fabricIndirectEdges) {
if (edgeUpdateIndex >= maxEdges) {
break;
}
// Logical multi-hop edges might not be symmetric
if (edge->vertexA == queryVertexA && edge->vertexB == queryVertexB) {
if (updateEdges == true) {
phEdges[edgeUpdateIndex] = edge->toHandle();
}
++edgeUpdateIndex;
}
}
*pCount = edgeUpdateIndex;

View File

@@ -133,6 +133,7 @@ struct DriverHandleImp : public DriverHandle {
std::vector<Device *> devices;
std::vector<FabricVertex *> fabricVertices;
std::vector<FabricEdge *> fabricEdges;
std::vector<FabricEdge *> fabricIndirectEdges;
std::mutex rtasLock;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -121,31 +121,4 @@ FabricEdge *FabricEdge::create(FabricVertex *vertexA, FabricVertex *vertexB, ze_
return edge;
}
// Pre-change implementation: builds the list of direct fabric edges only.
// Every root vertex and each of its sub-vertices is considered, and one edge is
// appended to `edges` for each fabric device interface of vertex A that reports
// a connection to vertex B.
void FabricEdge::createEdgesFromVertices(const std::vector<FabricVertex *> &vertices, std::vector<FabricEdge *> &edges) {
// Get all vertices and sub-vertices
std::vector<FabricVertex *> allVertices = {};
for (auto &fabricVertex : vertices) {
allVertices.push_back(fabricVertex);
for (auto &fabricSubVertex : fabricVertex->subVertices) {
allVertices.push_back(fabricSubVertex);
}
}
// Get edges between all vertices
// Each unordered pair (A, B) with A before B in allVertices is visited exactly once.
for (uint32_t vertexAIndex = 0; vertexAIndex < allVertices.size(); vertexAIndex++) {
for (uint32_t vertexBIndex = vertexAIndex + 1; vertexBIndex < allVertices.size(); vertexBIndex++) {
// One property struct is shared by all interfaces probed for this pair.
ze_fabric_edge_exp_properties_t edgeProperty = {};
for (auto const &fabricDeviceInterface : allVertices[vertexAIndex]->pFabricDeviceInterfaces) {
bool isConnected =
fabricDeviceInterface.second->getEdgeProperty(allVertices[vertexBIndex], edgeProperty);
if (isConnected) {
edges.push_back(create(allVertices[vertexAIndex], allVertices[vertexBIndex], edgeProperty));
}
}
}
}
}
} // namespace L0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -46,7 +46,7 @@ struct FabricEdge : _ze_fabric_edge_handle_t {
public:
virtual ~FabricEdge() = default;
static void createEdgesFromVertices(const std::vector<FabricVertex *> &vertices, std::vector<FabricEdge *> &edges);
static void createEdgesFromVertices(const std::vector<FabricVertex *> &vertices, std::vector<FabricEdge *> &edges, std::vector<FabricEdge *> &indirectEdges);
static FabricEdge *create(FabricVertex *vertexA, FabricVertex *vertexB, ze_fabric_edge_exp_properties_t &properties);
ze_result_t getProperties(ze_fabric_edge_exp_properties_t *pEdgeProperties) const {
*pEdgeProperties = properties;

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2022-2023 Intel Corporation
# Copyright (C) 2022-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -10,6 +10,7 @@ if(UNIX)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/fabric.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_iaf.h
${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_iaf.cpp
)
@@ -17,6 +18,7 @@ if(UNIX)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/fabric.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_iaf_stub.h
${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_iaf_stub.cpp
)

View File

@@ -0,0 +1,149 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/fabric/fabric.h"
#include "shared/source/helpers/debug_helpers.h"
#include <algorithm>
#include <cstring>
#include <deque>
#include <limits>
#include <map>
#include <string>
#include <vector>
namespace L0 {
// Builds both the direct (physical) and the indirect (logical multi-hop) fabric edges.
//
// vertices      - root fabric vertices; their sub-vertices are included as well.
// edges         - receives one edge per reported direct connection between two vertices.
// indirectEdges - receives one directed edge A -> B for every ordered pair of vertices
//                 that has no direct edge but is reachable through a chain of direct edges.
//
// The multi-hop search is a level-by-level BFS in which, inside each level, vertices
// reached over a non-XeLink (expected: MDFI) edge are expanded before vertices reached
// over an XeLink edge.
void FabricEdge::createEdgesFromVertices(const std::vector<FabricVertex *> &vertices, std::vector<FabricEdge *> &edges, std::vector<FabricEdge *> &indirectEdges) {
// Get all vertices and sub-vertices
std::vector<FabricVertex *> allVertices = {};
for (auto &fabricVertex : vertices) {
allVertices.push_back(fabricVertex);
for (auto &fabricSubVertex : fabricVertex->subVertices) {
allVertices.push_back(fabricSubVertex);
}
}
// Get direct physical edges between all vertices
// adjacentVerticesMap: vertex index -> list of (neighbour index, properties of the direct edge to it).
// nonAdjacentVerticesMap: vertex index -> indices of vertices that still need a multi-hop search.
std::map<uint32_t, std::vector<std::pair<uint32_t, ze_fabric_edge_exp_properties_t *>>> adjacentVerticesMap;
std::map<uint32_t, std::vector<uint32_t>> nonAdjacentVerticesMap;
for (uint32_t vertexAIndex = 0; vertexAIndex < allVertices.size(); vertexAIndex++) {
for (uint32_t vertexBIndex = vertexAIndex + 1; vertexBIndex < allVertices.size(); vertexBIndex++) {
bool isAdjacent = false;
auto vertexA = allVertices[vertexAIndex];
auto vertexB = allVertices[vertexBIndex];
ze_fabric_edge_exp_properties_t edgeProperty = {};
for (auto const &fabricDeviceInterface : vertexA->pFabricDeviceInterfaces) {
bool isConnected =
fabricDeviceInterface.second->getEdgeProperty(vertexB, edgeProperty);
if (isConnected) {
edges.push_back(create(vertexA, vertexB, edgeProperty));
// The stored pointer refers to the properties member of the edge object itself,
// not to the vector slot. NOTE(review): assumes create() returns an object whose
// address stays fixed while `edges` keeps growing - confirm.
// Adjacency is recorded in both directions.
adjacentVerticesMap[vertexAIndex].emplace_back(vertexBIndex, &edges.back()->properties);
adjacentVerticesMap[vertexBIndex].emplace_back(vertexAIndex, &edges.back()->properties);
isAdjacent = true;
}
}
if (!isAdjacent) {
// Skip the pair when B is listed among A's own sub-vertices;
// every other unconnected pair is queued for a search in both directions.
auto &subVerticesOfA = vertexA->subVertices;
if (std::find(subVerticesOfA.begin(), subVerticesOfA.end(), vertexB) == subVerticesOfA.end()) {
nonAdjacentVerticesMap[vertexAIndex].push_back(vertexBIndex);
nonAdjacentVerticesMap[vertexBIndex].push_back(vertexAIndex);
}
}
}
}
// Find logical multi-hop edges between vertices not directly connected
for (const auto &[vertexAIndex, nonAdjacentVertices] : nonAdjacentVerticesMap) {
for (auto vertexBIndex : nonAdjacentVertices) {
// visited maps a discovered vertex to the vertex it was first reached from;
// the start vertex maps to itself.
std::map<uint32_t, uint32_t> visited;
visited[vertexAIndex] = vertexAIndex;
// toVisit holds the vertices of the current BFS level.
std::deque<uint32_t> toVisit;
toVisit.push_back(vertexAIndex);
uint32_t currVertexIndex = vertexAIndex;
// Each iteration of this loop processes one BFS level.
while (true) {
// Newly discovered vertices, bucketed by the model of the edge that reached them.
std::deque<uint32_t> toVisitIaf, toVisitMdfi;
while (!toVisit.empty()) {
currVertexIndex = toVisit.front();
toVisit.pop_front();
if (currVertexIndex == vertexBIndex) {
// Destination reached; currVertexIndex is left equal to vertexBIndex.
break;
}
for (auto [vertexIndex, edgeProperty] : adjacentVerticesMap[currVertexIndex]) {
if (visited.find(vertexIndex) == visited.end()) {
// Length 7 includes the terminating NUL, so only the exact model "XeLink" matches.
if (strncmp(edgeProperty->model, "XeLink", 7) == 0) {
toVisitIaf.push_back(vertexIndex);
} else {
// Any model other than "XeLink" is expected to be exactly "MDFI".
DEBUG_BREAK_IF(strncmp(edgeProperty->model, "MDFI", 5) != 0);
toVisitMdfi.push_back(vertexIndex);
}
visited[vertexIndex] = currVertexIndex;
}
}
}
if (currVertexIndex != vertexBIndex) {
if (toVisitIaf.size() + toVisitMdfi.size() != 0) {
// Next level: vertices reached over MDFI are expanded before those reached over XeLink.
toVisit = toVisitMdfi;
toVisit.insert(toVisit.end(), toVisitIaf.begin(), toVisitIaf.end());
} else {
// Nothing new was discovered: B is unreachable from A, so no indirect edge is created.
break;
}
} else {
// A path exists: walk the parent links back from B to A and build the edge properties.
std::string path = "";
ze_fabric_edge_exp_properties_t properties = {};
properties.stype = ZE_STRUCTURE_TYPE_FABRIC_EDGE_EXP_PROPERTIES;
properties.pNext = nullptr;
memset(properties.uuid.id, 0, ZE_MAX_UUID_SIZE);
memset(properties.model, 0, ZE_MAX_FABRIC_EDGE_MODEL_EXP_SIZE);
// Start at the maximum so the smallest XeLink bandwidth on the path wins below.
// NOTE(review): if the path contains no XeLink hop, bandwidth stays at this maximum - confirm intended.
properties.bandwidth = std::numeric_limits<uint32_t>::max();
properties.bandwidthUnit = ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC;
// Latency of a multi-hop edge is reported as the maximum value with an unknown unit.
properties.latency = std::numeric_limits<uint32_t>::max();
properties.latencyUnit = ZE_LATENCY_UNIT_UNKNOWN;
properties.duplexity = ZE_FABRIC_EDGE_EXP_DUPLEXITY_FULL_DUPLEX;
while (true) {
const auto parentIndex = visited[currVertexIndex];
ze_fabric_edge_exp_properties_t *currEdgeProperty = nullptr;
// Look up the direct edge parent -> current; the first matching adjacency entry is used.
for (const auto &[vertexIndex, edgeProperty] : adjacentVerticesMap[parentIndex]) {
if (vertexIndex == currVertexIndex) {
currEdgeProperty = edgeProperty;
break;
}
}
// Every parent link was recorded from an adjacency entry, so the lookup must succeed.
UNRECOVERABLE_IF(currEdgeProperty == nullptr);
// Hops are visited from B back to A, so each model name is prepended
// to keep the final string in A -> B order.
path = std::string(currEdgeProperty->model) + path;
// Only XeLink hops contribute to the reported bandwidth (minimum over the path).
if ((strncmp(currEdgeProperty->model, "XeLink", 7) == 0) &&
(currEdgeProperty->bandwidth < properties.bandwidth)) {
properties.bandwidth = currEdgeProperty->bandwidth;
}
currVertexIndex = parentIndex;
if (currVertexIndex == vertexAIndex) {
// Pad or truncate to one byte less than the model buffer; the buffer was zeroed above,
// so its final byte remains a NUL terminator after the copy.
path.resize(ZE_MAX_FABRIC_EDGE_MODEL_EXP_SIZE - 1, '\0');
path.copy(properties.model, path.size());
break;
} else {
// More hops remain towards A: separate model names with '-'.
path = '-' + path;
}
}
// The indirect edge is directional: it is stored for A -> B only.
indirectEdges.push_back(create(allVertices[vertexAIndex], allVertices[vertexBIndex], properties));
break;
}
}
}
}
}
} // namespace L0

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2022-2023 Intel Corporation
# Copyright (C) 2022-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -8,6 +8,7 @@ if(WIN32)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/fabric.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_iaf.h
${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_iaf.cpp
)

View File

@@ -0,0 +1,43 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/fabric/fabric.h"
#include <vector>
namespace L0 {
// Builds the direct fabric edges between all vertices (roots and sub-vertices).
// The third parameter (indirect edges) is intentionally unnamed and left untouched:
// this implementation does not compute multi-hop edges.
void FabricEdge::createEdgesFromVertices(const std::vector<FabricVertex *> &vertices, std::vector<FabricEdge *> &edges, std::vector<FabricEdge *> &) {
    // Flatten the hierarchy: each root vertex is followed by its sub-vertices.
    std::vector<FabricVertex *> flatVertices = {};
    for (auto &rootVertex : vertices) {
        flatVertices.push_back(rootVertex);
        for (auto &subVertex : rootVertex->subVertices) {
            flatVertices.push_back(subVertex);
        }
    }

    // Probe every unordered pair once; the earlier vertex in the flat list acts as the source.
    for (uint32_t sourceIndex = 0; sourceIndex < flatVertices.size(); sourceIndex++) {
        auto source = flatVertices[sourceIndex];
        for (uint32_t peerIndex = sourceIndex + 1; peerIndex < flatVertices.size(); peerIndex++) {
            auto peer = flatVertices[peerIndex];
            // Shared by all interfaces probed for this pair.
            ze_fabric_edge_exp_properties_t linkProperties = {};
            for (auto const &interfaceEntry : source->pFabricDeviceInterfaces) {
                if (!interfaceEntry.second->getEdgeProperty(peer, linkProperties)) {
                    continue;
                }
                edges.push_back(create(source, peer, linkProperties));
            }
        }
    }
}
} // namespace L0

View File

@@ -0,0 +1,89 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/core/source/fabric/linux/fabric_device_iaf.h"
namespace L0 {
namespace ult {
// Test double for IafNlApi. Most calls simply succeed; the public status members
// below let a test choose what selected calls return.
class MockIafNlApi : public IafNlApi {
  public:
    // Values returned by the corresponding overrides.
    ze_result_t fPortStatusQueryStatus = ZE_RESULT_SUCCESS;
    ze_result_t fportPropertiesStatus = ZE_RESULT_SUCCESS;
    ze_result_t subDevicePropertiesStatus = ZE_RESULT_SUCCESS;
    ze_result_t getPortsStatus = ZE_RESULT_SUCCESS;
    // Health reported through fPortStatusQuery().
    iaf_fport_health fPortStatusQueryHealthStatus = IAF_FPORT_HEALTH_HEALTHY;
    // When false, getPorts() reports no ports at all.
    bool portEnumerationEnable = true;

    ze_result_t handleResponse(const uint16_t cmdOp, struct genl_info *info, void *pOutput) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t fPortStatusQuery(const IafPortId portId, IafPortState &state) override {
        state.healthStatus = fPortStatusQueryHealthStatus;
        return fPortStatusQueryStatus;
    }

    ze_result_t getThroughput(const IafPortId portId, IafPortThroughPut &throughput) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t portStateQuery(const IafPortId portId, bool &enabled) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t portBeaconStateQuery(const IafPortId portId, bool &enabled) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t portBeaconEnable(const IafPortId portId) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t portBeaconDisable(const IafPortId portId) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t portEnable(const IafPortId portId) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t portDisable(const IafPortId portId) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t portUsageEnable(const IafPortId portId) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t portUsageDisable(const IafPortId portId) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t remRequest() override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t routingGenQuery(uint32_t &start, uint32_t &end) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t deviceEnum(std::vector<uint32_t> &fabricIds) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t fabricDeviceProperties(const uint32_t fabricId, uint32_t &numSubdevices) override {
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t subdevicePropertiesGet(const uint32_t fabricId, const uint32_t attachId, uint64_t &guid, std::vector<uint8_t> &ports) override {
        return subDevicePropertiesStatus;
    }

    // Reports a fixed neighbour and the same link speed for every speed output.
    ze_result_t fportProperties(const IafPortId portId, uint64_t &neighborGuid, uint8_t &neighborPortNumber,
                                IafPortSpeed &maxRxSpeed, IafPortSpeed &maxTxSpeed,
                                IafPortSpeed &rxSpeed, IafPortSpeed &txSpeed) override {
        neighborGuid = 0xFEEDBEAD;
        neighborPortNumber = 8;
        maxRxSpeed.bitRate = 53125000000;
        maxRxSpeed.width = 4;
        // All remaining speeds mirror maxRxSpeed.
        txSpeed = rxSpeed = maxTxSpeed = maxRxSpeed;
        return fportPropertiesStatus;
    }

    // Appends three "XeLink" ports derived from testPortId unless enumeration is disabled.
    ze_result_t getPorts(const std::string &devicePciPath, std::vector<IafPort> &ports) override {
        if (!portEnumerationEnable) {
            return getPortsStatus;
        }

        // Attributes common to all three ports.
        IafPort port;
        port.model = "XeLink";
        port.onSubdevice = true;
        port.maxRxSpeed.bitRate = 53125000000;
        port.maxRxSpeed.width = 4;
        port.maxTxSpeed = port.maxRxSpeed;

        // First port: exactly the ids of testPortId.
        port.portId.portNumber = testPortId.portNumber;
        port.portId.attachId = testPortId.attachId;
        port.portId.fabricId = testPortId.fabricId;
        ports.push_back(port);

        // Second port: every id component advanced by one.
        port.portId.portNumber = testPortId.portNumber + 1;
        port.portId.attachId = testPortId.attachId + 1;
        port.portId.fabricId = testPortId.fabricId + 1;
        ports.push_back(port);

        // Third port: fabric id and port number advanced by two, attach id unchanged.
        port.portId.portNumber = testPortId.portNumber + 2;
        port.portId.attachId = testPortId.attachId;
        port.portId.fabricId = testPortId.fabricId + 2;
        ports.push_back(port);

        return getPortsStatus;
    }

    // Base port id from which getPorts() derives the reported ports.
    IafPortId testPortId = {};
};
} // namespace ult
} // namespace L0

View File

@@ -4671,6 +4671,159 @@ TEST_F(P2pBandwidthPropertiesTest, GivenNoXeLinkFabricConnectionBetweenDevicesWh
driverHandle->fabricEdges.pop_back();
}
// Verifies the P2P bandwidth/latency properties reported between sub-devices for
// direct MDFI and XeLink edges as well as for the indirect MDFI/XeLink combinations.
TEST_F(P2pBandwidthPropertiesTest, GivenXeLinkAndMdfiFabricConnectionBetweenSubDevicesWhenQueryingBandwidthPropertiesThenCorrectPropertiesAreSet) {
    constexpr uint32_t xeLinkBandwidth = 3;

    driverHandle->initializeVertexes();

    // Two sub-devices on each of the two root devices.
    uint32_t subDeviceCount = 2;
    ze_device_handle_t device0SubDevices[2];
    ze_device_handle_t device1SubDevices[2];
    EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->devices[0]->getSubDevices(&subDeviceCount, device0SubDevices));
    EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->devices[1]->getSubDevices(&subDeviceCount, device1SubDevices));
    for (uint32_t i = 0; i < 2; i++) {
        EXPECT_NE(nullptr, device0SubDevices[i]);
    }
    for (uint32_t i = 0; i < 2; i++) {
        EXPECT_NE(nullptr, device1SubDevices[i]);
    }

    // Fabric vertex of every sub-device; vertexXY is sub-device Y of device X.
    ze_fabric_vertex_handle_t vertex00 = nullptr;
    ze_fabric_vertex_handle_t vertex01 = nullptr;
    ze_fabric_vertex_handle_t vertex10 = nullptr;
    ze_fabric_vertex_handle_t vertex11 = nullptr;
    EXPECT_EQ(ZE_RESULT_SUCCESS, L0::Device::fromHandle(device0SubDevices[0])->getFabricVertex(&vertex00));
    EXPECT_EQ(ZE_RESULT_SUCCESS, L0::Device::fromHandle(device0SubDevices[1])->getFabricVertex(&vertex01));
    EXPECT_EQ(ZE_RESULT_SUCCESS, L0::Device::fromHandle(device1SubDevices[0])->getFabricVertex(&vertex10));
    EXPECT_EQ(ZE_RESULT_SUCCESS, L0::Device::fromHandle(device1SubDevices[1])->getFabricVertex(&vertex11));
    EXPECT_NE(nullptr, vertex00);
    EXPECT_NE(nullptr, vertex01);
    EXPECT_NE(nullptr, vertex10);
    EXPECT_NE(nullptr, vertex11);

    const char *mdfiModel = "MDFI";
    const char *xeLinkModel = "XeLink";
    const char *xeLinkMdfiModel = "XeLink-MDFI";
    const char *mdfiXeLinkModel = "MDFI-XeLink";
    const char *mdfiXeLinkMdfiModel = "MDFI-XeLink-MDFI";

    // Allocates an edge between two vertices and fills in the properties under test.
    const auto makeEdge = [](ze_fabric_vertex_handle_t from, ze_fabric_vertex_handle_t to, const char *model,
                             uint32_t bandwidth, ze_bandwidth_unit_t bandwidthUnit,
                             uint32_t latency, ze_latency_unit_t latencyUnit) {
        auto edge = new FabricEdge;
        edge->vertexA = FabricVertex::fromHandle(from);
        edge->vertexB = FabricVertex::fromHandle(to);
        memcpy_s(edge->properties.model, ZE_MAX_FABRIC_EDGE_MODEL_EXP_SIZE, model, strlen(model));
        edge->properties.bandwidth = bandwidth;
        edge->properties.bandwidthUnit = bandwidthUnit;
        edge->properties.latency = latency;
        edge->properties.latencyUnit = latencyUnit;
        return edge;
    };

    // Direct edges: MDFI 00 <-> 01, MDFI 10 <-> 11, XeLink 01 <-> 10.
    driverHandle->fabricEdges.push_back(
        makeEdge(vertex00, vertex01, mdfiModel, 0u, ZE_BANDWIDTH_UNIT_UNKNOWN, 0u, ZE_LATENCY_UNIT_UNKNOWN));
    driverHandle->fabricEdges.push_back(
        makeEdge(vertex10, vertex11, mdfiModel, 0u, ZE_BANDWIDTH_UNIT_UNKNOWN, 0u, ZE_LATENCY_UNIT_UNKNOWN));
    driverHandle->fabricEdges.push_back(
        makeEdge(vertex01, vertex10, xeLinkModel, xeLinkBandwidth, ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC, 1u, ZE_LATENCY_UNIT_HOP));

    // Indirect edges: MDFI-XeLink 00 -> 10, MDFI-XeLink-MDFI 00 -> 11, XeLink-MDFI 01 -> 11.
    driverHandle->fabricIndirectEdges.push_back(
        makeEdge(vertex00, vertex10, mdfiXeLinkModel, xeLinkBandwidth, ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC,
                 std::numeric_limits<uint32_t>::max(), ZE_LATENCY_UNIT_UNKNOWN));
    driverHandle->fabricIndirectEdges.push_back(
        makeEdge(vertex00, vertex11, mdfiXeLinkMdfiModel, xeLinkBandwidth, ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC,
                 std::numeric_limits<uint32_t>::max(), ZE_LATENCY_UNIT_UNKNOWN));
    driverHandle->fabricIndirectEdges.push_back(
        makeEdge(vertex01, vertex11, xeLinkMdfiModel, xeLinkBandwidth, ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC,
                 std::numeric_limits<uint32_t>::max(), ZE_LATENCY_UNIT_UNKNOWN));

    ze_device_p2p_properties_t p2pProperties = {};
    ze_device_p2p_bandwidth_exp_properties_t p2pBandwidthProps = {};
    p2pProperties.pNext = &p2pBandwidthProps;
    p2pBandwidthProps.stype = ZE_STRUCTURE_TYPE_DEVICE_P2P_BANDWIDTH_EXP_PROPERTIES;
    p2pBandwidthProps.pNext = nullptr;

    // Queries source -> peer and checks that logical and physical values both match the expectation.
    const auto expectP2pProperties = [&](ze_device_handle_t source, ze_device_handle_t peer,
                                         uint32_t bandwidth, ze_bandwidth_unit_t bandwidthUnit,
                                         uint32_t latency, ze_latency_unit_t latencyUnit) {
        EXPECT_EQ(ZE_RESULT_SUCCESS, L0::Device::fromHandle(source)->getP2PProperties(peer, &p2pProperties));
        EXPECT_EQ(bandwidth, p2pBandwidthProps.logicalBandwidth);
        EXPECT_EQ(bandwidth, p2pBandwidthProps.physicalBandwidth);
        EXPECT_EQ(bandwidthUnit, p2pBandwidthProps.bandwidthUnit);
        EXPECT_EQ(latency, p2pBandwidthProps.logicalLatency);
        EXPECT_EQ(latency, p2pBandwidthProps.physicalLatency);
        EXPECT_EQ(latencyUnit, p2pBandwidthProps.latencyUnit);
    };

    // Check MDFI
    expectP2pProperties(device0SubDevices[0], device0SubDevices[1],
                        0u, ZE_BANDWIDTH_UNIT_UNKNOWN, 0u, ZE_LATENCY_UNIT_UNKNOWN);
    // Check XeLink
    expectP2pProperties(device0SubDevices[1], device1SubDevices[0],
                        xeLinkBandwidth, ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC, 1u, ZE_LATENCY_UNIT_HOP);
    // Check MDFI-XeLink
    expectP2pProperties(device0SubDevices[0], device1SubDevices[0],
                        xeLinkBandwidth, ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC,
                        std::numeric_limits<uint32_t>::max(), ZE_LATENCY_UNIT_UNKNOWN);
    // Check MDFI-XeLink-MDFI
    expectP2pProperties(device0SubDevices[0], device1SubDevices[1],
                        xeLinkBandwidth, ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC,
                        std::numeric_limits<uint32_t>::max(), ZE_LATENCY_UNIT_UNKNOWN);
    // Check XeLink-MDFI
    expectP2pProperties(device0SubDevices[1], device1SubDevices[1],
                        xeLinkBandwidth, ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC,
                        std::numeric_limits<uint32_t>::max(), ZE_LATENCY_UNIT_UNKNOWN);
}
TEST_F(P2pBandwidthPropertiesTest, GivenNoDirectFabricConnectionBetweenDevicesWhenQueryingBandwidthPropertiesThenBandwidthIsZero) {
driverHandle->initializeVertexes();

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2022 Intel Corporation
# Copyright (C) 2022-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -9,6 +9,7 @@ if(UNIX)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_fabric_iaf.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_fabric_multi_hop.cpp
)
endif()
endif()
endif()

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -16,86 +16,13 @@
#include "level_zero/core/source/fabric/fabric.h"
#include "level_zero/core/source/fabric/linux/fabric_device_iaf.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_fabric.h"
#include "gtest/gtest.h"
namespace L0 {
namespace ult {
// Test double for IafNlApi. Most calls simply succeed; the public status members
// below let a test choose what selected calls return.
class MockIafNlApi : public IafNlApi {
public:
// Values returned by fPortStatusQuery(), fportProperties(), subdevicePropertiesGet() and getPorts().
ze_result_t fPortStatusQueryStatus = ZE_RESULT_SUCCESS;
ze_result_t fportPropertiesStatus = ZE_RESULT_SUCCESS;
ze_result_t subDevicePropertiesStatus = ZE_RESULT_SUCCESS;
ze_result_t getPortsStatus = ZE_RESULT_SUCCESS;
// Health reported through fPortStatusQuery().
iaf_fport_health fPortStatusQueryHealthStatus = IAF_FPORT_HEALTH_HEALTHY;
// When false, getPorts() reports no ports at all.
bool portEnumerationEnable = true;
ze_result_t handleResponse(const uint16_t cmdOp, struct genl_info *info, void *pOutput) override { return ZE_RESULT_SUCCESS; };
ze_result_t fPortStatusQuery(const IafPortId portId, IafPortState &state) override {
state.healthStatus = fPortStatusQueryHealthStatus;
return fPortStatusQueryStatus;
};
// The following overrides ignore their arguments and always succeed.
ze_result_t getThroughput(const IafPortId portId, IafPortThroughPut &throughput) override { return ZE_RESULT_SUCCESS; };
ze_result_t portStateQuery(const IafPortId portId, bool &enabled) override { return ZE_RESULT_SUCCESS; };
ze_result_t portBeaconStateQuery(const IafPortId portId, bool &enabled) override { return ZE_RESULT_SUCCESS; };
ze_result_t portBeaconEnable(const IafPortId portId) override { return ZE_RESULT_SUCCESS; };
ze_result_t portBeaconDisable(const IafPortId portId) override { return ZE_RESULT_SUCCESS; };
ze_result_t portEnable(const IafPortId portId) override { return ZE_RESULT_SUCCESS; };
ze_result_t portDisable(const IafPortId portId) override { return ZE_RESULT_SUCCESS; };
ze_result_t portUsageEnable(const IafPortId portId) override { return ZE_RESULT_SUCCESS; };
ze_result_t portUsageDisable(const IafPortId portId) override { return ZE_RESULT_SUCCESS; };
ze_result_t remRequest() override { return ZE_RESULT_SUCCESS; };
ze_result_t routingGenQuery(uint32_t &start, uint32_t &end) override { return ZE_RESULT_SUCCESS; };
ze_result_t deviceEnum(std::vector<uint32_t> &fabricIds) override { return ZE_RESULT_SUCCESS; };
ze_result_t fabricDeviceProperties(const uint32_t fabricId, uint32_t &numSubdevices) override { return ZE_RESULT_SUCCESS; };
ze_result_t subdevicePropertiesGet(const uint32_t fabricId, const uint32_t attachId, uint64_t &guid, std::vector<uint8_t> &ports) override {
return subDevicePropertiesStatus;
};
// Reports a fixed neighbour and the same link speed for every speed output.
ze_result_t fportProperties(const IafPortId portId, uint64_t &neighborGuid, uint8_t &neighborPortNumber,
IafPortSpeed &maxRxSpeed, IafPortSpeed &maxTxSpeed,
IafPortSpeed &rxSpeed, IafPortSpeed &txSpeed) override {
neighborPortNumber = 8;
neighborGuid = 0xFEEDBEAD;
maxRxSpeed.width = 4;
maxRxSpeed.bitRate = 53125000000;
// All remaining speeds mirror maxRxSpeed.
maxTxSpeed = maxRxSpeed;
rxSpeed = maxRxSpeed;
txSpeed = maxRxSpeed;
return fportPropertiesStatus;
};
// Appends three "XeLink" ports derived from testPortId unless enumeration is disabled.
ze_result_t getPorts(const std::string &devicePciPath, std::vector<IafPort> &ports) override {
if (portEnumerationEnable) {
IafPort defaultPort;
defaultPort.onSubdevice = true;
// First port: exactly the ids of testPortId.
defaultPort.portId.fabricId = testPortId.fabricId;
defaultPort.portId.attachId = testPortId.attachId;
defaultPort.portId.portNumber = testPortId.portNumber;
defaultPort.model = "XeLink";
defaultPort.maxRxSpeed.width = 4;
defaultPort.maxRxSpeed.bitRate = 53125000000;
defaultPort.maxTxSpeed = defaultPort.maxRxSpeed;
ports.push_back(defaultPort);
// Second port: every id component advanced by one.
defaultPort.portId.fabricId = testPortId.fabricId + 1;
defaultPort.portId.attachId = testPortId.attachId + 1;
defaultPort.portId.portNumber = testPortId.portNumber + 1;
ports.push_back(defaultPort);
// Third port: fabric id and port number advanced by two, attach id unchanged.
defaultPort.portId.fabricId = testPortId.fabricId + 2;
defaultPort.portId.attachId = testPortId.attachId;
defaultPort.portId.portNumber = testPortId.portNumber + 2;
ports.push_back(defaultPort);
}
return getPortsStatus;
};
// Base port id from which getPorts() derives the reported ports.
IafPortId testPortId = {};
};
struct TestFabricIaf : public ::testing::Test {
void SetUp() override {
debugManager.flags.CreateMultipleSubDevices.set(1);
@@ -421,7 +348,11 @@ TEST_F(FabricIafEdgeFixture, GivenMultipleDevicesAndSubDevicesWhenCreatingEdgesT
delete edge;
}
driverHandle->fabricEdges.clear();
FabricEdge::createEdgesFromVertices(driverHandle->fabricVertices, driverHandle->fabricEdges);
for (auto &edge : driverHandle->fabricIndirectEdges) {
delete edge;
}
driverHandle->fabricIndirectEdges.clear();
FabricEdge::createEdgesFromVertices(driverHandle->fabricVertices, driverHandle->fabricEdges, driverHandle->fabricIndirectEdges);
constexpr uint32_t root2root = 1;
constexpr uint32_t subDevice2root = 4; // 2 root to 2 sub-devices each
@@ -617,7 +548,11 @@ TEST_F(FabricIafEdgeFixture, GivenMultipleDevicesAndSubDevicesWhenLatencyRequest
delete edge;
}
driverHandle->fabricEdges.clear();
FabricEdge::createEdgesFromVertices(driverHandle->fabricVertices, driverHandle->fabricEdges);
for (auto &edge : driverHandle->fabricIndirectEdges) {
delete edge;
}
driverHandle->fabricIndirectEdges.clear();
FabricEdge::createEdgesFromVertices(driverHandle->fabricVertices, driverHandle->fabricEdges, driverHandle->fabricIndirectEdges);
count = 0;
std::vector<ze_fabric_edge_handle_t> edges(30);

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -368,6 +368,59 @@ TEST_F(FabricEdgeFixture, GivenFabricVerticesAreCreatedWhenZeFabricEdgeGetExpIsC
edgeHandles.data()));
}
// Verifies that zeFabricEdgeGetExp reports direct edges in both orientations,
// indirect edges only in the queried direction, and never more than the caller's count.
TEST_F(FabricEdgeFixture, GivenFabricVerticesAreCreatedForIndirectEdgesWhenZeFabricEdgeGetExpIsCalledThenReturnSuccess) {
    // initialize
    uint32_t count = 0;
    std::vector<ze_fabric_vertex_handle_t> phVertices;
    ze_result_t res = driverHandle->fabricVertexGetExp(&count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    phVertices.resize(count);
    res = driverHandle->fabricVertexGetExp(&count, phVertices.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Delete existing fabric edges
    for (auto edge : driverHandle->fabricEdges) {
        delete edge;
    }
    driverHandle->fabricEdges.clear();
    for (auto edge : driverHandle->fabricIndirectEdges) {
        delete edge;
    }
    driverHandle->fabricIndirectEdges.clear();

    // Two direct edges (one per orientation) and four indirect edges (two per direction)
    // between vertex 0 and vertex 1.
    ze_fabric_edge_exp_properties_t dummyProperties = {};
    driverHandle->fabricEdges.push_back(FabricEdge::create(driverHandle->fabricVertices[0], driverHandle->fabricVertices[1], dummyProperties));
    driverHandle->fabricEdges.push_back(FabricEdge::create(driverHandle->fabricVertices[1], driverHandle->fabricVertices[0], dummyProperties));
    driverHandle->fabricIndirectEdges.push_back(FabricEdge::create(driverHandle->fabricVertices[0], driverHandle->fabricVertices[1], dummyProperties));
    driverHandle->fabricIndirectEdges.push_back(FabricEdge::create(driverHandle->fabricVertices[1], driverHandle->fabricVertices[0], dummyProperties));
    driverHandle->fabricIndirectEdges.push_back(FabricEdge::create(driverHandle->fabricVertices[0], driverHandle->fabricVertices[1], dummyProperties));
    driverHandle->fabricIndirectEdges.push_back(FabricEdge::create(driverHandle->fabricVertices[1], driverHandle->fabricVertices[0], dummyProperties));

    std::vector<ze_fabric_edge_handle_t> edgeHandles(10);

    // Count of zero: both direct edges match, plus the two indirect edges oriented 0 -> 1.
    count = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeFabricEdgeGetExp(driverHandle->fabricVertices[0]->toHandle(),
                                                        driverHandle->fabricVertices[1]->toHandle(),
                                                        &count,
                                                        edgeHandles.data()));
    EXPECT_EQ(count, 4u);

    // Reverse direction with a smaller count: four edges match (two direct, two indirect 1 -> 0),
    // but the result must be capped at the three entries requested.
    count = 3;
    EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeFabricEdgeGetExp(driverHandle->fabricVertices[1]->toHandle(),
                                                        driverHandle->fabricVertices[0]->toHandle(),
                                                        &count,
                                                        edgeHandles.data()));
    EXPECT_EQ(count, 3u);

    // Release the edges created by this test.
    for (auto edge : driverHandle->fabricEdges) {
        delete edge;
    }
    driverHandle->fabricEdges.clear();
    for (auto edge : driverHandle->fabricIndirectEdges) {
        delete edge;
    }
    driverHandle->fabricIndirectEdges.clear();
}
TEST_F(FabricEdgeFixture, GivenFabricEdgesAreCreatedWhenZeFabricEdgeGetVerticesExpIsCalledThenReturnCorrectVertices) {
// initialize
uint32_t count = 0;