refactor: xe topology

Signed-off-by: Radoslaw Jablonski <radoslaw.jablonski@intel.com>
This commit is contained in:
Radoslaw Jablonski
2025-08-05 22:33:38 +00:00
committed by Compute-Runtime-Automation
parent e9ff1260f0
commit 59c3b06eea
7 changed files with 456 additions and 98 deletions

View File

@@ -159,6 +159,8 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_container.h
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_constants.h
${CMAKE_CURRENT_SOURCE_DIR}/topology.h
${CMAKE_CURRENT_SOURCE_DIR}/topology.cpp
${CMAKE_CURRENT_SOURCE_DIR}/topology_map.h
${CMAKE_CURRENT_SOURCE_DIR}/uint16_avx2.h
${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4.h

View File

@@ -0,0 +1,120 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/topology.h"
#include <algorithm>
#include <bit>
#include <cstdint>
#include <functional>
#include <limits>
#include <numeric>
#include <span>
#include <tuple>
#include <utility>
#include <vector>
namespace NEO {
/**
 * Decodes the topology bitmaps of a single tile.
 *
 * A DSS bitmap is read as a flat bit array in which bit
 * (sliceId * maxSubSlicesPerSlice + subSliceId) marks an enabled sub-slice.
 * The compute DSS bitmap is scanned first; the geometry DSS bitmap is only
 * scanned when the compute bitmap reports no enabled sub-slice at all.
 *
 * @param topologyBitmap  byte-packed masks of the tile (DSS, EU, L3 banks)
 * @param topologyLimits  slice / sub-slice limits that define the DSS bitmap layout
 * @param topologyMapping output: sliceIndices receives the ids of slices with at least one
 *                        enabled sub-slice; subsliceIndices receives the slice-relative ids of
 *                        the enabled sub-slices when exactly one slice is enabled, and is
 *                        cleared otherwise
 * @return slice and sub-slice counts, the number of set bits in the L3 bank mask, and
 *         euCount = (set bits in the EU mask) * subSliceCount
 */
TopologyInfo getTopologyInfo(const TopologyBitmap &topologyBitmap, const TopologyLimits &topologyLimits, TopologyMapping &topologyMapping) {
    std::vector<int> sliceIndices;
    sliceIndices.reserve(topologyLimits.maxSlices);

    // Scratch list holding the sub-slice ids of the slice that is currently being scanned.
    std::vector<int> subSliceIndices;
    subSliceIndices.reserve(topologyLimits.maxSubSlicesPerSlice);

    auto processSubSlices = [&](std::span<const uint8_t> subSliceBitmap) -> std::pair<int, int> {
        const auto bitsAvailable = std::ssize(subSliceBitmap) * 8;
        int sliceCount = 0;
        int subSliceCountTotal = 0;

        for (auto sliceId = 0; sliceId < topologyLimits.maxSlices; ++sliceId) {
            // Also restores a well-defined state after the vector has been moved from below.
            subSliceIndices.clear();

            for (auto subSliceId = 0; subSliceId < topologyLimits.maxSubSlicesPerSlice; ++subSliceId) {
                const auto idx = sliceId * topologyLimits.maxSubSlicesPerSlice + subSliceId;
                // The bitmap may be shorter than the limits imply; never read past its end.
                if (idx >= bitsAvailable) {
                    break;
                }
                const auto byte = idx / 8u;
                const auto bit = idx % 8u;
                if (subSliceBitmap[byte] & (1u << bit)) {
                    subSliceIndices.push_back(subSliceId);
                }
            }

            if (subSliceIndices.empty()) {
                // A disabled slice must not touch the mapping: publishing the (empty) scratch
                // list here would wipe the indices recorded for the first enabled slice.
                continue;
            }

            sliceIndices.push_back(sliceId);
            sliceCount += 1;
            subSliceCountTotal += static_cast<int>(subSliceIndices.size());

            if (sliceCount == 1) {
                // Only the first enabled slice publishes its sub-slice ids; they are dropped
                // again by the caller-side check below if more slices turn out to be enabled.
                topologyMapping.subsliceIndices = std::move(subSliceIndices);
            }
        }
        return {sliceCount, subSliceCountTotal};
    };

    auto [sliceCount, subSliceCount] = processSubSlices(topologyBitmap.dssCompute);
    if (!subSliceCount) {
        // No compute DSS reported: fall back to the geometry DSS mask.
        std::tie(sliceCount, subSliceCount) = processSubSlices(topologyBitmap.dssGeometry);
    }

    topologyMapping.sliceIndices = std::move(sliceIndices);
    if (sliceCount != 1) {
        topologyMapping.subsliceIndices.clear();
    }

    // Counts the set bits of a byte-packed mask. Written as a loop so that no address of a
    // standard-library function template has to be taken.
    auto bitmapCount = [](std::span<const uint8_t> bitmap) {
        int setBits = 0;
        for (const uint8_t maskByte : bitmap) {
            setBits += std::popcount(maskByte);
        }
        return setBits;
    };

    TopologyInfo topologyInfo{};
    topologyInfo.sliceCount = sliceCount;
    topologyInfo.subSliceCount = subSliceCount;
    // The EU mask is shared by all sub-slices, so it is scaled by the sub-slice count.
    topologyInfo.euCount = bitmapCount(topologyBitmap.eu) * topologyInfo.subSliceCount;
    topologyInfo.l3BankCount = bitmapCount(topologyBitmap.l3Banks);
    return topologyInfo;
}
/**
 * Decodes every tile's bitmaps and reports the smallest configuration across tiles.
 *
 * getTopologyInfo is invoked once per tile, in tile order, with topologyMap[tileIndex] as
 * the mapping to fill. Each field of the result is the minimum of that field over all tiles.
 *
 * @param topologyBitmap one TopologyBitmap per tile
 * @param topologyLimits limits forwarded unchanged to getTopologyInfo
 * @param topologyMap    output: per-tile index mappings
 * @return per-field minimum over all tiles, or a zero-initialized TopologyInfo for an empty span
 */
TopologyInfo getTopologyInfoMultiTile(const std::span<TopologyBitmap> &topologyBitmap, const TopologyLimits &topologyLimits, TopologyMap &topologyMap) {
    const auto tileCount = std::ssize(topologyBitmap);
    if (tileCount == 0) {
        return TopologyInfo{};
    }

    // Tile 0 seeds the running minimum; every further tile can only lower a field.
    TopologyInfo smallest = getTopologyInfo(topologyBitmap[0], topologyLimits, topologyMap[0]);

    for (auto tileId = 1; tileId < tileCount; ++tileId) {
        const TopologyInfo current = getTopologyInfo(topologyBitmap[tileId], topologyLimits, topologyMap[tileId]);

        smallest.sliceCount = std::min(smallest.sliceCount, current.sliceCount);
        smallest.subSliceCount = std::min(smallest.subSliceCount, current.subSliceCount);
        smallest.euCount = std::min(smallest.euCount, current.euCount);
        smallest.l3BankCount = std::min(smallest.l3BankCount, current.l3BankCount);
    }

    return smallest;
}
} // namespace NEO

View File

@@ -0,0 +1,40 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/topology_map.h"
#include <cstdint>
#include <span>
namespace NEO {
// Byte-packed topology masks of one tile.
// All members are non-owning views: the bytes they refer to must stay alive for as long as
// the TopologyBitmap is used.
struct TopologyBitmap {
std::span<const uint8_t> dssGeometry; // geometry DSS mask; getTopologyInfo reads it only when dssCompute has no bit set
std::span<const uint8_t> dssCompute; // compute DSS mask; scanned first by getTopologyInfo
std::span<const uint8_t> l3Banks; // L3 bank mask; its set bits are counted into TopologyInfo::l3BankCount
std::span<const uint8_t> eu; // shared by all subslices
};
// Topology counts decoded from a TopologyBitmap.
// Every member defaults to zero so that a default-constructed object never carries
// indeterminate values; the type remains an aggregate, so brace and designated
// initialization keep working.
struct TopologyInfo {
    int sliceCount = 0;    // slices with at least one enabled sub-slice
    int subSliceCount = 0; // enabled sub-slices summed over all slices
    int euCount = 0;       // set bits in the EU mask multiplied by subSliceCount
    int l3BankCount = 0;   // set bits in the L3 bank mask
};
// Upper bounds that define how the DSS bitmaps are laid out.
// Every member defaults to zero so that a default-constructed object never carries
// indeterminate values; the type remains an aggregate, so brace and designated
// initialization keep working.
struct TopologyLimits {
    int maxSlices = 0;            // number of slice positions scanned in a DSS bitmap
    int maxSubSlicesPerSlice = 0; // bits reserved per slice in a DSS bitmap
    int maxEusPerSubSlice = 0;    // EU limit per sub-slice
};
// Decodes the bitmaps of a single tile: returns the slice / sub-slice / EU / L3 bank counts and
// stores the enabled slice indices (plus, when exactly one slice is enabled, its sub-slice
// indices) in topologyMapping.
TopologyInfo getTopologyInfo(const TopologyBitmap &topologyBitmap, const TopologyLimits &topologyLimits, TopologyMapping &topologyMapping);
// Calls getTopologyInfo for every tile, filling topologyMap[tileIndex], and returns the per-field
// minimum over all tiles; an empty span yields a zero-initialized TopologyInfo.
TopologyInfo getTopologyInfoMultiTile(const std::span<TopologyBitmap> &topologyBitmap, const TopologyLimits &topologyLimits, TopologyMap &topologyMap);
} // namespace NEO

View File

@@ -21,6 +21,7 @@
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/topology.h"
#include "shared/source/os_interface/linux/drm_buffer_object.h"
#include "shared/source/os_interface/linux/drm_neo.h"
#include "shared/source/os_interface/linux/engine_info.h"
@@ -516,131 +517,65 @@ bool IoctlHelperXe::setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime) {
}
// Queries DRM_XE_DEVICE_QUERY_GT_TOPOLOGY, walks the returned topology mask records and fills
// topologyData and topologyMap from them.
// NOTE(review): this span holds what looks like both the pre-refactor and the post-refactor
// statements side by side (queryGtTopology, numTiles, topo, gtId/tileId and itemSize are each
// declared twice, and there are two return statements) - presumably a diff rendered without
// +/- markers; confirm which lines are meant to survive before building.
bool IoctlHelperXe::getTopologyDataAndMap(const HardwareInfo &hwInfo, DrmQueryTopologyData &topologyData, TopologyMap &topologyMap) {
const auto queryGtTopology = queryData<uint8_t>(DRM_XE_DEVICE_QUERY_GT_TOPOLOGY);
auto queryGtTopology = queryData<uint8_t>(DRM_XE_DEVICE_QUERY_GT_TOPOLOGY);
auto fillMask = [](std::vector<std::bitset<8>> &vec, drm_xe_query_topology_mask *topo) {
for (uint32_t j = 0; j < topo->num_bytes; j++) {
vec.push_back(topo->mask[j]);
}
};
StackVec<std::vector<std::bitset<8>>, 2> geomDss;
StackVec<std::vector<std::bitset<8>>, 2> computeDss;
StackVec<std::vector<std::bitset<8>>, 2> euDss;
StackVec<std::vector<std::bitset<8>>, 2> l3Banks;
const auto numTiles = tileIdToGtId.size();
std::vector<TopologyBitmap> topologyBitmap(numTiles);
auto topologySize = queryGtTopology.size();
auto dataPtr = queryGtTopology.data();
auto numTiles = tileIdToGtId.size();
geomDss.resize(numTiles);
computeDss.resize(numTiles);
euDss.resize(numTiles);
l3Banks.resize(numTiles);
bool receivedDssInfo = false;
// The query result is a sequence of variable-length records: a drm_xe_query_topology_mask
// header followed by num_bytes of mask data.
while (topologySize >= sizeof(drm_xe_query_topology_mask)) {
drm_xe_query_topology_mask *topo = reinterpret_cast<drm_xe_query_topology_mask *>(dataPtr);
const drm_xe_query_topology_mask *topo = reinterpret_cast<const drm_xe_query_topology_mask *>(dataPtr);
UNRECOVERABLE_IF(topo == nullptr);
uint32_t gtId = topo->gt_id;
auto tileId = gtIdToTileId[gtId];
const auto gtId = topo->gt_id;
const auto tileId = gtIdToTileId[gtId];
// Records whose GT does not map to a tile are skipped.
if (tileId != invalidIndex) {
// Non-owning view over the mask bytes inside queryGtTopology's buffer.
const auto bytes = std::span<const uint8_t>(topo->mask, topo->num_bytes);
switch (topo->type) {
case DRM_XE_TOPO_DSS_GEOMETRY:
fillMask(geomDss[tileId], topo);
receivedDssInfo = true;
topologyBitmap[tileId].dssGeometry = bytes;
break;
case DRM_XE_TOPO_DSS_COMPUTE:
fillMask(computeDss[tileId], topo);
receivedDssInfo = true;
topologyBitmap[tileId].dssCompute = bytes;
break;
case DRM_XE_TOPO_L3_BANK:
fillMask(l3Banks[tileId], topo);
topologyBitmap[tileId].l3Banks = bytes;
break;
case DRM_XE_TOPO_EU_PER_DSS:
case DRM_XE_TOPO_SIMD16_EU_PER_DSS:
fillMask(euDss[tileId], topo);
topologyBitmap[tileId].eu = bytes;
break;
default:
xeLog("Unhandle GT Topo type: %d\n", topo->type);
}
}
// NOTE(review): topologySize is reduced without checking itemSize <= topologySize; presumably
// the query never returns a truncated trailing record - confirm.
uint32_t itemSize = sizeof(drm_xe_query_topology_mask) + topo->num_bytes;
const auto itemSize = sizeof(drm_xe_query_topology_mask) + topo->num_bytes;
topologySize -= itemSize;
dataPtr = ptrOffset(dataPtr, itemSize);
}
int sliceCount = 0;
int subSliceCount = 0;
int euPerDss = 0;
int l3BankCount = 0;
uint32_t hwMaxSubSliceCount = hwInfo.gtSystemInfo.MaxSubSlicesSupported;
topologyData.maxSlices = hwInfo.gtSystemInfo.MaxSlicesSupported ? hwInfo.gtSystemInfo.MaxSlicesSupported : 1;
topologyData.maxSubSlicesPerSlice = hwMaxSubSliceCount / topologyData.maxSlices;
topologyData.maxEusPerSubSlice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
// NOTE(review): unlike the maxSlices fallback to 1 just above, the initializer below divides by
// MaxSlicesSupported unguarded and passes it on as maxSlices - confirm it can never be 0.
const TopologyLimits topologyLimits{
.maxSlices = static_cast<int>(hwInfo.gtSystemInfo.MaxSlicesSupported),
.maxSubSlicesPerSlice = static_cast<int>(hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported),
.maxEusPerSubSlice = static_cast<int>(hwInfo.gtSystemInfo.MaxEuPerSubSlice),
};
for (auto tileId = 0u; tileId < numTiles; tileId++) {
// Per-tile decoding and the minimum-across-tiles reduction are done by getTopologyInfoMultiTile.
const auto topologyInfo = getTopologyInfoMultiTile(topologyBitmap, topologyLimits, topologyMap);
int subSliceCountPerTile = 0;
topologyData.sliceCount = topologyInfo.sliceCount;
topologyData.subSliceCount = topologyInfo.subSliceCount;
topologyData.numL3Banks = topologyInfo.l3BankCount;
topologyData.euCount = topologyInfo.euCount;
topologyData.maxSlices = topologyLimits.maxSlices;
topologyData.maxSubSlicesPerSlice = topologyLimits.maxSubSlicesPerSlice;
topologyData.maxEusPerSubSlice = topologyLimits.maxEusPerSubSlice;
std::vector<int> sliceIndices;
std::vector<int> subSliceIndices;
int previouslyEnabledSlice = -1;
auto processSubSliceInfo = [&](const std::vector<std::bitset<8>> &subSliceInfo) -> void {
for (auto subSliceId = 0u; subSliceId < std::min(hwMaxSubSliceCount, static_cast<uint32_t>(subSliceInfo.size() * 8)); subSliceId++) {
auto byte = subSliceId / 8;
auto bit = subSliceId & 0b111;
int sliceId = static_cast<int>(subSliceId / topologyData.maxSubSlicesPerSlice);
int subSliceIdRelative = static_cast<int>(subSliceId % topologyData.maxSubSlicesPerSlice);
if (subSliceInfo[byte].test(bit)) {
subSliceIndices.push_back(subSliceIdRelative);
subSliceCountPerTile++;
if (sliceId != previouslyEnabledSlice) {
previouslyEnabledSlice = sliceId;
sliceIndices.push_back(sliceId);
}
}
}
};
processSubSliceInfo(computeDss[tileId]);
if (subSliceCountPerTile == 0) {
processSubSliceInfo(geomDss[tileId]);
}
topologyMap[tileId].sliceIndices = std::move(sliceIndices);
if (topologyMap[tileId].sliceIndices.size() < 2u) {
topologyMap[tileId].subsliceIndices = std::move(subSliceIndices);
}
int sliceCountPerTile = static_cast<int>(topologyMap[tileId].sliceIndices.size());
int euPerDssPerTile = 0;
for (auto byte = 0u; byte < euDss[tileId].size(); byte++) {
euPerDssPerTile += euDss[tileId][byte].count();
}
int l3BankCountPerTile = 0;
for (auto byte = 0u; byte < l3Banks[tileId].size(); byte++) {
l3BankCountPerTile += l3Banks[tileId][byte].count();
}
// pick smallest config
sliceCount = (sliceCount == 0) ? sliceCountPerTile : std::min(sliceCount, sliceCountPerTile);
subSliceCount = (subSliceCount == 0) ? subSliceCountPerTile : std::min(subSliceCount, subSliceCountPerTile);
euPerDss = (euPerDss == 0) ? euPerDssPerTile : std::min(euPerDss, euPerDssPerTile);
l3BankCount = (l3BankCount == 0) ? l3BankCountPerTile : std::min(l3BankCount, l3BankCountPerTile);
}
topologyData.sliceCount = sliceCount;
topologyData.subSliceCount = subSliceCount;
topologyData.euCount = subSliceCount * euPerDss;
topologyData.numL3Banks = l3BankCount;
return receivedDssInfo;
// Reports success only when the smallest tile configuration has at least one sub-slice.
return topologyInfo.subSliceCount != 0;
}
void IoctlHelperXe::updateBindInfo(uint64_t userPtr) {