mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 22:43:00 +08:00
refactor: xe topology
Signed-off-by: Radoslaw Jablonski <radoslaw.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e9ff1260f0
commit
59c3b06eea
@@ -159,6 +159,8 @@ set(NEO_CORE_HELPERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_container.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_constants.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/topology.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/topology.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/topology_map.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/uint16_avx2.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4.h
|
||||
|
||||
120
shared/source/helpers/topology.cpp
Normal file
120
shared/source/helpers/topology.cpp
Normal file
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/topology.h"
|
||||
|
||||
#include <bit>
|
||||
#include <cstdint>
|
||||
#include <numeric>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
TopologyInfo getTopologyInfo(const TopologyBitmap &topologyBitmap, const TopologyLimits &topologyLimits, TopologyMapping &topologyMapping) {
|
||||
TopologyInfo topologyInfo{};
|
||||
|
||||
std::vector<int> sliceIndices;
|
||||
sliceIndices.reserve(topologyLimits.maxSlices);
|
||||
|
||||
std::vector<int> subSliceIndices;
|
||||
subSliceIndices.reserve(topologyLimits.maxSubSlicesPerSlice);
|
||||
|
||||
auto processSubSlices = [&](const std::span<const uint8_t> &subSliceBitmap) -> std::pair<int, int> {
|
||||
int sliceCount = 0;
|
||||
int subSliceCountTotal = 0;
|
||||
|
||||
for (auto sliceId = 0; sliceId < topologyLimits.maxSlices; ++sliceId) {
|
||||
int subSliceCount = 0;
|
||||
|
||||
for (auto subSliceId = 0; subSliceId < topologyLimits.maxSubSlicesPerSlice; ++subSliceId) {
|
||||
const auto idx = sliceId * topologyLimits.maxSubSlicesPerSlice + subSliceId;
|
||||
const auto byte = idx / 8u;
|
||||
const auto bit = idx % 8u;
|
||||
|
||||
if (idx >= std::ssize(subSliceBitmap) * 8) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (subSliceBitmap[byte] & (1u << bit)) {
|
||||
subSliceIndices.push_back(subSliceId);
|
||||
subSliceCount += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (subSliceCount) {
|
||||
sliceIndices.push_back(sliceId);
|
||||
sliceCount += 1;
|
||||
subSliceCountTotal += subSliceCount;
|
||||
}
|
||||
|
||||
if (sliceCount == 1) {
|
||||
topologyMapping.subsliceIndices = std::move(subSliceIndices);
|
||||
}
|
||||
|
||||
subSliceIndices.clear();
|
||||
}
|
||||
|
||||
return {sliceCount, subSliceCountTotal};
|
||||
};
|
||||
|
||||
auto [sliceCount, subSliceCount] = processSubSlices(topologyBitmap.dssCompute);
|
||||
|
||||
if (!subSliceCount) {
|
||||
std::tie(sliceCount, subSliceCount) = processSubSlices(topologyBitmap.dssGeometry);
|
||||
}
|
||||
|
||||
topologyMapping.sliceIndices = std::move(sliceIndices);
|
||||
if (sliceCount != 1) {
|
||||
topologyMapping.subsliceIndices.clear();
|
||||
}
|
||||
|
||||
auto bitmapCount = [](const std::span<const uint8_t> &bitmap) {
|
||||
return std::transform_reduce(bitmap.begin(), bitmap.end(), 0, std::plus{}, std::popcount<uint8_t>);
|
||||
};
|
||||
|
||||
topologyInfo.sliceCount = sliceCount;
|
||||
topologyInfo.subSliceCount = subSliceCount;
|
||||
topologyInfo.euCount = bitmapCount(topologyBitmap.eu) * topologyInfo.subSliceCount;
|
||||
topologyInfo.l3BankCount = bitmapCount(topologyBitmap.l3Banks);
|
||||
|
||||
return topologyInfo;
|
||||
}
|
||||
|
||||
TopologyInfo getTopologyInfoMultiTile(const std::span<TopologyBitmap> &topologyBitmap, const TopologyLimits &topologyLimits, TopologyMap &topologyMap) {
|
||||
const auto numTiles = std::ssize(topologyBitmap);
|
||||
|
||||
if (0 == numTiles)
|
||||
return TopologyInfo{};
|
||||
|
||||
std::vector<TopologyInfo> topologyInfos;
|
||||
topologyInfos.reserve(numTiles);
|
||||
|
||||
for (auto i = 0; i < numTiles; ++i) {
|
||||
topologyInfos.push_back(getTopologyInfo(topologyBitmap[i], topologyLimits, topologyMap[i]));
|
||||
}
|
||||
|
||||
TopologyInfo topologyInfo{
|
||||
.sliceCount = std::numeric_limits<decltype(TopologyInfo::sliceCount)>::max(),
|
||||
.subSliceCount = std::numeric_limits<decltype(TopologyInfo::subSliceCount)>::max(),
|
||||
.euCount = std::numeric_limits<decltype(TopologyInfo::euCount)>::max(),
|
||||
.l3BankCount = std::numeric_limits<decltype(TopologyInfo::l3BankCount)>::max(),
|
||||
};
|
||||
|
||||
topologyInfo = std::reduce(topologyInfos.cbegin(), topologyInfos.cend(), topologyInfo, [](const TopologyInfo &topoInfo1, const TopologyInfo &topoInfo2) {
|
||||
return TopologyInfo{
|
||||
.sliceCount = std::min(topoInfo1.sliceCount, topoInfo2.sliceCount),
|
||||
.subSliceCount = std::min(topoInfo1.subSliceCount, topoInfo2.subSliceCount),
|
||||
.euCount = std::min(topoInfo1.euCount, topoInfo2.euCount),
|
||||
.l3BankCount = std::min(topoInfo1.l3BankCount, topoInfo2.l3BankCount),
|
||||
};
|
||||
});
|
||||
|
||||
return topologyInfo;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
40
shared/source/helpers/topology.h
Normal file
40
shared/source/helpers/topology.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/helpers/topology_map.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct TopologyBitmap {
|
||||
std::span<const uint8_t> dssGeometry;
|
||||
std::span<const uint8_t> dssCompute;
|
||||
std::span<const uint8_t> l3Banks;
|
||||
std::span<const uint8_t> eu; // shared by all subslices
|
||||
};
|
||||
|
||||
struct TopologyInfo {
|
||||
int sliceCount;
|
||||
int subSliceCount;
|
||||
int euCount;
|
||||
int l3BankCount;
|
||||
};
|
||||
|
||||
struct TopologyLimits {
|
||||
int maxSlices;
|
||||
int maxSubSlicesPerSlice;
|
||||
int maxEusPerSubSlice;
|
||||
};
|
||||
|
||||
TopologyInfo getTopologyInfo(const TopologyBitmap &topologyBitmap, const TopologyLimits &topologyLimits, TopologyMapping &topologyMapping);
|
||||
TopologyInfo getTopologyInfoMultiTile(const std::span<TopologyBitmap> &topologyBitmap, const TopologyLimits &topologyLimits, TopologyMap &topologyMap);
|
||||
|
||||
} // namespace NEO
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/helpers/topology.h"
|
||||
#include "shared/source/os_interface/linux/drm_buffer_object.h"
|
||||
#include "shared/source/os_interface/linux/drm_neo.h"
|
||||
#include "shared/source/os_interface/linux/engine_info.h"
|
||||
@@ -516,131 +517,65 @@ bool IoctlHelperXe::setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime) {
|
||||
}
|
||||
|
||||
bool IoctlHelperXe::getTopologyDataAndMap(const HardwareInfo &hwInfo, DrmQueryTopologyData &topologyData, TopologyMap &topologyMap) {
|
||||
const auto queryGtTopology = queryData<uint8_t>(DRM_XE_DEVICE_QUERY_GT_TOPOLOGY);
|
||||
|
||||
auto queryGtTopology = queryData<uint8_t>(DRM_XE_DEVICE_QUERY_GT_TOPOLOGY);
|
||||
|
||||
auto fillMask = [](std::vector<std::bitset<8>> &vec, drm_xe_query_topology_mask *topo) {
|
||||
for (uint32_t j = 0; j < topo->num_bytes; j++) {
|
||||
vec.push_back(topo->mask[j]);
|
||||
}
|
||||
};
|
||||
|
||||
StackVec<std::vector<std::bitset<8>>, 2> geomDss;
|
||||
StackVec<std::vector<std::bitset<8>>, 2> computeDss;
|
||||
StackVec<std::vector<std::bitset<8>>, 2> euDss;
|
||||
StackVec<std::vector<std::bitset<8>>, 2> l3Banks;
|
||||
const auto numTiles = tileIdToGtId.size();
|
||||
std::vector<TopologyBitmap> topologyBitmap(numTiles);
|
||||
|
||||
auto topologySize = queryGtTopology.size();
|
||||
auto dataPtr = queryGtTopology.data();
|
||||
|
||||
auto numTiles = tileIdToGtId.size();
|
||||
geomDss.resize(numTiles);
|
||||
computeDss.resize(numTiles);
|
||||
euDss.resize(numTiles);
|
||||
l3Banks.resize(numTiles);
|
||||
bool receivedDssInfo = false;
|
||||
while (topologySize >= sizeof(drm_xe_query_topology_mask)) {
|
||||
drm_xe_query_topology_mask *topo = reinterpret_cast<drm_xe_query_topology_mask *>(dataPtr);
|
||||
const drm_xe_query_topology_mask *topo = reinterpret_cast<const drm_xe_query_topology_mask *>(dataPtr);
|
||||
UNRECOVERABLE_IF(topo == nullptr);
|
||||
|
||||
uint32_t gtId = topo->gt_id;
|
||||
auto tileId = gtIdToTileId[gtId];
|
||||
const auto gtId = topo->gt_id;
|
||||
const auto tileId = gtIdToTileId[gtId];
|
||||
|
||||
if (tileId != invalidIndex) {
|
||||
const auto bytes = std::span<const uint8_t>(topo->mask, topo->num_bytes);
|
||||
|
||||
switch (topo->type) {
|
||||
case DRM_XE_TOPO_DSS_GEOMETRY:
|
||||
fillMask(geomDss[tileId], topo);
|
||||
receivedDssInfo = true;
|
||||
topologyBitmap[tileId].dssGeometry = bytes;
|
||||
break;
|
||||
case DRM_XE_TOPO_DSS_COMPUTE:
|
||||
fillMask(computeDss[tileId], topo);
|
||||
receivedDssInfo = true;
|
||||
topologyBitmap[tileId].dssCompute = bytes;
|
||||
break;
|
||||
case DRM_XE_TOPO_L3_BANK:
|
||||
fillMask(l3Banks[tileId], topo);
|
||||
topologyBitmap[tileId].l3Banks = bytes;
|
||||
break;
|
||||
case DRM_XE_TOPO_EU_PER_DSS:
|
||||
case DRM_XE_TOPO_SIMD16_EU_PER_DSS:
|
||||
fillMask(euDss[tileId], topo);
|
||||
topologyBitmap[tileId].eu = bytes;
|
||||
break;
|
||||
default:
|
||||
xeLog("Unhandle GT Topo type: %d\n", topo->type);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t itemSize = sizeof(drm_xe_query_topology_mask) + topo->num_bytes;
|
||||
const auto itemSize = sizeof(drm_xe_query_topology_mask) + topo->num_bytes;
|
||||
topologySize -= itemSize;
|
||||
dataPtr = ptrOffset(dataPtr, itemSize);
|
||||
}
|
||||
|
||||
int sliceCount = 0;
|
||||
int subSliceCount = 0;
|
||||
int euPerDss = 0;
|
||||
int l3BankCount = 0;
|
||||
uint32_t hwMaxSubSliceCount = hwInfo.gtSystemInfo.MaxSubSlicesSupported;
|
||||
topologyData.maxSlices = hwInfo.gtSystemInfo.MaxSlicesSupported ? hwInfo.gtSystemInfo.MaxSlicesSupported : 1;
|
||||
topologyData.maxSubSlicesPerSlice = hwMaxSubSliceCount / topologyData.maxSlices;
|
||||
topologyData.maxEusPerSubSlice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
const TopologyLimits topologyLimits{
|
||||
.maxSlices = static_cast<int>(hwInfo.gtSystemInfo.MaxSlicesSupported),
|
||||
.maxSubSlicesPerSlice = static_cast<int>(hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported),
|
||||
.maxEusPerSubSlice = static_cast<int>(hwInfo.gtSystemInfo.MaxEuPerSubSlice),
|
||||
};
|
||||
|
||||
for (auto tileId = 0u; tileId < numTiles; tileId++) {
|
||||
const auto topologyInfo = getTopologyInfoMultiTile(topologyBitmap, topologyLimits, topologyMap);
|
||||
|
||||
int subSliceCountPerTile = 0;
|
||||
topologyData.sliceCount = topologyInfo.sliceCount;
|
||||
topologyData.subSliceCount = topologyInfo.subSliceCount;
|
||||
topologyData.numL3Banks = topologyInfo.l3BankCount;
|
||||
topologyData.euCount = topologyInfo.euCount;
|
||||
topologyData.maxSlices = topologyLimits.maxSlices;
|
||||
topologyData.maxSubSlicesPerSlice = topologyLimits.maxSubSlicesPerSlice;
|
||||
topologyData.maxEusPerSubSlice = topologyLimits.maxEusPerSubSlice;
|
||||
|
||||
std::vector<int> sliceIndices;
|
||||
std::vector<int> subSliceIndices;
|
||||
|
||||
int previouslyEnabledSlice = -1;
|
||||
|
||||
auto processSubSliceInfo = [&](const std::vector<std::bitset<8>> &subSliceInfo) -> void {
|
||||
for (auto subSliceId = 0u; subSliceId < std::min(hwMaxSubSliceCount, static_cast<uint32_t>(subSliceInfo.size() * 8)); subSliceId++) {
|
||||
auto byte = subSliceId / 8;
|
||||
auto bit = subSliceId & 0b111;
|
||||
int sliceId = static_cast<int>(subSliceId / topologyData.maxSubSlicesPerSlice);
|
||||
int subSliceIdRelative = static_cast<int>(subSliceId % topologyData.maxSubSlicesPerSlice);
|
||||
if (subSliceInfo[byte].test(bit)) {
|
||||
subSliceIndices.push_back(subSliceIdRelative);
|
||||
subSliceCountPerTile++;
|
||||
if (sliceId != previouslyEnabledSlice) {
|
||||
previouslyEnabledSlice = sliceId;
|
||||
sliceIndices.push_back(sliceId);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
processSubSliceInfo(computeDss[tileId]);
|
||||
|
||||
if (subSliceCountPerTile == 0) {
|
||||
processSubSliceInfo(geomDss[tileId]);
|
||||
}
|
||||
|
||||
topologyMap[tileId].sliceIndices = std::move(sliceIndices);
|
||||
if (topologyMap[tileId].sliceIndices.size() < 2u) {
|
||||
topologyMap[tileId].subsliceIndices = std::move(subSliceIndices);
|
||||
}
|
||||
int sliceCountPerTile = static_cast<int>(topologyMap[tileId].sliceIndices.size());
|
||||
|
||||
int euPerDssPerTile = 0;
|
||||
for (auto byte = 0u; byte < euDss[tileId].size(); byte++) {
|
||||
euPerDssPerTile += euDss[tileId][byte].count();
|
||||
}
|
||||
|
||||
int l3BankCountPerTile = 0;
|
||||
for (auto byte = 0u; byte < l3Banks[tileId].size(); byte++) {
|
||||
l3BankCountPerTile += l3Banks[tileId][byte].count();
|
||||
}
|
||||
|
||||
// pick smallest config
|
||||
sliceCount = (sliceCount == 0) ? sliceCountPerTile : std::min(sliceCount, sliceCountPerTile);
|
||||
subSliceCount = (subSliceCount == 0) ? subSliceCountPerTile : std::min(subSliceCount, subSliceCountPerTile);
|
||||
euPerDss = (euPerDss == 0) ? euPerDssPerTile : std::min(euPerDss, euPerDssPerTile);
|
||||
l3BankCount = (l3BankCount == 0) ? l3BankCountPerTile : std::min(l3BankCount, l3BankCountPerTile);
|
||||
}
|
||||
|
||||
topologyData.sliceCount = sliceCount;
|
||||
topologyData.subSliceCount = subSliceCount;
|
||||
topologyData.euCount = subSliceCount * euPerDss;
|
||||
topologyData.numL3Banks = l3BankCount;
|
||||
return receivedDssInfo;
|
||||
return topologyInfo.subSliceCount != 0;
|
||||
}
|
||||
|
||||
void IoctlHelperXe::updateBindInfo(uint64_t userPtr) {
|
||||
|
||||
Reference in New Issue
Block a user