Add method translating attention bitmask to threads

Related-To: NEO-4558
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
2025-09-10 12:53:42 +08:00 · 2021-06-25 17:43:56 +02:00
parent 1d3d32cf39
commit 8526521f63
3 changed files with 148 additions and 0 deletions
--- a/level_zero/core/source/debugger/debugger_l0.cpp
+++ b/level_zero/core/source/debugger/debugger_l0.cpp
@ -167,4 +167,37 @@ void DebuggerL0::getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice,
    }
 }

+// Decodes an attention bitmask into the list of device threads whose bits are set.
+//
+// Layout read by this function: one byte per EU (any other width trips the
+// UNRECOVERABLE_IF below), ordered by slice, then subslice, then EU. Bit N of a
+// byte stands for thread N of that EU. All counts come from hwInfo.gtSystemInfo.
+//
+// bitmaskSize is the number of bytes that may be read from bitmask. Decoding stops
+// at the first EU whose byte lies at or beyond that size, so a shorter bitmask
+// yields only the threads it covers. Threads are returned in ascending
+// slice / subslice / eu / thread order.
+std::vector<ze_device_thread_t> DebuggerL0::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) {
+    const auto &gtInfo = hwInfo.gtSystemInfo;
+
+    const uint32_t subslicesInSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
+    const uint32_t eusInSubslice = gtInfo.MaxEuPerSubSlice;
+    const uint32_t threadsInEu = gtInfo.ThreadCount / gtInfo.EUCount;
+    const uint32_t bytesPerEu = alignUp(threadsInEu, 8) / 8;
+    const uint32_t bytesPerSubslice = eusInSubslice * bytesPerEu;
+    const uint32_t bytesPerSlice = subslicesInSlice * bytesPerSubslice;
+
+    // The decoding loop below inspects exactly one byte (8 thread bits) per EU.
+    UNRECOVERABLE_IF(bytesPerEu != 1);
+
+    std::vector<ze_device_thread_t> result;
+
+    for (uint32_t slice = 0; slice < gtInfo.MaxSlicesSupported; ++slice) {
+        const uint32_t sliceBase = slice * bytesPerSlice;
+
+        for (uint32_t subslice = 0; subslice < subslicesInSlice; ++subslice) {
+            const uint32_t subsliceBase = sliceBase + subslice * bytesPerSubslice;
+
+            for (uint32_t eu = 0; eu < gtInfo.MaxEuPerSubSlice; ++eu) {
+                const size_t byteIndex = subsliceBase + eu * bytesPerEu;
+
+                // Byte indices only grow, so nothing further can fit once one is out of range.
+                if (byteIndex >= bitmaskSize) {
+                    return result;
+                }
+
+                const uint8_t euBits = bitmask[byteIndex];
+                for (uint32_t thread = 0; thread < 8; ++thread) {
+                    if ((euBits >> thread) & 1u) {
+                        result.emplace_back(ze_device_thread_t{slice, subslice, eu, thread});
+                    }
+                }
+            }
+        }
+    }
+
+    return result;
+}
+
 } // namespace L0
--- a/level_zero/core/source/debugger/debugger_l0.h
+++ b/level_zero/core/source/debugger/debugger_l0.h
@ -10,6 +10,8 @@
 #include "shared/source/helpers/non_copyable_or_moveable.h"
 #include "shared/source/memory_manager/memory_manager.h"

+#include <level_zero/ze_api.h>
+
 #include <memory>
 #include <unordered_map>

@ -89,6 +91,7 @@ class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass {
    virtual void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) = 0;

    static void getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice, uint32_t eu, uint32_t thread, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize);
+    static std::vector<ze_device_thread_t> getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize);

  protected:
    static bool isAnyTrackedAddressChanged(SbaAddresses sba) {
--- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp
+++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp
@ -986,5 +986,117 @@ TEST(DebuggerL0, givenAllSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrec
    EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
 }

+// Round trip for a single thread: build a bitmask with getAttentionBitmaskForThread,
+// decode it with getThreadsFromAttentionBitmask and expect the same ids back.
+TEST(DebuggerL0, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) {
+    const auto hwInfo = *NEO::defaultHwInfo.get();
+
+    const uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    // Pick a subslice other than the first one when there are more than two per slice.
+    const uint32_t expectedSubslice = (subslicesPerSlice > 2) ? (subslicesPerSlice - 2) : 0;
+    const uint32_t expectedThread = 3;
+
+    std::unique_ptr<uint8_t[]> attentionBitmask;
+    size_t attentionBitmaskSize = 0;
+    DebuggerL0::getAttentionBitmaskForThread(0, expectedSubslice, 0, expectedThread, hwInfo, attentionBitmask, attentionBitmaskSize);
+
+    const auto decodedThreads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, attentionBitmask.get(), attentionBitmaskSize);
+
+    ASSERT_EQ(1u, decodedThreads.size());
+
+    const auto &decoded = decodedThreads[0];
+    EXPECT_EQ(0u, decoded.slice);
+    EXPECT_EQ(expectedSubslice, decoded.subslice);
+    EXPECT_EQ(0u, decoded.eu);
+    EXPECT_EQ(expectedThread, decoded.thread);
+}
+
+// Builds a bitmask with UINT32_MAX as the subslice id and checks that decoding
+// returns one thread per subslice of slice 0, in ascending subslice order.
+TEST(DebuggerL0, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) {
+    const auto hwInfo = *NEO::defaultHwInfo.get();
+
+    const uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t expectedThread = 3;
+
+    std::unique_ptr<uint8_t[]> attentionBitmask;
+    size_t attentionBitmaskSize = 0;
+    DebuggerL0::getAttentionBitmaskForThread(0, UINT32_MAX, 0, expectedThread, hwInfo, attentionBitmask, attentionBitmaskSize);
+
+    const auto decodedThreads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, attentionBitmask.get(), attentionBitmaskSize);
+
+    ASSERT_EQ(subslicesPerSlice, decodedThreads.size());
+
+    for (uint32_t subslice = 0; subslice < subslicesPerSlice; ++subslice) {
+        const auto &decoded = decodedThreads[subslice];
+        EXPECT_EQ(0u, decoded.slice);
+        EXPECT_EQ(subslice, decoded.subslice);
+        EXPECT_EQ(0u, decoded.eu);
+        EXPECT_EQ(expectedThread, decoded.thread);
+    }
+}
+
+// Builds a bitmask with UINT32_MAX as the EU id and checks that decoding returns
+// one thread per EU of slice 0 / subslice 0, in ascending EU order.
+TEST(DebuggerL0, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned) {
+    const auto hwInfo = *NEO::defaultHwInfo.get();
+
+    const uint32_t euCountPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
+    const uint32_t expectedThread = 3;
+
+    std::unique_ptr<uint8_t[]> attentionBitmask;
+    size_t attentionBitmaskSize = 0;
+    DebuggerL0::getAttentionBitmaskForThread(0, 0, UINT32_MAX, expectedThread, hwInfo, attentionBitmask, attentionBitmaskSize);
+
+    const auto decodedThreads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, attentionBitmask.get(), attentionBitmaskSize);
+
+    ASSERT_EQ(euCountPerSubslice, decodedThreads.size());
+
+    for (uint32_t eu = 0; eu < euCountPerSubslice; ++eu) {
+        const auto &decoded = decodedThreads[eu];
+        EXPECT_EQ(0u, decoded.slice);
+        EXPECT_EQ(0u, decoded.subslice);
+        EXPECT_EQ(eu, decoded.eu);
+        EXPECT_EQ(expectedThread, decoded.thread);
+    }
+}
+
+// Decodes a hand-written two-byte bitmask: each byte is 0x0f, i.e. bits 0..3 set,
+// so threads 0..3 are expected for EU 0 and EU 1 of slice 0 / subslice 0.
+TEST(DebuggerL0, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) {
+    const auto hwInfo = *NEO::defaultHwInfo.get();
+    const uint8_t rawBitmask[2] = {0x0f, 0x0f};
+
+    const auto decodedThreads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, rawBitmask, sizeof(rawBitmask));
+
+    ASSERT_EQ(8u, decodedThreads.size());
+
+    // Expected order: all threads of EU 0 first, then all threads of EU 1.
+    uint32_t index = 0;
+    for (uint32_t eu = 0; eu < 2u; ++eu) {
+        for (uint32_t thread = 0; thread < 4u; ++thread, ++index) {
+            EXPECT_EQ(0u, decodedThreads[index].slice);
+            EXPECT_EQ(0u, decodedThreads[index].subslice);
+            EXPECT_EQ(eu, decodedThreads[index].eu);
+            EXPECT_EQ(thread, decodedThreads[index].thread);
+        }
+    }
+}
+
+// Builds a bitmask with UINT32_MAX as the subslice id, then passes only a prefix of
+// it to the decoder: the size is cut down to cover about half of the subslices
+// (rounded up). Only threads from the covered subslices are expected back.
+TEST(DebuggerL0, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) {
+    const auto hwInfo = *NEO::defaultHwInfo.get();
+
+    const uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t expectedThread = 3;
+
+    std::unique_ptr<uint8_t[]> attentionBitmask;
+    size_t attentionBitmaskSize = 0;
+    DebuggerL0::getAttentionBitmaskForThread(0, UINT32_MAX, 0, expectedThread, hwInfo, attentionBitmask, attentionBitmaskSize);
+
+    const auto bytesPerSubslice = attentionBitmaskSize / hwInfo.gtSystemInfo.MaxSlicesSupported / subslicesPerSlice;
+    const auto coveredSubslices = ((subslicesPerSlice + 1) / 2);
+    const auto truncatedSize = bytesPerSubslice * coveredSubslices;
+
+    const auto decodedThreads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, attentionBitmask.get(), truncatedSize);
+
+    ASSERT_EQ(coveredSubslices, decodedThreads.size());
+
+    for (uint32_t subslice = 0; subslice < coveredSubslices; ++subslice) {
+        const auto &decoded = decodedThreads[subslice];
+        EXPECT_EQ(0u, decoded.slice);
+        EXPECT_EQ(subslice, decoded.subslice);
+        EXPECT_EQ(0u, decoded.eu);
+        EXPECT_EQ(expectedThread, decoded.thread);
+    }
+}
+
 } // namespace ult
 } // namespace L0