diff --git a/level_zero/core/source/debugger/debugger_l0.cpp b/level_zero/core/source/debugger/debugger_l0.cpp
index e23a66577d..ffc20c0265 100644
--- a/level_zero/core/source/debugger/debugger_l0.cpp
+++ b/level_zero/core/source/debugger/debugger_l0.cpp
@@ -167,4 +167,56 @@ void DebuggerL0::getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice,
     }
 }
 
+/**
+ * Decodes an attention bitmask into the list of threads whose attention bit is set.
+ *
+ * The bitmask is read as one byte per EU, laid out slice by slice, then subslice by
+ * subslice, then EU by EU; bit N of an EU's byte maps to thread N of that EU.
+ *
+ * @param hwInfo hardware info supplying the slice, subslice, EU and thread counts
+ * @param bitmask attention bitmask bytes; a null pointer yields an empty result
+ * @param bitmaskSize number of readable bytes in bitmask; decoding stops at the first EU past it
+ * @return decoded threads as {slice, subslice, eu, thread}, in increasing bitmask order
+ */
+std::vector<ze_device_thread_t> DebuggerL0::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) {
+    const uint32_t numSlices = hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / numSlices;
+    const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
+    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
+    // Strides are kept in size_t so byte offsets cannot wrap before they are compared with bitmaskSize.
+    const size_t threadsSizePerSubSlice = static_cast<size_t>(numEuPerSubslice) * bytesPerEu;
+    const size_t threadsSizePerSlice = static_cast<size_t>(numSubslicesPerSlice) * threadsSizePerSubSlice;
+
+    // Only the one-byte-per-EU layout is handled by the decoding below.
+    UNRECOVERABLE_IF(bytesPerEu != 1);
+    std::vector<ze_device_thread_t> threads;
+
+    if (bitmask == nullptr) {
+        return threads;
+    }
+
+    for (uint32_t slice = 0; slice < numSlices; slice++) {
+        for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
+            for (uint32_t eu = 0; eu < numEuPerSubslice; eu++) {
+                const size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + static_cast<size_t>(eu) * bytesPerEu;
+
+                // Offsets only grow with the iteration order, so the first one past the end means nothing is left to decode.
+                if (offset >= bitmaskSize) {
+                    return threads;
+                }
+
+                const uint8_t euBits = bitmask[offset];
+                for (uint32_t thread = 0; thread < 8; thread++) {
+                    if ((euBits >> thread) & 1u) {
+                        threads.emplace_back(ze_device_thread_t{slice, subslice, eu, thread});
+                    }
+                }
+            }
+        }
+    }
+
+    return threads;
+}
+
 } // namespace L0
diff --git a/level_zero/core/source/debugger/debugger_l0.h b/level_zero/core/source/debugger/debugger_l0.h
index ea9d3eea62..2dfbe88ee9 100644
--- a/level_zero/core/source/debugger/debugger_l0.h
+++ b/level_zero/core/source/debugger/debugger_l0.h
@@ -10,6 +10,8 @@
 #include
"shared/source/helpers/non_copyable_or_moveable.h"
 #include "shared/source/memory_manager/memory_manager.h"
 
+#include
+
 #include
 #include
 
@@ -89,6 +91,8 @@ class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass {
     virtual void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) = 0;
 
     static void getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice, uint32_t eu, uint32_t thread, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize);
+    // Decodes an attention bitmask of bitmaskSize bytes into the threads whose bit is set.
+    static std::vector<ze_device_thread_t> getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize);
 
   protected:
     static bool isAnyTrackedAddressChanged(SbaAddresses sba) {
diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp
index 457f65d4fe..d261381712 100644
--- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp
+++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp
@@ -986,5 +986,120 @@ TEST(DebuggerL0, givenAllSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrec
     EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
 }
 
+TEST(DebuggerL0, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) {
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    std::unique_ptr<uint8_t[]> bitmask;
+    size_t size = 0;
+
+    uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    // Use a non-zero subslice when more than two are available; otherwise fall back to subslice 0.
+    uint32_t subsliceID = subslicesPerSlice > 2 ? subslicesPerSlice - 2 : 0;
+
+    uint32_t threadID = 3;
+    DebuggerL0::getAttentionBitmaskForThread(0, subsliceID, 0, threadID, hwInfo, bitmask, size);
+
+    auto threads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size);
+
+    ASSERT_EQ(1u, threads.size());
+
+    EXPECT_EQ(0u, threads[0].slice);
+    EXPECT_EQ(subsliceID, threads[0].subslice);
+    EXPECT_EQ(0u, threads[0].eu);
+    EXPECT_EQ(threadID, threads[0].thread);
+}
+
+TEST(DebuggerL0, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) {
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    std::unique_ptr<uint8_t[]> bitmask;
+    size_t size = 0;
+
+    uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    uint32_t threadID = 3;
+    DebuggerL0::getAttentionBitmaskForThread(0, UINT32_MAX, 0, threadID, hwInfo, bitmask, size);
+
+    auto threads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size);
+
+    ASSERT_EQ(subslicesPerSlice, threads.size());
+
+    for (uint32_t i = 0; i < subslicesPerSlice; i++) {
+        EXPECT_EQ(0u, threads[i].slice);
+        EXPECT_EQ(i, threads[i].subslice);
+        EXPECT_EQ(0u, threads[i].eu);
+        EXPECT_EQ(threadID, threads[i].thread);
+    }
+}
+
+TEST(DebuggerL0, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned) {
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    std::unique_ptr<uint8_t[]> bitmask;
+    size_t size = 0;
+
+    uint32_t maxEUs = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
+    uint32_t threadID = 3;
+    DebuggerL0::getAttentionBitmaskForThread(0, 0, UINT32_MAX, threadID, hwInfo, bitmask, size);
+
+    auto threads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size);
+
+    ASSERT_EQ(maxEUs, threads.size());
+
+    for (uint32_t i = 0; i < maxEUs; i++) {
+        EXPECT_EQ(0u, threads[i].slice);
+        EXPECT_EQ(0u, threads[i].subslice);
+        EXPECT_EQ(i, threads[i].eu);
+        EXPECT_EQ(threadID, threads[i].thread);
+    }
+}
+
+TEST(DebuggerL0, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) {
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    // Hand-written bitmask: bits 0-3 set in each of the first two bytes (EU 0 and EU 1 per the expectations below).
+    uint8_t data[2] = {0x0f, 0x0f};
+    auto threads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, data, sizeof(data));
+
+    ASSERT_EQ(8u, threads.size());
+
+    ze_device_thread_t expectedThreads[] = {
+        {0, 0, 0, 0},
+        {0, 0, 0, 1},
+        {0, 0, 0, 2},
+        {0, 0, 0, 3},
+        {0, 0, 1, 0},
+        {0, 0, 1, 1},
+        {0, 0, 1, 2},
+        {0, 0, 1, 3}};
+
+    for (uint32_t i = 0; i < 8u; i++) {
+        EXPECT_EQ(expectedThreads[i].slice, threads[i].slice);
+        EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice);
+        EXPECT_EQ(expectedThreads[i].eu, threads[i].eu);
+        EXPECT_EQ(expectedThreads[i].thread, threads[i].thread);
+    }
+}
+
+TEST(DebuggerL0, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) {
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    std::unique_ptr<uint8_t[]> bitmask;
+    size_t size = 0;
+
+    uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    uint32_t threadID = 3;
+    DebuggerL0::getAttentionBitmaskForThread(0, UINT32_MAX, 0, threadID, hwInfo, bitmask, size);
+
+    // Pass a truncated size covering only the first half (rounded up) of the subslices.
+    auto bitmaskSizePerSingleSubslice = size / hwInfo.gtSystemInfo.MaxSlicesSupported / subslicesPerSlice;
+    auto numOfActiveSubslices = ((subslicesPerSlice + 1) / 2);
+
+    auto threads = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), bitmaskSizePerSingleSubslice * numOfActiveSubslices);
+
+    ASSERT_EQ(numOfActiveSubslices, threads.size());
+
+    for (uint32_t i = 0; i < numOfActiveSubslices; i++) {
+        EXPECT_EQ(0u, threads[i].slice);
+        EXPECT_EQ(i, threads[i].subslice);
+        EXPECT_EQ(0u, threads[i].eu);
+        EXPECT_EQ(threadID, threads[i].thread);
+    }
+}
+
 } // namespace ult
 } // namespace L0