Add method translating attention bitmask to threads

Related-To: NEO-4558

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2021-06-25 17:43:56 +02:00
committed by Compute-Runtime-Automation
parent 1d3d32cf39
commit 8526521f63
3 changed files with 148 additions and 0 deletions

View File

@ -167,4 +167,37 @@ void DebuggerL0::getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice,
}
}
// Decodes an attention bitmask into the list of device threads whose attention bit is set.
//
// Layout read from `bitmask` (see the offset computation below): slice-major, then subslice,
// then EU, with exactly one byte per EU; bit N of an EU's byte stands for thread N of that EU.
//
// hwInfo      - supplies the slice / subslice / EU / thread counts used to walk the bitmask.
// bitmask     - raw attention bytes; when nullptr, no threads are returned.
// bitmaskSize - number of valid bytes in `bitmask`; decoding stops at the first EU whose byte
//               lies at or beyond this size, so a truncated bitmask yields only the EUs it covers.
//
// Returns the threads in ascending (slice, subslice, eu, thread) order.
// Aborts through UNRECOVERABLE_IF when a divisor taken from hwInfo is zero or when the
// platform would need anything other than one bitmask byte per EU.
std::vector<ze_device_thread_t> DebuggerL0::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) {
    const auto &gtInfo = hwInfo.gtSystemInfo;

    // Guard the integer divisions below against a zero divisor.
    UNRECOVERABLE_IF(gtInfo.MaxSlicesSupported == 0 || gtInfo.EUCount == 0);

    const uint32_t numSlices = gtInfo.MaxSlicesSupported;
    const uint32_t numSubslicesPerSlice = gtInfo.MaxSubSlicesSupported / numSlices;
    const uint32_t numEuPerSubslice = gtInfo.MaxEuPerSubSlice;
    const uint32_t numThreadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;

    // Sizes are kept in size_t so the offset arithmetic cannot wrap in 32 bits.
    const size_t threadsSizePerSubSlice = static_cast<size_t>(numEuPerSubslice) * bytesPerEu;
    const size_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;

    // Only the one-byte-per-EU layout is handled. Assuming alignUp rounds up to a multiple
    // of 8, this also pins numThreadsPerEu to the range 1..8.
    UNRECOVERABLE_IF(bytesPerEu != 1);

    std::vector<ze_device_thread_t> threads;
    if (bitmask == nullptr) {
        return threads;
    }

    for (uint32_t slice = 0; slice < numSlices; slice++) {
        for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
            for (uint32_t eu = 0; eu < numEuPerSubslice; eu++) {
                const size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + static_cast<size_t>(eu) * bytesPerEu;

                if (offset >= bitmaskSize) {
                    // Offsets only grow from here on, so no later EU can be inside the buffer.
                    return threads;
                }

                const uint8_t euBits = bitmask[offset];

                // Walk only the bits that map to real threads; any padding bits above
                // numThreadsPerEu are ignored instead of being reported as threads.
                for (uint32_t thread = 0; thread < numThreadsPerEu; thread++) {
                    if ((euBits >> thread) & 1u) {
                        threads.push_back(ze_device_thread_t{slice, subslice, eu, thread});
                    }
                }
            }
        }
    }

    return threads;
}
} // namespace L0

View File

@ -10,6 +10,8 @@
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/memory_manager/memory_manager.h"
#include <level_zero/ze_api.h>
#include <memory>
#include <unordered_map>
@ -89,6 +91,7 @@ class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass {
virtual void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) = 0;
static void getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice, uint32_t eu, uint32_t thread, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize);
// Decodes an attention bitmask into the threads whose bits are set.
// The bitmask is read with one byte per EU, ordered by slice, then subslice, then EU;
// the bit index inside a byte becomes the thread id. Decoding stops once the next
// EU's byte would lie at or beyond bitmaskSize. hwInfo supplies the topology counts.
static std::vector<ze_device_thread_t> getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize);
protected:
static bool isAnyTrackedAddressChanged(SbaAddresses sba) {

View File

@ -986,5 +986,117 @@ TEST(DebuggerL0, givenAllSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrec
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
}
// Round trip: a bitmask generated for one specific thread must decode to exactly that thread.
TEST(DebuggerL0, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    const uint32_t subslicesInSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;

    // Use a subslice other than the first one when the platform has more than two per slice.
    uint32_t expectedSubslice = 0;
    if (subslicesInSlice > 2) {
        expectedSubslice = subslicesInSlice - 2;
    }
    uint32_t expectedThread = 3;

    std::unique_ptr<uint8_t[]> encoded;
    size_t encodedSize = 0;
    DebuggerL0::getAttentionBitmaskForThread(0, expectedSubslice, 0, expectedThread, hwInfo, encoded, encodedSize);

    auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, encoded.get(), encodedSize);

    ASSERT_EQ(1u, decoded.size());

    const auto &onlyThread = decoded[0];
    EXPECT_EQ(0u, onlyThread.slice);
    EXPECT_EQ(expectedSubslice, onlyThread.subslice);
    EXPECT_EQ(0u, onlyThread.eu);
    EXPECT_EQ(expectedThread, onlyThread.thread);
}
// Round trip for a bitmask generated with UINT32_MAX as the subslice argument:
// the decoder is expected to return one thread per subslice of slice 0, in subslice order.
TEST(DebuggerL0, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    uint32_t expectedThread = 3;
    uint32_t subslicesInSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;

    std::unique_ptr<uint8_t[]> encoded;
    size_t encodedSize = 0;
    // NOTE(review): UINT32_MAX presumably selects every subslice - confirm against getAttentionBitmaskForThread.
    DebuggerL0::getAttentionBitmaskForThread(0, UINT32_MAX, 0, expectedThread, hwInfo, encoded, encodedSize);

    auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, encoded.get(), encodedSize);

    ASSERT_EQ(subslicesInSlice, decoded.size());

    for (uint32_t subslice = 0; subslice < subslicesInSlice; subslice++) {
        const auto &current = decoded[subslice];
        EXPECT_EQ(0u, current.slice);
        EXPECT_EQ(subslice, current.subslice);
        EXPECT_EQ(0u, current.eu);
        EXPECT_EQ(expectedThread, current.thread);
    }
}
// Round trip for a bitmask generated with UINT32_MAX as the EU argument:
// the decoder is expected to return one thread per EU of slice 0 / subslice 0, in EU order.
TEST(DebuggerL0, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    uint32_t expectedThread = 3;
    uint32_t euCount = hwInfo.gtSystemInfo.MaxEuPerSubSlice;

    std::unique_ptr<uint8_t[]> encoded;
    size_t encodedSize = 0;
    // NOTE(review): UINT32_MAX presumably selects every EU - confirm against getAttentionBitmaskForThread.
    DebuggerL0::getAttentionBitmaskForThread(0, 0, UINT32_MAX, expectedThread, hwInfo, encoded, encodedSize);

    auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, encoded.get(), encodedSize);

    ASSERT_EQ(euCount, decoded.size());

    for (uint32_t eu = 0; eu < euCount; eu++) {
        const auto &current = decoded[eu];
        EXPECT_EQ(0u, current.slice);
        EXPECT_EQ(0u, current.subslice);
        EXPECT_EQ(eu, current.eu);
        EXPECT_EQ(expectedThread, current.thread);
    }
}
// Decodes a hand-written two-byte bitmask: the low four bits are set in each byte,
// so threads 0-3 are expected for EU 0 and for EU 1 of slice 0 / subslice 0.
TEST(DebuggerL0, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    uint8_t rawBitmask[2] = {0x0f, 0x0f};

    // Expected result, listed in the order the decoder emits it: {slice, subslice, eu, thread}.
    ze_device_thread_t expectedThreads[] = {
        {0, 0, 0, 0},
        {0, 0, 0, 1},
        {0, 0, 0, 2},
        {0, 0, 0, 3},
        {0, 0, 1, 0},
        {0, 0, 1, 1},
        {0, 0, 1, 2},
        {0, 0, 1, 3}};

    auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, rawBitmask, sizeof(rawBitmask));

    ASSERT_EQ(8u, decoded.size());

    uint32_t index = 0;
    for (const auto &expected : expectedThreads) {
        const auto &actual = decoded[index];
        EXPECT_EQ(expected.slice, actual.slice);
        EXPECT_EQ(expected.subslice, actual.subslice);
        EXPECT_EQ(expected.eu, actual.eu);
        EXPECT_EQ(expected.thread, actual.thread);
        index++;
    }
}
// Generates a bitmask with UINT32_MAX as the subslice argument, then hands the decoder only
// a prefix of it: the bytes for the first (subslices + 1) / 2 subslices. Only threads from
// those subslices are expected back.
TEST(DebuggerL0, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    uint32_t expectedThread = 3;
    uint32_t subslicesInSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;

    std::unique_ptr<uint8_t[]> encoded;
    size_t encodedSize = 0;
    DebuggerL0::getAttentionBitmaskForThread(0, UINT32_MAX, 0, expectedThread, hwInfo, encoded, encodedSize);

    // Full size split evenly across slices and then across subslices of a slice.
    auto bytesPerSubslice = encodedSize / hwInfo.gtSystemInfo.MaxSlicesSupported / subslicesInSlice;
    // Half of the subslices, rounded up.
    auto activeSubslices = ((subslicesInSlice + 1) / 2);
    auto truncatedSize = bytesPerSubslice * activeSubslices;

    auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, encoded.get(), truncatedSize);

    ASSERT_EQ(activeSubslices, decoded.size());

    for (uint32_t subslice = 0; subslice < activeSubslices; subslice++) {
        const auto &current = decoded[subslice];
        EXPECT_EQ(0u, current.slice);
        EXPECT_EQ(subslice, current.subslice);
        EXPECT_EQ(0u, current.eu);
        EXPECT_EQ(expectedThread, current.thread);
    }
}
} // namespace ult
} // namespace L0