mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Add method translating attention bitmask to threads
Related-To: NEO-4558 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
1d3d32cf39
commit
8526521f63
@ -167,4 +167,37 @@ void DebuggerL0::getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice,
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes an attention bitmask into the device threads whose bits are set.
//
// The bitmask is read one byte per EU, ordered by slice, then subslice, then EU,
// using the topology taken from hwInfo; bit N of an EU's byte reports thread N.
// Decoding stops at the first EU whose byte lies at or beyond bitmaskSize, so a
// bitmask shorter than the full topology yields only the threads it covers.
std::vector<ze_device_thread_t> DebuggerL0::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) {
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const auto sliceCount = gtInfo.MaxSlicesSupported;
    const uint32_t subslicesInSlice = gtInfo.MaxSubSlicesSupported / sliceCount;
    const uint32_t eusInSubslice = gtInfo.MaxEuPerSubSlice;
    const uint32_t threadsInEu = gtInfo.ThreadCount / gtInfo.EUCount;

    // Byte strides of each topology level inside the bitmask.
    const uint32_t euStride = alignUp(threadsInEu, 8) / 8;
    const uint32_t subsliceStride = eusInSubslice * euStride;
    const uint32_t sliceStride = subslicesInSlice * eusInSubslice * euStride;

    // The decoding below reads exactly one byte per EU.
    UNRECOVERABLE_IF(euStride != 1);

    std::vector<ze_device_thread_t> threads;

    for (uint32_t slice = 0; slice < sliceCount; ++slice) {
        for (uint32_t subslice = 0; subslice < subslicesInSlice; ++subslice) {
            for (uint32_t eu = 0; eu < eusInSubslice; ++eu) {
                const size_t byteIndex = slice * sliceStride + subslice * subsliceStride + eu * euStride;

                // Ran past the supplied buffer: report what was decoded so far.
                if (byteIndex >= bitmaskSize) {
                    return threads;
                }

                const uint8_t euBits = bitmask[byteIndex];
                for (uint32_t thread = 0; thread < 8; ++thread) {
                    if ((euBits >> thread) & 1u) {
                        threads.push_back(ze_device_thread_t{slice, subslice, eu, thread});
                    }
                }
            }
        }
    }

    return threads;
}
|
||||
|
||||
} // namespace L0
|
||||
|
@ -10,6 +10,8 @@
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
@ -89,6 +91,7 @@ class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass {
|
||||
virtual void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) = 0;
|
||||
|
||||
// Produces the attention bitmask for the given slice/subslice/EU/thread on hwInfo;
// the buffer is handed back through bitmask and its length through bitmaskSize.
// NOTE(review): the unit tests pass UINT32_MAX as a subslice or EU id and expect every
// subslice/EU to be marked - presumably a "select all" wildcard; confirm in the definition.
static void getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice, uint32_t eu, uint32_t thread, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize);
|
||||
// Decodes bitmaskSize bytes at bitmask into the list of threads whose attention bits are set,
// interpreting the layout with the slice/subslice/EU/thread counts from hwInfo.
static std::vector<ze_device_thread_t> getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize);
|
||||
|
||||
protected:
|
||||
static bool isAnyTrackedAddressChanged(SbaAddresses sba) {
|
||||
|
@ -986,5 +986,117 @@ TEST(DebuggerL0, givenAllSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrec
|
||||
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
|
||||
}
|
||||
|
||||
// Encodes a single thread into an attention bitmask and verifies that decoding
// the bitmask reports exactly that one thread.
TEST(DebuggerL0, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) {
    const auto hwInfo = *NEO::defaultHwInfo.get();
    const auto &gtInfo = hwInfo.gtSystemInfo;

    // Use a subslice other than 0 when the topology has more than two per slice.
    const uint32_t subslicesInSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    const uint32_t expectedSubslice = subslicesInSlice > 2 ? subslicesInSlice - 2 : 0;
    const uint32_t expectedThread = 3;

    std::unique_ptr<uint8_t[]> encoded;
    size_t encodedSize = 0;
    DebuggerL0::getAttentionBitmaskForThread(0, expectedSubslice, 0, expectedThread, hwInfo, encoded, encodedSize);

    const auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, encoded.get(), encodedSize);

    ASSERT_EQ(1u, decoded.size());

    const auto &reported = decoded.front();
    EXPECT_EQ(0u, reported.slice);
    EXPECT_EQ(expectedSubslice, reported.subslice);
    EXPECT_EQ(0u, reported.eu);
    EXPECT_EQ(expectedThread, reported.thread);
}
|
||||
|
||||
// Encodes one thread on EU 0 of every subslice in slice 0 (subslice id UINT32_MAX)
// and verifies that decoding reports one thread per subslice, in subslice order.
TEST(DebuggerL0, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) {
    const auto hwInfo = *NEO::defaultHwInfo.get();
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const uint32_t subslicesInSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    const uint32_t expectedThread = 3;

    std::unique_ptr<uint8_t[]> encoded;
    size_t encodedSize = 0;
    DebuggerL0::getAttentionBitmaskForThread(0, UINT32_MAX, 0, expectedThread, hwInfo, encoded, encodedSize);

    const auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, encoded.get(), encodedSize);

    ASSERT_EQ(subslicesInSlice, decoded.size());

    for (uint32_t subslice = 0; subslice < subslicesInSlice; ++subslice) {
        const auto &reported = decoded[subslice];
        EXPECT_EQ(0u, reported.slice);
        EXPECT_EQ(subslice, reported.subslice);
        EXPECT_EQ(0u, reported.eu);
        EXPECT_EQ(expectedThread, reported.thread);
    }
}
|
||||
|
||||
// Encodes one thread on every EU of slice 0 / subslice 0 (EU id UINT32_MAX)
// and verifies that decoding reports one thread per EU, in EU order.
TEST(DebuggerL0, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned) {
    const auto hwInfo = *NEO::defaultHwInfo.get();

    const uint32_t eusInSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
    const uint32_t expectedThread = 3;

    std::unique_ptr<uint8_t[]> encoded;
    size_t encodedSize = 0;
    DebuggerL0::getAttentionBitmaskForThread(0, 0, UINT32_MAX, expectedThread, hwInfo, encoded, encodedSize);

    const auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, encoded.get(), encodedSize);

    ASSERT_EQ(eusInSubslice, decoded.size());

    for (uint32_t eu = 0; eu < eusInSubslice; ++eu) {
        const auto &reported = decoded[eu];
        EXPECT_EQ(0u, reported.slice);
        EXPECT_EQ(0u, reported.subslice);
        EXPECT_EQ(eu, reported.eu);
        EXPECT_EQ(expectedThread, reported.thread);
    }
}
|
||||
|
||||
// Decodes a hand-written two-byte bitmask (low four bits set in each byte) and
// verifies that threads 0..3 of EU 0 and EU 1 in slice 0 / subslice 0 are reported in order.
TEST(DebuggerL0, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) {
    const auto hwInfo = *NEO::defaultHwInfo.get();

    // One byte per EU: 0x0f marks threads 0..3.
    const uint8_t euBytes[2] = {0x0f, 0x0f};
    constexpr uint32_t euCount = 2;
    constexpr uint32_t threadsPerEu = 4;

    const auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, euBytes, sizeof(euBytes));

    ASSERT_EQ(8u, decoded.size());

    for (uint32_t eu = 0; eu < euCount; ++eu) {
        for (uint32_t thread = 0; thread < threadsPerEu; ++thread) {
            const auto &reported = decoded[eu * threadsPerEu + thread];
            EXPECT_EQ(0u, reported.slice);
            EXPECT_EQ(0u, reported.subslice);
            EXPECT_EQ(eu, reported.eu);
            EXPECT_EQ(thread, reported.thread);
        }
    }
}
|
||||
|
||||
// Encodes one thread on every subslice of slice 0, then decodes only the leading
// part of the bitmask that covers half of the subslices (rounded up) and verifies
// that only those subslices are reported.
TEST(DebuggerL0, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) {
    const auto hwInfo = *NEO::defaultHwInfo.get();
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const uint32_t subslicesInSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    const uint32_t expectedThread = 3;

    std::unique_ptr<uint8_t[]> encoded;
    size_t encodedSize = 0;
    DebuggerL0::getAttentionBitmaskForThread(0, UINT32_MAX, 0, expectedThread, hwInfo, encoded, encodedSize);

    // Truncate the bitmask so that it ends right after the last "active" subslice.
    const auto bytesPerSubslice = encodedSize / gtInfo.MaxSlicesSupported / subslicesInSlice;
    const auto activeSubslices = (subslicesInSlice + 1) / 2;
    const auto truncatedSize = bytesPerSubslice * activeSubslices;

    const auto decoded = DebuggerL0::getThreadsFromAttentionBitmask(hwInfo, encoded.get(), truncatedSize);

    ASSERT_EQ(activeSubslices, decoded.size());

    for (uint32_t subslice = 0; subslice < activeSubslices; ++subslice) {
        const auto &reported = decoded[subslice];
        EXPECT_EQ(0u, reported.slice);
        EXPECT_EQ(subslice, reported.subslice);
        EXPECT_EQ(0u, reported.eu);
        EXPECT_EQ(expectedThread, reported.thread);
    }
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
Reference in New Issue
Block a user