Add method to generate attention bitmask

Related-To: NEO-4558

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2021-06-24 13:19:25 +00:00
committed by Compute-Runtime-Automation
parent 6035607718
commit bef4ca5cce
3 changed files with 273 additions and 0 deletions

View File

@@ -106,4 +106,65 @@ void DebuggerL0::captureStateBaseAddress(NEO::CommandContainer &container, SbaAd
}
}
void DebuggerL0::getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice, uint32_t eu, uint32_t thread, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) {
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
bitmaskSize = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * bytesPerEu;
bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
int threadValue = 0xff;
if (numThreadsPerEu == 7) {
threadValue = 0x7f;
}
if (slice == UINT32_MAX && subslice == UINT32_MAX && eu == UINT32_MAX && thread == UINT32_MAX) {
memset(bitmask.get(), threadValue, bitmaskSize);
return;
}
for (uint32_t sliceID = 0; sliceID < hwInfo.gtSystemInfo.MaxSlicesSupported; sliceID++) {
if (slice != UINT32_MAX) {
sliceID = slice;
}
uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * sliceID);
for (uint32_t subsliceID = 0; subsliceID < numSubslicesPerSlice; subsliceID++) {
if (subslice != UINT32_MAX) {
subsliceID = subslice;
}
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * subsliceID);
for (uint32_t euID = 0; euID < numEuPerSubslice; euID++) {
if (eu != UINT32_MAX) {
euID = eu;
}
uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * euID);
if (thread == UINT32_MAX) {
*euData = threadValue;
} else {
UNRECOVERABLE_IF(thread > 7);
*euData = (1 << thread);
}
UNRECOVERABLE_IF(numThreadsPerEu > 8);
if (eu != UINT32_MAX) {
break;
}
}
if (subslice != UINT32_MAX) {
break;
}
}
if (slice != UINT32_MAX) {
break;
}
}
}
} // namespace L0

View File

@@ -88,6 +88,8 @@ class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass {
virtual size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) = 0;
virtual void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) = 0;
static void getAttentionBitmaskForThread(uint32_t slice, uint32_t subslice, uint32_t eu, uint32_t thread, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize);
protected:
static bool isAnyTrackedAddressChanged(SbaAddresses sba) {
return sba.GeneralStateBaseAddress != 0 ||

View File

@@ -21,6 +21,8 @@
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h"
#include <bitset>
namespace L0 {
namespace ult {
@@ -776,5 +778,213 @@ TEST(Debugger, givenNonLegacyDebuggerWhenInitializingDeviceCapsThenUnrecoverable
EXPECT_THROW(NEO::MockDevice::create<NEO::MockDevice>(executionEnvironment, 0u), std::exception);
}
static void printAttentionBitmask(uint8_t *expected, uint8_t *actual, uint32_t maxSlices, uint32_t maxSubSlicesPerSlice, uint32_t maxEuPerSubslice, uint32_t threadsPerEu, bool printBitmask = false) {
auto bytesPerThread = threadsPerEu > 8 ? 2u : 1u;
auto bytesPerSlice = maxSubSlicesPerSlice * maxEuPerSubslice * bytesPerThread;
auto bytesPerSubSlice = maxEuPerSubslice * bytesPerThread;
for (uint32_t slice = 0; slice < maxSlices; slice++) {
for (uint32_t subslice = 0; subslice < maxSubSlicesPerSlice; subslice++) {
for (uint32_t eu = 0; eu < maxEuPerSubslice; eu++) {
for (uint32_t byte = 0; byte < bytesPerThread; byte++) {
if (printBitmask) {
std::bitset<8> bits(actual[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte]);
std::cout << " slice = " << slice << " subslice = " << subslice << " eu = " << eu << " threads bitmask = " << bits << "\n";
}
if (expected[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte] !=
actual[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte]) {
std::bitset<8> bits(actual[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte]);
std::bitset<8> bitsExpected(expected[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte]);
ASSERT_FALSE(true) << " got: slice = " << slice << " subslice = " << subslice << " eu = " << eu << " threads bitmask = " << bits << "\n"
<< " expected: slice = " << slice << " subslice = " << subslice << " eu = " << eu << " threads bitmask = " << bitsExpected << "\n";
;
}
}
}
}
}
if (printBitmask) {
std::cout << "\n\n";
}
}
TEST(DebuggerL0, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned) {
auto hwInfo = *NEO::defaultHwInfo.get();
std::unique_ptr<uint8_t[]> bitmask;
size_t size = 0;
uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
uint32_t subslice = subslicesPerSlice > 1 ? subslicesPerSlice - 1 : 0;
const auto threadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
const auto bytesPerEu = threadsPerEu <= 8 ? 1 : 2;
const auto threadsSizePerSubSlice = hwInfo.gtSystemInfo.MaxEuPerSubSlice * bytesPerEu;
const auto threadsSizePerSlice = threadsSizePerSubSlice * subslicesPerSlice;
DebuggerL0::getAttentionBitmaskForThread(0, 0, 0, 6, hwInfo, bitmask, size);
auto expectedBitmask = std::make_unique<uint8_t[]>(size);
uint8_t *data = nullptr;
memset(expectedBitmask.get(), 0, size);
auto returnedBitmask = bitmask.get();
EXPECT_EQ(uint8_t(1u << 6), returnedBitmask[0]);
DebuggerL0::getAttentionBitmaskForThread(0, 0, 1, 3, hwInfo, bitmask, size);
returnedBitmask = bitmask.get();
returnedBitmask += bytesPerEu;
EXPECT_EQ(uint8_t(1u << 3), returnedBitmask[0]);
DebuggerL0::getAttentionBitmaskForThread(0, subslice, 3, 6, hwInfo, bitmask, size);
data = expectedBitmask.get();
memset(expectedBitmask.get(), 0, size);
data = ptrOffset(data, subslice * threadsSizePerSubSlice);
data = ptrOffset(data, 3 * bytesPerEu);
data[0] = 1 << 6;
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
DebuggerL0::getAttentionBitmaskForThread(hwInfo.gtSystemInfo.MaxSlicesSupported - 1, subslice, 3, 6, hwInfo, bitmask, size);
data = expectedBitmask.get();
memset(expectedBitmask.get(), 0, size);
data = ptrOffset(data, (hwInfo.gtSystemInfo.MaxSlicesSupported - 1) * threadsSizePerSlice);
data = ptrOffset(data, subslice * threadsSizePerSubSlice);
data = ptrOffset(data, 3 * bytesPerEu);
data[0] = 1 << 6;
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
DebuggerL0::getAttentionBitmaskForThread(hwInfo.gtSystemInfo.MaxSlicesSupported - 1, subslice, 5, 0, hwInfo, bitmask, size);
data = expectedBitmask.get();
memset(expectedBitmask.get(), 0, size);
data = ptrOffset(data, (hwInfo.gtSystemInfo.MaxSlicesSupported - 1) * threadsSizePerSlice);
data = ptrOffset(data, subslice * threadsSizePerSubSlice);
data = ptrOffset(data, 5 * bytesPerEu);
data[0] = 1;
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
}
TEST(DebuggerL0, givenAllSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned) {
auto hwInfo = *NEO::defaultHwInfo.get();
std::unique_ptr<uint8_t[]> bitmask;
size_t size = 0;
uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
uint32_t subsliceID = subslicesPerSlice > 2 ? subslicesPerSlice - 2 : 0;
const auto threadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
const auto bytesPerEu = threadsPerEu <= 8 ? 1u : 2u;
const auto threadsSizePerSubSlice = hwInfo.gtSystemInfo.MaxEuPerSubSlice * bytesPerEu;
const auto threadsSizePerSlice = threadsSizePerSubSlice * subslicesPerSlice;
const auto threadID = threadsPerEu - 1;
// ALL slices
DebuggerL0::getAttentionBitmaskForThread(UINT32_MAX, subsliceID, 0, threadID, hwInfo, bitmask, size);
auto expectedBitmask = std::make_unique<uint8_t[]>(size);
uint8_t *data = nullptr;
memset(expectedBitmask.get(), 0, size);
for (uint32_t i = 0; i < hwInfo.gtSystemInfo.MaxSlicesSupported; i++) {
data = ptrOffset(expectedBitmask.get(), i * threadsSizePerSlice);
data = ptrOffset(data, subsliceID * threadsSizePerSubSlice);
data[0] = 1 << threadID;
}
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
// ALL Subslices
DebuggerL0::getAttentionBitmaskForThread(hwInfo.gtSystemInfo.MaxSlicesSupported - 1, UINT32_MAX, 0, threadID, hwInfo, bitmask, size);
memset(expectedBitmask.get(), 0, size);
data = ptrOffset(expectedBitmask.get(), (hwInfo.gtSystemInfo.MaxSlicesSupported - 1) * threadsSizePerSlice);
for (uint32_t i = 0; i < subslicesPerSlice; i++) {
data[i * threadsSizePerSubSlice] = 1 << threadID;
}
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
// ALL EUs
DebuggerL0::getAttentionBitmaskForThread(hwInfo.gtSystemInfo.MaxSlicesSupported - 1, subsliceID, UINT32_MAX, threadID, hwInfo, bitmask, size);
memset(expectedBitmask.get(), 0, size);
data = ptrOffset(expectedBitmask.get(), (hwInfo.gtSystemInfo.MaxSlicesSupported - 1) * threadsSizePerSlice);
data = ptrOffset(data, subsliceID * threadsSizePerSubSlice);
for (uint32_t i = 0; i < hwInfo.gtSystemInfo.MaxEuPerSubSlice; i++) {
data[i * bytesPerEu] = 1 << threadID;
}
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
// ALL threads
DebuggerL0::getAttentionBitmaskForThread(hwInfo.gtSystemInfo.MaxSlicesSupported - 1, subsliceID, 1, UINT32_MAX, hwInfo, bitmask, size);
memset(expectedBitmask.get(), 0, size);
data = ptrOffset(expectedBitmask.get(), (hwInfo.gtSystemInfo.MaxSlicesSupported - 1) * threadsSizePerSlice);
data = ptrOffset(data, subsliceID * threadsSizePerSubSlice);
data = ptrOffset(data, 1 * bytesPerEu);
for (uint32_t byte = 0; byte < bytesPerEu; byte++) {
for (uint32_t i = 0; i < std::min(threadsPerEu, 8u); i++) {
data[byte] |= 1 << i;
}
}
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
// ALL slices, All subslices, ALL EUs, ALL threads
DebuggerL0::getAttentionBitmaskForThread(UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, hwInfo, bitmask, size);
auto expectedThreads = threadsPerEu == 7 ? 0x7f : 0xff;
memset(expectedBitmask.get(), expectedThreads, size);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
// ALL slices, All subslices
DebuggerL0::getAttentionBitmaskForThread(UINT32_MAX, UINT32_MAX, 0, 0, hwInfo, bitmask, size);
memset(expectedBitmask.get(), 0, size);
data = expectedBitmask.get();
for (uint32_t slice = 0; slice < hwInfo.gtSystemInfo.MaxSlicesSupported; slice++) {
for (uint32_t subslice = 0; subslice < subslicesPerSlice; subslice++) {
data[slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice] = 1;
}
}
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
// ALL slices, All subslices, ALL EUs
DebuggerL0::getAttentionBitmaskForThread(UINT32_MAX, UINT32_MAX, UINT32_MAX, 0, hwInfo, bitmask, size);
memset(expectedBitmask.get(), 0, size);
data = expectedBitmask.get();
for (uint32_t slice = 0; slice < hwInfo.gtSystemInfo.MaxSlicesSupported; slice++) {
for (uint32_t subslice = 0; subslice < subslicesPerSlice; subslice++) {
for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) {
data[slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu] = 1;
}
}
}
printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu);
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
}
} // namespace ult
} // namespace L0