fix: Fix debugger bitmask parsing and setting for new platforms

Related-to: NEO-7799

Signed-off-by: Jemale Lockett <jemale.lockett@intel.com>
This commit is contained in:
Jemale Lockett 2025-01-21 21:06:09 +00:00 committed by Compute-Runtime-Automation
parent 83af0b77ff
commit 272edeabe6
10 changed files with 458 additions and 73 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -15,67 +15,6 @@
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
namespace L0 {
// Builds an attention bitmask in which the bit of every thread listed in `threads` is set.
//
// Layout produced here: slices are stored back to back; each slice holds
// numSubslicesPerSlice subslices, each subslice holds numEuPerSubslice EU entries and each
// EU entry is bytesPerEu bytes wide. Bit N of the first byte of an EU entry marks thread N.
//
// threads     - threads to mark; a thread index above 7 aborts via UNRECOVERABLE_IF
// hwInfo      - source of the slice / subslice / EU / thread counts
// bitmask     - [out] receives a newly allocated buffer with the requested bits set
// bitmaskSize - [out] receives the size of that buffer in bytes
template <typename Family>
void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const uint32_t numSubslicesPerSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    // The bitmask never carries more than 8 EU entries per subslice.
    const uint32_t numEuPerSubslice = std::min(gtInfo.MaxEuPerSubSlice, 8u);
    const uint32_t numThreadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;

    // Size the buffer for whichever is larger: the highest enabled slice or the supported slice count.
    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
    const uint32_t sliceCount = std::max(highestEnabledSlice, gtInfo.MaxSlicesSupported);

    bitmaskSize = sliceCount * threadsSizePerSlice;
    // make_unique<T[]>(n) value-initialises its elements, so the buffer starts out all zero
    // and needs no separate memset.
    bitmask = std::make_unique<uint8_t[]>(bitmaskSize);

    for (const auto &thread : threads) {
        // Only the first byte of an EU entry is written, so thread indices above 7 cannot be encoded.
        UNRECOVERABLE_IF(thread.thread > 7);

        uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
        uint8_t *subsliceData = ptrOffset(sliceData, threadsSizePerSubSlice * thread.subslice);
        uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu);
        *euData |= (1 << thread.thread);
    }
}
// Decodes an attention bitmask into the list of threads whose bits are set.
//
// The bitmask is walked slice by slice, subslice by subslice and EU by EU. Every EU owns
// bytesPerEu consecutive bytes; bit i of byte b denotes thread (i + 8 * b) of that EU.
// Decoding stops at the first EU entry that would start at or beyond bitmaskSize. Bytes of
// a trailing, only partially present EU entry that lie beyond bitmaskSize are never read.
//
// hwInfo      - source of the slice / subslice / EU / thread counts
// tile        - tile index stored in every returned ThreadId
// bitmask     - attention bits; a null pointer aborts via UNRECOVERABLE_IF as soon as an
//               in-range EU entry is reached
// bitmaskSize - number of valid bytes in `bitmask`
// Returns the decoded threads in the order they are encountered during the walk.
template <typename Family>
std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const uint32_t numSubslicesPerSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    const uint32_t numEuPerSubslice = gtInfo.MaxEuPerSubSlice;
    const uint32_t numThreadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;

    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
    const uint32_t sliceCount = std::max(highestEnabledSlice, gtInfo.MaxSlicesSupported);

    std::vector<EuThread::ThreadId> threads;

    for (uint32_t slice = 0; slice < sliceCount; slice++) {
        for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
            for (uint32_t eu = 0; eu < numEuPerSubslice; eu++) {
                size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
                if (offset >= bitmaskSize) {
                    // Offsets only grow from here on, so nothing further can be in range.
                    return threads;
                }
                UNRECOVERABLE_IF(!bitmask);

                // Bound every byte access individually: a buffer whose size is not a multiple
                // of bytesPerEu must not be read past its end.
                for (uint32_t byte = 0; byte < bytesPerEu && offset + byte < bitmaskSize; byte++) {
                    const uint8_t attentionBits = bitmask[offset + byte];
                    for (uint32_t bit = 0; bit < 8; bit++) {
                        if (attentionBits & (1u << bit)) {
                            threads.emplace_back(tile, slice, subslice, eu, bit + 8 * byte);
                        }
                    }
                }
            }
        }
    }

    return threads;
}
template <typename Family>
void L0GfxCoreHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
if (group.engineGroupType == NEO::EngineGroupType::linkedCopy) {

View File

@ -0,0 +1,78 @@
/*
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/device/device.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/definitions/engine_group_types.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/ptr_math.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
namespace L0 {
// Builds an attention bitmask in which the bit of every thread listed in `threads` is set.
//
// Layout produced here: slices are stored back to back; each slice holds
// numSubslicesPerSlice subslices, each subslice holds numEuPerSubslice EU entries and each
// EU entry is bytesPerEu bytes wide. Bit N of the first byte of an EU entry marks thread N.
//
// threads     - threads to mark; a thread index above 7 aborts via UNRECOVERABLE_IF
// hwInfo      - source of the slice / subslice / EU / thread counts
// bitmask     - [out] receives a newly allocated buffer with the requested bits set
// bitmaskSize - [out] receives the size of that buffer in bytes
template <typename Family>
void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const uint32_t numSubslicesPerSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    // The bitmask never carries more than 8 EU entries per subslice.
    const uint32_t numEuPerSubslice = std::min(gtInfo.MaxEuPerSubSlice, 8u);
    const uint32_t numThreadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;

    // Size the buffer for whichever is larger: the highest enabled slice or the supported slice count.
    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
    const uint32_t sliceCount = std::max(highestEnabledSlice, gtInfo.MaxSlicesSupported);

    bitmaskSize = sliceCount * threadsSizePerSlice;
    // make_unique<T[]>(n) value-initialises its elements, so the buffer starts out all zero
    // and needs no separate memset.
    bitmask = std::make_unique<uint8_t[]>(bitmaskSize);

    for (const auto &thread : threads) {
        // Only the first byte of an EU entry is written, so thread indices above 7 cannot be encoded.
        UNRECOVERABLE_IF(thread.thread > 7);

        uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
        uint8_t *subsliceData = ptrOffset(sliceData, threadsSizePerSubSlice * thread.subslice);
        uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu);
        *euData |= (1 << thread.thread);
    }
}
// Decodes an attention bitmask into the list of threads whose bits are set.
//
// The bitmask is walked slice by slice, subslice by subslice and EU by EU. Every EU owns
// bytesPerEu consecutive bytes; bit i of byte b denotes thread (i + 8 * b) of that EU.
// Decoding stops at the first EU entry that would start at or beyond bitmaskSize. Bytes of
// a trailing, only partially present EU entry that lie beyond bitmaskSize are never read.
//
// hwInfo      - source of the slice / subslice / EU / thread counts
// tile        - tile index stored in every returned ThreadId
// bitmask     - attention bits; a null pointer aborts via UNRECOVERABLE_IF as soon as an
//               in-range EU entry is reached
// bitmaskSize - number of valid bytes in `bitmask`
// Returns the decoded threads in the order they are encountered during the walk.
template <typename Family>
std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const uint32_t numSubslicesPerSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    const uint32_t numEuPerSubslice = gtInfo.MaxEuPerSubSlice;
    const uint32_t numThreadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;

    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
    const uint32_t sliceCount = std::max(highestEnabledSlice, gtInfo.MaxSlicesSupported);

    std::vector<EuThread::ThreadId> threads;

    for (uint32_t slice = 0; slice < sliceCount; slice++) {
        for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
            for (uint32_t eu = 0; eu < numEuPerSubslice; eu++) {
                size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
                if (offset >= bitmaskSize) {
                    // Offsets only grow from here on, so nothing further can be in range.
                    return threads;
                }
                UNRECOVERABLE_IF(!bitmask);

                // Bound every byte access individually: a buffer whose size is not a multiple
                // of bytesPerEu must not be read past its end.
                for (uint32_t byte = 0; byte < bytesPerEu && offset + byte < bitmaskSize; byte++) {
                    const uint8_t attentionBits = bitmask[offset + byte];
                    for (uint32_t bit = 0; bit < 8; bit++) {
                        if (attentionBits & (1u << bit)) {
                            threads.emplace_back(tile, slice, subslice, eu, bit + 8 * byte);
                        }
                    }
                }
            }
        }
    }

    return threads;
}
} // namespace L0

View File

@ -9,6 +9,72 @@
namespace L0 {
// Builds an attention bitmask in which the bit of every thread listed in `threads` is set,
// using a layout where the thread bytes of one subslice are grouped by byte number.
//
// Layout produced here: slices are stored back to back; each slice holds
// numSubslicesPerSlice subslices of numEuPerSubslice * bytesPerEu bytes. Inside a subslice,
// byte number g (= thread / 8) of EU e is stored at offset g * numEuPerSubslice + e, i.e.
// byte 0 of every EU comes before byte 1 of any EU. Bit (thread % 8) marks the thread.
//
// threads     - threads to mark; a thread index above 9 aborts via UNRECOVERABLE_IF
// hwInfo      - source of the slice / subslice / EU / thread counts
// bitmask     - [out] receives a newly allocated buffer with the requested bits set
// bitmaskSize - [out] receives the size of that buffer in bytes
template <typename Family>
void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const uint32_t numSubslicesPerSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    // The bitmask never carries more than 8 EU entries per subslice.
    const uint32_t numEuPerSubslice = std::min(gtInfo.MaxEuPerSubSlice, 8u);
    const uint32_t numThreadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;

    // Size the buffer for whichever is larger: the highest enabled slice or the supported slice count.
    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
    const uint32_t sliceCount = std::max(highestEnabledSlice, gtInfo.MaxSlicesSupported);

    bitmaskSize = sliceCount * threadsSizePerSlice;
    // make_unique<T[]>(n) value-initialises its elements, so the buffer starts out all zero
    // and needs no separate memset.
    bitmask = std::make_unique<uint8_t[]>(bitmaskSize);

    for (const auto &thread : threads) {
        UNRECOVERABLE_IF(thread.thread > 9);

        uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
        uint8_t *subsliceData = ptrOffset(sliceData, threadsSizePerSubSlice * thread.subslice);

        // Threads 0-7 live in byte number 0, threads 8-9 in byte number 1 of the EU.
        auto euByteNum = (thread.thread / 8);
        uint8_t *euData = ptrOffset(subsliceData, euByteNum * numEuPerSubslice + thread.eu);
        *euData |= 1 << ((thread.thread) % 8);
    }
}
// Decodes an attention bitmask into the list of threads whose bits are set, for the layout
// in which the thread bytes of one subslice are grouped by byte number.
//
// The bitmask is walked slice by slice, subslice by subslice and then over
// numEuPerSubslice * bytesPerEu consecutive bytes, addressed as (eu, byte). With
// G = numEuPerSubslice / bytesPerEu, a set bit i at walk position (eu, byte) is reported as
//   EU     = (eu % G) * bytesPerEu + byte
//   thread = i + 8 * (eu / G)
// so the first half of a subslice's bytes yields threads 0-7 and the following bytes yield
// threads 8 and up.
// Decoding stops at the first walk position that would start at or beyond bitmaskSize.
// Bytes beyond bitmaskSize are never read.
//
// hwInfo      - source of the slice / subslice / EU / thread counts
// tile        - tile index stored in every returned ThreadId
// bitmask     - attention bits; a null pointer aborts via UNRECOVERABLE_IF as soon as an
//               in-range position is reached
// bitmaskSize - number of valid bytes in `bitmask`
// Returns the decoded threads in the order their bytes appear in the bitmask.
template <typename Family>
std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
    const auto &gtInfo = hwInfo.gtSystemInfo;

    const uint32_t numSubslicesPerSlice = gtInfo.MaxSubSlicesSupported / gtInfo.MaxSlicesSupported;
    const uint32_t numEuPerSubslice = gtInfo.MaxEuPerSubSlice;
    const uint32_t numThreadsPerEu = gtInfo.ThreadCount / gtInfo.EUCount;
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;

    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
    const uint32_t sliceCount = std::max(highestEnabledSlice, gtInfo.MaxSlicesSupported);

    std::vector<EuThread::ThreadId> threads;

    for (uint32_t slice = 0; slice < sliceCount; slice++) {
        for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
            for (uint32_t eu = 0; eu < numEuPerSubslice; eu++) {
                size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
                if (offset >= bitmaskSize) {
                    // Offsets only grow from here on, so nothing further can be in range.
                    return threads;
                }
                UNRECOVERABLE_IF(!bitmask);

                // Bound every byte access individually: a buffer whose size is not a multiple
                // of bytesPerEu must not be read past its end.
                for (uint32_t byte = 0; byte < bytesPerEu && offset + byte < bitmaskSize; byte++) {
                    const uint8_t attentionBits = bitmask[offset + byte];
                    for (uint32_t bit = 0; bit < 8; bit++) {
                        if (attentionBits & (1u << bit)) {
                            // Evaluated only for set bits, exactly like the divisions it replaces.
                            const uint32_t eusPerByteGroup = numEuPerSubslice / bytesPerEu;
                            const uint32_t decodedEu = (eu % eusPerByteGroup) * bytesPerEu + byte;
                            const uint32_t decodedThread = bit + 8 * (eu / eusPerByteGroup);
                            threads.emplace_back(tile, slice, subslice, decodedEu, decodedThread);
                        }
                    }
                }
            }
        }
    }

    return threads;
}
template <typename Family>
ze_rtas_format_exp_t L0GfxCoreHelperHw<Family>::getSupportedRTASFormat() const {
return static_cast<ze_rtas_format_exp_t>(RTASDeviceFormatInternal::version2);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -9,6 +9,7 @@
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl"

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -9,6 +9,7 @@
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_and_xe_hpc.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl"

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -18,7 +18,6 @@ namespace ult {
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_GEN12LP_CORE);
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
using L0GfxCoreHelperTestGen12Lp = Test<DeviceFixture>;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -87,7 +87,7 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenAskingForUsmCompressionSu
EXPECT_FALSE(l0GfxCoreHelper.usmCompressionSupported(hwInfo));
}
HWTEST_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned) {
HWTEST2_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned, IsAtMostXe2HpgCore) {
auto printAttentionBitmask = [](uint8_t *expected, uint8_t *actual, uint32_t maxSlices, uint32_t maxSubSlicesPerSlice, uint32_t maxEuPerSubslice, uint32_t threadsPerEu, bool printBitmask = false) {
auto bytesPerThread = threadsPerEu > 8 ? 2u : 1u;
@ -215,7 +215,7 @@ HWTEST_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmask
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
}
HWTEST_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet) {
HWTEST2_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet, IsAtMostXe2HpgCore) {
auto hwInfo = *NEO::defaultHwInfo.get();
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
@ -239,6 +239,153 @@ HWTEST_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBit
EXPECT_TRUE(memoryZeroed(&data[numBytesPerThread + 1], size - numBytesPerThread - 1));
}
// Xe3-only check of getAttentionBitmaskForSingleThreads: requests a set of threads that
// includes thread indices 8 and 9, verifies a handful of individual bitmask bytes and then
// verifies that the total number of set bits equals the number of requested threads.
HWTEST2_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet, IsXe3Core) {
// Work on a local copy of the default hardware info so the topology can be overridden.
auto hwInfo = *NEO::defaultHwInfo.get();
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
// 4 slices with 16 subslices in total, of which only slices 2 and 3 are marked enabled.
hwInfo.gtSystemInfo.IsDynamicallyPopulated = true;
hwInfo.gtSystemInfo.MaxSlicesSupported = 4u;
hwInfo.gtSystemInfo.MaxSubSlicesSupported = 16u;
for (auto &sliceInfo : hwInfo.gtSystemInfo.SliceInfo) {
sliceInfo.Enabled = false;
}
hwInfo.gtSystemInfo.SliceInfo[2].Enabled = true;
hwInfo.gtSystemInfo.SliceInfo[3].Enabled = true;
// Same size computations as the helper under test, used below to locate slices and subslices.
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
std::unique_ptr<uint8_t[]> bitmask;
size_t size = 0;
// Requested threads, listed as {tile, slice, subslice, eu, thread}.
std::vector<EuThread::ThreadId> threads;
threads.push_back({0, 0, 0, 0, 8});
threads.push_back({0, 0, 0, 1, 9});
threads.push_back({0, 1, 0, 0, 0});
threads.push_back({0, 1, 0, 1, 1});
threads.push_back({0, 1, 0, 0, 8});
threads.push_back({0, 1, 0, 1, 9});
threads.push_back({0, 1, 1, 0, 0});
threads.push_back({0, 1, 1, 1, 1});
threads.push_back({0, 1, 1, 0, 8});
threads.push_back({0, 1, 1, 1, 9});
threads.push_back({0, 2, 1, 0, 0});
threads.push_back({0, 2, 1, 1, 1});
threads.push_back({0, 2, 1, 0, 8});
threads.push_back({0, 2, 1, 1, 9});
threads.push_back({0, 1, 2, 0, 0});
threads.push_back({0, 1, 2, 1, 1});
threads.push_back({0, 1, 2, 0, 8});
threads.push_back({0, 1, 2, 1, 9});
// Threads in the last supported slice.
auto maxSlice = hwInfo.gtSystemInfo.MaxSlicesSupported - 1;
threads.push_back({0, maxSlice, 2, 3, 0});
threads.push_back({0, maxSlice, 2, 3, 1});
threads.push_back({0, maxSlice, 2, 3, 8});
threads.push_back({0, maxSlice, 2, 3, 9});
// Threads in the last subslice of slice 1.
auto maxSubSlice = numSubslicesPerSlice - 1;
threads.push_back({0, 1, maxSubSlice, 3, 0});
threads.push_back({0, 1, maxSubSlice, 3, 1});
threads.push_back({0, 1, maxSubSlice, 3, 8});
threads.push_back({0, 1, maxSubSlice, 3, 9});
l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
auto data = bitmask.get();
// Slice 0, subslice 0: threads 8 (EU 0) and 9 (EU 1) are expected at bytes 8 and 9.
// NOTE(review): the literal offsets 8/9 presumably assume numEuPerSubslice == 8 - confirm.
EXPECT_EQ(1u, data[8]);
EXPECT_EQ(1u << 1, data[9]);
// Slice 1, subslice 0: threads 0/1 at bytes 0/1 and threads 8/9 at bytes 8/9 of the slice.
auto sliceOffset = threadsSizePerSlice;
EXPECT_EQ(1u, data[sliceOffset]);
EXPECT_EQ(1u << 1, data[sliceOffset + 1]);
EXPECT_EQ(1u, data[sliceOffset + 8]);
EXPECT_EQ(1u << 1, data[sliceOffset + 9]);
// Slice 1, subslice 1: same pattern, one subslice further.
auto subSliceOffset = sliceOffset + numEuPerSubslice * bytesPerEu;
EXPECT_EQ(1u, data[subSliceOffset]);
EXPECT_EQ(1u << 1, data[subSliceOffset + 1]);
EXPECT_EQ(1u, data[subSliceOffset + 8]);
EXPECT_EQ(1u << 1, data[subSliceOffset + 9]);
// Count every set bit in the bitmask. The loop shifts the bytes in place, so the bitmask
// contents are destroyed and must not be inspected afterwards.
size_t threadCount = 0;
for (size_t i = 0; i < size; i++) {
while (data[i]) {
if (data[i] & 0x01) {
threadCount++;
}
data[i] = data[i] >> 1;
}
}
// One bit per requested thread: no bit may be lost or shared between two threads.
EXPECT_EQ(threadCount, threads.size());
}
// Xe3-only check of getAttentionBitmaskForSingleThreads with MaxEuPerSubSlice forced to 16:
// requests one thread at a time and verifies the byte in which its bit is expected.
HWTEST2_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned, IsXe3Core) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    // Capture the default threads-per-EU ratio before the EU count is overridden, then keep
    // EUCount and ThreadCount consistent with the new MaxEuPerSubSlice.
    const auto threadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
    hwInfo.gtSystemInfo.MaxEuPerSubSlice = 16u;
    hwInfo.gtSystemInfo.EUCount = hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.SubSliceCount;
    hwInfo.gtSystemInfo.ThreadCount = hwInfo.gtSystemInfo.EUCount * threadsPerEu;

    MockExecutionEnvironment executionEnvironment(&hwInfo);
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

    // Same size computations as the helper under test, used to locate the second slice.
    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
    auto sliceOffset = threadsSizePerSlice;

    std::unique_ptr<uint8_t[]> bitmask;
    size_t size = 0;
    std::vector<EuThread::ThreadId> threads;

    // {tile, slice, subslice, eu, thread}: EU 0, thread 6 -> bit 6 of byte 0.
    threads.push_back({0, 0, 0, 0, 6});
    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
    auto returnedBitmask = bitmask.get();
    EXPECT_EQ(uint8_t(1u << 6), returnedBitmask[0]);

    // EU 1, thread 3 -> bit 3 of byte 1.
    threads.clear();
    threads.push_back({0, 0, 0, 1, 3});
    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
    returnedBitmask = bitmask.get();
    EXPECT_EQ(uint8_t(1u << 3), returnedBitmask[1]);

    // Subslice 1, EU 1, thread 8 -> bit 0 of byte 25.
    // NOTE(review): 25 presumably is 16 (one subslice) + 8 (second byte group) + 1 (EU) - confirm.
    threads.clear();
    threads.push_back({0, 0, 1, 1, 8});
    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
    returnedBitmask = bitmask.get();
    EXPECT_EQ(1u, returnedBitmask[25]);

    // Slice 1, EU 0, thread 8 -> bit 0 of byte 8 within the second slice.
    threads.clear();
    threads.push_back({0, 1, 0, 0, 8});
    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
    returnedBitmask = bitmask.get();
    EXPECT_EQ(1u, returnedBitmask[sliceOffset + 8]);
}
HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) {
auto hwInfo = *NEO::defaultHwInfo.get();
MockExecutionEnvironment executionEnvironment;
@ -277,6 +424,48 @@ HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGe
EXPECT_EQ(1u, threads[0].tileIndex);
}
// Xe3-only round trip for a single thread: encodes one thread with
// getAttentionBitmaskForSingleThreads, decodes the result with getThreadsFromAttentionBitmask
// and expects exactly that thread back. Run once with thread 8 and once with thread 9.
HWTEST2_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IsXe3Core) {
auto hwInfo = *NEO::defaultHwInfo.get();
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
std::unique_ptr<uint8_t[]> bitmask;
size_t size = 0;
// Pick the second-to-last subslice of a slice, or subslice 0 when a slice has at most two.
uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
uint32_t subsliceID = subslicesPerSlice > 2 ? subslicesPerSlice - 2 : 0;
uint32_t threadID = 8;
// First round: {tile 0, slice 0, subsliceID, EU 0, thread 8}.
std::vector<EuThread::ThreadId> threadsWithAtt;
threadsWithAtt.push_back({0, 0, subsliceID, 0, threadID});
l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size);
auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size);
// ASSERT (not EXPECT) so that threads[0] is never accessed on an unexpected result size.
ASSERT_EQ(1u, threads.size());
EXPECT_EQ(0u, threads[0].slice);
EXPECT_EQ(subsliceID, threads[0].subslice);
EXPECT_EQ(0u, threads[0].eu);
EXPECT_EQ(threadID, threads[0].thread);
EXPECT_EQ(0u, threads[0].tileIndex);
// Clear the previous bitmask before the second round.
std::memset(bitmask.get(), 0, size);
threadsWithAtt.clear();
threadID = 9;
// Second round: {tile 0, slice 0, subslice 1, EU 5, thread 9}.
threadsWithAtt.push_back({0, 0, 1, 5, threadID});
l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size);
threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size);
ASSERT_EQ(1u, threads.size());
EXPECT_EQ(0u, threads[0].slice);
EXPECT_EQ(1u, threads[0].subslice);
EXPECT_EQ(5u, threads[0].eu);
EXPECT_EQ(threadID, threads[0].thread);
}
HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) {
auto hwInfo = *NEO::defaultHwInfo.get();
MockExecutionEnvironment executionEnvironment;
@ -340,7 +529,7 @@ HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingT
}
}
HWTEST_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) {
HWTEST2_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IsXeHpcCoreOrXe2HpgCore) {
auto hwInfo = *NEO::defaultHwInfo.get();
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
@ -374,6 +563,118 @@ HWTEST_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThe
}
}
// Xe3-only check of getThreadsFromAttentionBitmask on a hand-built bitmask: bits for
// threads 6-9 of EUs 0 and 1 are set in several subslices/slices and the decoded list must
// match expectedThreads element by element, in order.
HWTEST2_F(L0GfxCoreHelperTest, givenEu0To1Threads6To10BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IsXe3Core) {
auto hwInfo = *NEO::defaultHwInfo.get();
MockExecutionEnvironment executionEnvironment;
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
// Byte distances between consecutive subslices and consecutive slices in the bitmask.
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
auto subsliceOffset = numEuPerSubslice * bytesPerEu;
auto sliceOffset = threadsSizePerSlice;
uint8_t data[1024] = {};
// 0xC0 sets bits 6 and 7; 0x03 sets bits 0 and 1.
// Bytes 0/1 are expected to decode as threads 6-7 of EU 0/1, bytes 8/9 as threads 8-9 of
// EU 0/1 (see expectedThreads).
// NOTE(review): the literal offsets 8/9 presumably assume 8 EUs per subslice - confirm.
data[0] = 0xC0;
data[1] = 0xC0;
data[8] = 0x03;
data[9] = 0x03;
data[subsliceOffset + 8] = 0x03;
data[subsliceOffset + 9] = 0x03;
data[sliceOffset + subsliceOffset + 8] = 0x03;
data[sliceOffset + subsliceOffset + 9] = 0x03;
// Expected result as {slice, subslice, eu, thread}, in the order the decoder must emit it.
ze_device_thread_t expectedThreads[] = {
{0, 0, 0, 6},
{0, 0, 0, 7},
{0, 0, 1, 6},
{0, 0, 1, 7},
{0, 0, 0, 8},
{0, 0, 0, 9},
{0, 0, 1, 8},
{0, 0, 1, 9},
// subslice > 0
{0, 1, 0, 8},
{0, 1, 0, 9},
{0, 1, 1, 8},
{0, 1, 1, 9},
// slice > 0
{1, 1, 0, 8},
{1, 1, 0, 9},
{1, 1, 1, 8},
{1, 1, 1, 9}};
auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, data, sizeof(data));
// ASSERT (not EXPECT) so that the indexed comparison below never runs past the result.
ASSERT_EQ(16u, threads.size());
for (uint32_t i = 0; i < 16u; i++) {
EXPECT_EQ(expectedThreads[i].slice, threads[i].slice);
EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice);
EXPECT_EQ(expectedThreads[i].eu, threads[i].eu);
EXPECT_EQ(expectedThreads[i].thread, threads[i].thread);
EXPECT_EQ(0u, threads[i].tileIndex);
}
}
// Xe3-only round trip for many threads: encodes expectedThreads with
// getAttentionBitmaskForSingleThreads, decodes the bitmask with getThreadsFromAttentionBitmask
// and expects the very same threads back, in the same order.
HWTEST2_F(L0GfxCoreHelperTest, givenThreadsToBitmaskThenSameThreadsReturnedParsingBitmask, IsXe3Core) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

    // 4 slices with 16 subslices in total, of which only slices 2 and 3 are marked enabled.
    hwInfo.gtSystemInfo.IsDynamicallyPopulated = true;
    hwInfo.gtSystemInfo.MaxSlicesSupported = 4u;
    hwInfo.gtSystemInfo.MaxSubSlicesSupported = 16u;
    for (auto &sliceInfo : hwInfo.gtSystemInfo.SliceInfo) {
        sliceInfo.Enabled = false;
    }
    hwInfo.gtSystemInfo.SliceInfo[2].Enabled = true;
    hwInfo.gtSystemInfo.SliceInfo[3].Enabled = true;

    std::unique_ptr<uint8_t[]> bitmask;
    size_t size = 0;

    // Entries are {tile, slice, subslice, eu, thread}.
    // ordering is important, byte0 of every EU is before byte1 of any EU
    std::vector<EuThread::ThreadId> expectedThreads = {
        {0, 0, 0, 0, 6},
        {0, 0, 0, 0, 7},
        {0, 0, 0, 1, 6},
        {0, 0, 0, 1, 7},
        {0, 0, 0, 0, 8},
        {0, 0, 0, 0, 9},
        {0, 0, 0, 1, 8},
        {0, 0, 0, 1, 9},
        {0, 0, 1, 0, 8},
        {0, 0, 1, 0, 9},
        {0, 0, 1, 1, 8},
        {0, 0, 1, 1, 9},
        {0, 1, 1, 3, 5},
        {0, 1, 1, 6, 7},
        {0, 1, 1, 0, 8},
        {0, 1, 1, 0, 9},
        {0, 1, 1, 1, 8},
        {0, 1, 1, 1, 9},
        {0, 1, 1, 2, 8},
        {0, 1, 1, 4, 9},
        {0, 2, 1, 0, 0},
        {0, 2, 2, 3, 5}};

    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(expectedThreads, hwInfo, bitmask, size);
    auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size);

    // Stop here on a size mismatch: the loop below indexes `threads` by the expected count,
    // and extra decoded threads would otherwise go unnoticed.
    ASSERT_EQ(expectedThreads.size(), threads.size());
    for (uint32_t i = 0; i < expectedThreads.size(); i++) {
        EXPECT_EQ(expectedThreads[i].slice, threads[i].slice);
        EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice);
        EXPECT_EQ(expectedThreads[i].eu, threads[i].eu);
        EXPECT_EQ(expectedThreads[i].thread, threads[i].thread);
        EXPECT_EQ(0u, threads[i].tileIndex);
    }
}
HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) {
auto hwInfo = *NEO::defaultHwInfo.get();
MockExecutionEnvironment executionEnvironment;

View File

@ -20,7 +20,6 @@ namespace ult {
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
using L0GfxCoreHelperTestXeHpg = Test<DeviceFixture>;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -5749,7 +5749,7 @@ HWTEST2_F(DebugApiLinuxTest, GivenErrorFromSynchronousAttScanWhenMultipleThreads
l0GfxCoreHelperBackup.release();
}
TEST_F(DebugApiLinuxTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask) {
HWTEST2_F(DebugApiLinuxTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask, IsAtMostXe2HpgCore) {
zet_debug_config_t config = {};
config.pid = 0x1234;

View File

@ -33,6 +33,7 @@ using IsBeforeXeHpcCore = IsBeforeGfxCore<IGFX_XE_HPC_CORE>;
using IsAtLeastXe2HpgCore = IsAtLeastGfxCore<IGFX_XE2_HPG_CORE>;
using IsAtMostXe2HpgCore = IsAtMostGfxCore<IGFX_XE2_HPG_CORE>;
using IsWithinXeHpCoreAndXe2HpgCore = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE2_HPG_CORE>;
using IsXeHpcCoreOrXe2HpgCore = IsAnyGfxCores<IGFX_XE_HPC_CORE, IGFX_XE2_HPG_CORE>;
using IsWithinXeHpCoreAndXe3Core = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE3_CORE>;
using IsXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;