fix: Fix debugger bitmask parsing and setting for new platforms

Related-to: NEO-7799
Signed-off-by: Jemale Lockett <jemale.lockett@intel.com>
2025-01-21 21:06:09 +00:00 · 2025-01-21 21:06:09 +00:00 · 272edeabe6
parent 83af0b77ff
commit 272edeabe6
10 changed files with 458 additions and 73 deletions
--- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl
+++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021-2024 Intel Corporation
+ * Copyright (C) 2021-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -15,67 +15,6 @@
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"

 namespace L0 {
-
-template <typename Family>
-void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
-    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
-    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
-    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
-    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
-    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
-    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
-
-    bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
-    bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
-
-    memset(bitmask.get(), 0, bitmaskSize);
-
-    for (auto &thread : threads) {
-        uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
-        uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
-        uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu);
-        UNRECOVERABLE_IF(thread.thread > 7);
-        *euData |= (1 << thread.thread);
-    }
-}
-
-template <typename Family>
-std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
-    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
-    const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
-    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
-    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
-    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
-    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
-    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
-
-    std::vector<EuThread::ThreadId> threads;
-
-    for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) {
-        for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
-            for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) {
-                size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
-
-                if (offset >= bitmaskSize) {
-                    return threads;
-                }
-
-                UNRECOVERABLE_IF(!bitmask);
-                for (uint32_t byte = 0; byte < bytesPerEu; byte++) {
-                    std::bitset<8> bits(bitmask[offset + byte]);
-                    for (uint32_t i = 0; i < 8; i++) {
-                        if (bits.test(i)) {
-                            threads.emplace_back(tile, slice, subslice, eu, i + 8 * byte);
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return threads;
-}
-
 template <typename Family>
 void L0GfxCoreHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
    if (group.engineGroupType == NEO::EngineGroupType::linkedCopy) {
--- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl
+++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl
@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2021-2025 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include "shared/source/device/device.h"
+#include "shared/source/helpers/aligned_memory.h"
+#include "shared/source/helpers/definitions/engine_group_types.h"
+#include "shared/source/helpers/engine_node_helper.h"
+#include "shared/source/helpers/hw_info.h"
+#include "shared/source/helpers/ptr_math.h"
+
+#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
+
+namespace L0 {
+
+template <typename Family> // Builds a zero-initialised attention bitmask and sets one bit per entry of `threads`; the buffer and its byte size are returned through `bitmask` / `bitmaskSize`.
+void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
+    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); // at most 8 EUs per subslice are represented
+    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; // bytes per EU at one bit per thread (alignUp presumably rounds up to a multiple of 8)
+    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; // bytes occupied by one slice
+    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
+
+    bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; // slice count is the larger of highestEnabledSlice and MaxSlicesSupported
+    bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
+
+    memset(bitmask.get(), 0, bitmaskSize);
+
+    for (auto &thread : threads) {
+        uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice); // start of the thread's slice
+        uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice); // start of its subslice within that slice
+        uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu); // first byte of its EU within that subslice
+        UNRECOVERABLE_IF(thread.thread > 7); // only the first byte of an EU is written below, so thread ids above 7 are rejected
+        *euData |= (1 << thread.thread);
+    }
+}
+
+template <typename Family> // Walks `bitmask` slice by slice, subslice by subslice, EU by EU and returns a ThreadId (tagged with `tile`) for every set bit.
+std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
+    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; // NOTE(review): not capped at 8, unlike getAttentionBitmaskForSingleThreads - confirm the two layouts agree when MaxEuPerSubSlice > 8
+    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; // bytes per EU at one bit per thread (alignUp presumably rounds up to a multiple of 8)
+    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; // bytes occupied by one slice
+    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu; // bytes occupied by one subslice
+    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
+
+    std::vector<EuThread::ThreadId> threads;
+
+    for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) {
+        for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
+            for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) {
+                size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu; // first byte of this EU
+
+                if (offset >= bitmaskSize) { // caller's buffer ends here: return what was decoded so far
+                    return threads;
+                }
+
+                UNRECOVERABLE_IF(!bitmask); // reached only when bitmaskSize > 0, so a null buffer with size 0 is tolerated
+                for (uint32_t byte = 0; byte < bytesPerEu; byte++) {
+                    std::bitset<8> bits(bitmask[offset + byte]);
+                    for (uint32_t i = 0; i < 8; i++) {
+                        if (bits.test(i)) {
+                            threads.emplace_back(tile, slice, subslice, eu, i + 8 * byte); // bit i of byte `byte` is thread i + 8 * byte
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return threads;
+}
+} // namespace L0
--- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe3_and_later.inl
+++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe3_and_later.inl
@ -9,6 +9,72 @@

 namespace L0 {

+// Builds the attention bitmask for an explicit list of threads (Xe3 and later layout).
+//
+// Layout inside one subslice: the bytes are grouped by thread-byte index, not by EU.
+// Byte `k * numEuPerSubslice + eu` holds threads [8 * k, 8 * k + 7] of EU `eu`, i.e. byte 0 of
+// every EU comes before byte 1 of any EU.
+//
+// threads     - threads whose attention bit should be set
+// hwInfo      - topology used to size and index the bitmask
+// bitmask     - [out] receives a newly allocated, zero-initialised buffer with the bits set
+// bitmaskSize - [out] size of `bitmask` in bytes
+//
+// Aborts (UNRECOVERABLE_IF) when a thread id is above 9 or does not fit into the bytes reserved per EU.
+template <typename Family>
+void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
+    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
+    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
+    const uint32_t threadsSizePerSubslice = numEuPerSubslice * bytesPerEu;
+    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubslice;
+    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
+
+    bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * threadsSizePerSlice;
+    // make_unique<T[]> value-initialises the array, so the buffer starts out zeroed.
+    bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
+
+    for (const auto &thread : threads) {
+        const auto euByteNum = (thread.thread / 8);
+        // A thread id that needs more bytes than one EU owns would land in another
+        // subslice's bytes (or past the allocation for the last subslice).
+        UNRECOVERABLE_IF(thread.thread > 9 || euByteNum >= bytesPerEu);
+
+        uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
+        uint8_t *subsliceData = ptrOffset(sliceData, threadsSizePerSubslice * thread.subslice);
+        uint8_t *euData = ptrOffset(subsliceData, euByteNum * numEuPerSubslice + thread.eu);
+
+        *euData |= 1 << ((thread.thread) % 8);
+    }
+}
+
+// Decodes an attention bitmask (Xe3 and later layout) into the list of threads whose bit is set.
+//
+// Layout inside one subslice: byte `k * numEuPerSubslice + eu` holds threads [8 * k, 8 * k + 7]
+// of EU `eu`, so a byte's position within the subslice determines both the EU and the thread base.
+//
+// hwInfo      - topology used to index the bitmask
+// tile        - tile index stored in every returned ThreadId
+// bitmask     - buffer to decode; may be null only when nothing is read from it
+// bitmaskSize - number of valid bytes in `bitmask`; decoding stops at the first byte beyond it
+//
+// Returns the threads in buffer order (slice, subslice, byte, bit).
+// NOTE(review): numEuPerSubslice is not capped at 8 here, unlike getAttentionBitmaskForSingleThreads -
+// confirm the two layouts agree when MaxEuPerSubSlice > 8.
+template <typename Family>
+std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
+    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
+    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+
+    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
+    const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
+    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * threadsSizePerSubSlice;
+    const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
+    const uint32_t numSlices = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported);
+
+    std::vector<EuThread::ThreadId> threads;
+
+    for (uint32_t slice = 0; slice < numSlices; slice++) {
+        for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
+            // `eu` only steps through the subslice in bytesPerEu-sized chunks; the EU a byte
+            // belongs to is derived from the byte position below.
+            for (uint32_t eu = 0; eu < numEuPerSubslice; eu++) {
+
+                size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
+
+                if (offset >= bitmaskSize) {
+                    return threads;
+                }
+
+                UNRECOVERABLE_IF(!bitmask);
+                for (uint32_t byte = 0; byte < bytesPerEu; byte++) {
+                    // The chunk may straddle the end of the buffer: never read past bitmaskSize.
+                    if (offset + byte >= bitmaskSize) {
+                        return threads;
+                    }
+
+                    const uint32_t byteInSubslice = eu * bytesPerEu + byte;
+                    const uint32_t decodedEu = byteInSubslice % numEuPerSubslice;
+                    const uint32_t threadBase = 8 * (byteInSubslice / numEuPerSubslice);
+
+                    std::bitset<8> bits(bitmask[offset + byte]);
+                    for (uint32_t i = 0; i < 8; i++) {
+                        if (bits.test(i)) {
+                            threads.emplace_back(tile, slice, subslice, decodedEu, threadBase + i);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return threads;
+}
+
 template <typename Family>
 ze_rtas_format_exp_t L0GfxCoreHelperHw<Family>::getSupportedRTASFormat() const {
    return static_cast<ze_rtas_format_exp_t>(RTASDeviceFormatInternal::version2);
--- a/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp
+++ b/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2024 Intel Corporation
+ * Copyright (C) 2024-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -9,6 +9,7 @@

 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
+#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl"
--- a/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp
+++ b/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021-2024 Intel Corporation
+ * Copyright (C) 2021-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -9,6 +9,7 @@

 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
+#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_and_xe_hpc.inl"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl"
--- a/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp
+++ b/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021-2024 Intel Corporation
+ * Copyright (C) 2021-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -18,7 +18,6 @@ namespace ult {
 HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_GEN12LP_CORE);
 HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
 HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
-HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
 HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);

 using L0GfxCoreHelperTestGen12Lp = Test<DeviceFixture>;
--- a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp
+++ b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021-2024 Intel Corporation
+ * Copyright (C) 2021-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -87,7 +87,7 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenAskingForUsmCompressionSu
    EXPECT_FALSE(l0GfxCoreHelper.usmCompressionSupported(hwInfo));
 }

-HWTEST_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned) {
+HWTEST2_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned, IsAtMostXe2HpgCore) {

    auto printAttentionBitmask = [](uint8_t *expected, uint8_t *actual, uint32_t maxSlices, uint32_t maxSubSlicesPerSlice, uint32_t maxEuPerSubslice, uint32_t threadsPerEu, bool printBitmask = false) {
        auto bytesPerThread = threadsPerEu > 8 ? 2u : 1u;
@ -215,7 +215,7 @@ HWTEST_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmask
    EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
 }

-HWTEST_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet) {
+HWTEST2_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet, IsAtMostXe2HpgCore) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
@ -239,6 +239,153 @@ HWTEST_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBit
    EXPECT_TRUE(memoryZeroed(&data[numBytesPerThread + 1], size - numBytesPerThread - 1));
 }

+HWTEST2_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet, IsXe3Core) { // Xe3: spot-checks the bytes written for thread ids 0, 1, 8 and 9 and verifies that the number of set bits equals the number of requested threads.
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    MockExecutionEnvironment executionEnvironment;
+    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
+
+    hwInfo.gtSystemInfo.IsDynamicallyPopulated = true;
+    hwInfo.gtSystemInfo.MaxSlicesSupported = 4u;
+    hwInfo.gtSystemInfo.MaxSubSlicesSupported = 16u;
+    for (auto &sliceInfo : hwInfo.gtSystemInfo.SliceInfo) {
+        sliceInfo.Enabled = false;
+    }
+    hwInfo.gtSystemInfo.SliceInfo[2].Enabled = true; // only slices 2 and 3 are left enabled
+    hwInfo.gtSystemInfo.SliceInfo[3].Enabled = true;
+
+    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
+    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
+    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; // bytes per slice in the expected layout
+
+    std::unique_ptr<uint8_t[]> bitmask;
+    size_t size = 0;
+
+    std::vector<EuThread::ThreadId> threads; // entries appear to be {tile, slice, subslice, eu, thread} - confirm against EuThread::ThreadId
+    threads.push_back({0, 0, 0, 0, 8});
+    threads.push_back({0, 0, 0, 1, 9});
+
+    threads.push_back({0, 1, 0, 0, 0});
+    threads.push_back({0, 1, 0, 1, 1});
+    threads.push_back({0, 1, 0, 0, 8});
+    threads.push_back({0, 1, 0, 1, 9});
+
+    threads.push_back({0, 1, 1, 0, 0});
+    threads.push_back({0, 1, 1, 1, 1});
+    threads.push_back({0, 1, 1, 0, 8});
+    threads.push_back({0, 1, 1, 1, 9});
+
+    threads.push_back({0, 2, 1, 0, 0});
+    threads.push_back({0, 2, 1, 1, 1});
+    threads.push_back({0, 2, 1, 0, 8});
+    threads.push_back({0, 2, 1, 1, 9});
+
+    threads.push_back({0, 1, 2, 0, 0});
+    threads.push_back({0, 1, 2, 1, 1});
+    threads.push_back({0, 1, 2, 0, 8});
+    threads.push_back({0, 1, 2, 1, 9});
+
+    auto maxSlice = hwInfo.gtSystemInfo.MaxSlicesSupported - 1; // last slice index
+    threads.push_back({0, maxSlice, 2, 3, 0});
+    threads.push_back({0, maxSlice, 2, 3, 1});
+    threads.push_back({0, maxSlice, 2, 3, 8});
+    threads.push_back({0, maxSlice, 2, 3, 9});
+
+    auto maxSubSlice = numSubslicesPerSlice - 1; // last subslice index within a slice
+    threads.push_back({0, 1, maxSubSlice, 3, 0});
+    threads.push_back({0, 1, maxSubSlice, 3, 1});
+    threads.push_back({0, 1, maxSubSlice, 3, 8});
+    threads.push_back({0, 1, maxSubSlice, 3, 9});
+
+    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
+
+    auto data = bitmask.get();
+    EXPECT_EQ(1u, data[8]); // {0, 0, 0, 0, 8}: bit 0 of byte 8
+    EXPECT_EQ(1u << 1, data[9]); // {0, 0, 0, 1, 9}: bit 1 of byte 9
+
+    auto sliceOffset = threadsSizePerSlice; // first byte of slice 1
+    EXPECT_EQ(1u, data[sliceOffset]);
+    EXPECT_EQ(1u << 1, data[sliceOffset + 1]);
+
+    EXPECT_EQ(1u, data[sliceOffset + 8]);
+    EXPECT_EQ(1u << 1, data[sliceOffset + 9]);
+
+    auto subSliceOffset = sliceOffset + numEuPerSubslice * bytesPerEu; // first byte of slice 1, subslice 1
+    EXPECT_EQ(1u, data[subSliceOffset]);
+    EXPECT_EQ(1u << 1, data[subSliceOffset + 1]);
+
+    EXPECT_EQ(1u, data[subSliceOffset + 8]);
+    EXPECT_EQ(1u << 1, data[subSliceOffset + 9]);
+
+    size_t threadCount = 0; // number of set bits in the whole bitmask
+    for (size_t i = 0; i < size; i++) {
+        while (data[i]) { // counts bits by shifting them out, which clears the bitmask as a side effect
+            if (data[i] & 0x01) {
+                threadCount++;
+            }
+            data[i] = data[i] >> 1;
+        }
+    }
+    EXPECT_EQ(threadCount, threads.size()); // fails if two requested threads collapsed onto the same bit
+}
+
+// Xe3 variant: checks which byte and bit getAttentionBitmaskForSingleThreads sets for single threads,
+// including thread id 8, which is stored in the second byte group of a subslice
+// (byte `numEuPerSubslice + eu` of that subslice).
+HWTEST2_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned, IsXe3Core) {
+
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    const auto threadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+
+    // Raise the EU count while keeping the threads-per-EU ratio of the default configuration.
+    hwInfo.gtSystemInfo.MaxEuPerSubSlice = 16u;
+    hwInfo.gtSystemInfo.EUCount = hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.SubSliceCount;
+    hwInfo.gtSystemInfo.ThreadCount = hwInfo.gtSystemInfo.EUCount * threadsPerEu;
+    MockExecutionEnvironment executionEnvironment(&hwInfo);
+    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
+
+    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
+    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
+    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
+    auto subsliceOffset = numEuPerSubslice * bytesPerEu;
+    auto sliceOffset = threadsSizePerSlice;
+
+    std::unique_ptr<uint8_t[]> bitmask;
+    size_t size = 0;
+
+    // slice 0, subslice 0, EU 0, thread 6 -> bit 6 of byte 0
+    std::vector<EuThread::ThreadId> threads;
+    threads.push_back({0, 0, 0, 0, 6});
+    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
+
+    auto returnedBitmask = bitmask.get();
+    EXPECT_EQ(uint8_t(1u << 6), returnedBitmask[0]);
+
+    // EU 1, thread 3 -> bit 3 of byte 1
+    threads.clear();
+    threads.push_back({0, 0, 0, 1, 3});
+
+    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
+
+    returnedBitmask = bitmask.get();
+    EXPECT_EQ(uint8_t(1u << 3), returnedBitmask[1]);
+
+    // subslice 1, EU 1, thread 8 -> bit 0 of the second byte group of subslice 1
+    // (index 25 with 8 EUs per subslice and 2 bytes per EU)
+    threads.clear();
+    threads.push_back({0, 0, 1, 1, 8});
+
+    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
+
+    returnedBitmask = bitmask.get();
+    EXPECT_EQ(1u, returnedBitmask[subsliceOffset + numEuPerSubslice + 1]);
+
+    // slice 1, subslice 0, EU 0, thread 8 -> bit 0 of the second byte group of slice 1
+    threads.clear();
+    threads.push_back({0, 1, 0, 0, 8});
+
+    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
+
+    returnedBitmask = bitmask.get();
+    EXPECT_EQ(1u, returnedBitmask[sliceOffset + 8]);
+}
+
 HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
@ -277,6 +424,48 @@ HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGe
    EXPECT_EQ(1u, threads[0].tileIndex);
 }

+HWTEST2_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IsXe3Core) { // Xe3 round trip: encodes one thread (id 8, then id 9) and expects the decoder to return exactly that thread.
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    MockExecutionEnvironment executionEnvironment;
+    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
+
+    std::unique_ptr<uint8_t[]> bitmask;
+    size_t size = 0;
+
+    uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    uint32_t subsliceID = subslicesPerSlice > 2 ? subslicesPerSlice - 2 : 0; // second-to-last subslice, or 0 when a slice has at most two subslices
+
+    uint32_t threadID = 8;
+    std::vector<EuThread::ThreadId> threadsWithAtt;
+    threadsWithAtt.push_back({0, 0, subsliceID, 0, threadID});
+
+    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size);
+
+    auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size);
+
+    ASSERT_EQ(1u, threads.size()); // fatal on mismatch, so threads[0] below is only read when it exists
+    EXPECT_EQ(0u, threads[0].slice);
+    EXPECT_EQ(subsliceID, threads[0].subslice);
+    EXPECT_EQ(0u, threads[0].eu);
+    EXPECT_EQ(threadID, threads[0].thread);
+
+    EXPECT_EQ(0u, threads[0].tileIndex); // tile index is the second argument passed to getThreadsFromAttentionBitmask
+
+    std::memset(bitmask.get(), 0, size); // clears the previous result before the second case
+    threadsWithAtt.clear();
+    threadID = 9;
+    threadsWithAtt.push_back({0, 0, 1, 5, threadID});
+
+    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size);
+    threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size);
+
+    ASSERT_EQ(1u, threads.size());
+    EXPECT_EQ(0u, threads[0].slice);
+    EXPECT_EQ(1u, threads[0].subslice);
+    EXPECT_EQ(5u, threads[0].eu);
+    EXPECT_EQ(threadID, threads[0].thread);
+}
+
 HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
@ -340,7 +529,7 @@ HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingT
    }
 }

-HWTEST_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) {
+HWTEST2_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IsXeHpcCoreOrXe2HpgCore) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
@ -374,6 +563,118 @@ HWTEST_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThe
    }
 }

+HWTEST2_F(L0GfxCoreHelperTest, givenEu0To1Threads6To10BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IsXe3Core) { // Xe3: decodes a hand-built bitmask and checks the returned threads and their order.
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    MockExecutionEnvironment executionEnvironment;
+    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
+
+    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
+    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
+    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
+    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
+    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
+
+    auto subsliceOffset = numEuPerSubslice * bytesPerEu; // bytes per subslice
+    auto sliceOffset = threadsSizePerSlice; // bytes per slice
+
+    uint8_t data[1024] = {};
+    data[0] = 0xC0; // bits 6 and 7 -> EU 0, threads 6 and 7
+    data[1] = 0xC0; // bits 6 and 7 -> EU 1, threads 6 and 7
+    data[8] = 0x03; // bits 0 and 1 -> EU 0, threads 8 and 9
+    data[9] = 0x03; // bits 0 and 1 -> EU 1, threads 8 and 9
+    data[subsliceOffset + 8] = 0x03; // same pattern in slice 0, subslice 1
+    data[subsliceOffset + 9] = 0x03;
+    data[sliceOffset + subsliceOffset + 8] = 0x03; // same pattern in slice 1, subslice 1
+    data[sliceOffset + subsliceOffset + 9] = 0x03;
+
+    ze_device_thread_t expectedThreads[] = { // entries are {slice, subslice, eu, thread}, listed in the order the decoder is expected to emit them
+        {0, 0, 0, 6},
+        {0, 0, 0, 7},
+        {0, 0, 1, 6},
+        {0, 0, 1, 7},
+        {0, 0, 0, 8},
+        {0, 0, 0, 9},
+        {0, 0, 1, 8},
+        {0, 0, 1, 9},
+        // subslice > 0
+        {0, 1, 0, 8},
+        {0, 1, 0, 9},
+        {0, 1, 1, 8},
+        {0, 1, 1, 9},
+        // slice > 0
+        {1, 1, 0, 8},
+        {1, 1, 0, 9},
+        {1, 1, 1, 8},
+        {1, 1, 1, 9}};
+
+    auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, data, sizeof(data));
+    ASSERT_EQ(16u, threads.size()); // fatal on mismatch, so the indexed loop below stays in bounds
+
+    for (uint32_t i = 0; i < 16u; i++) {
+        EXPECT_EQ(expectedThreads[i].slice, threads[i].slice);
+        EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice);
+        EXPECT_EQ(expectedThreads[i].eu, threads[i].eu);
+        EXPECT_EQ(expectedThreads[i].thread, threads[i].thread);
+        EXPECT_EQ(0u, threads[i].tileIndex);
+    }
+}
+
+// Xe3 round trip: encodes a list of threads into an attention bitmask, decodes it again and
+// expects the same threads back in the same order.
+HWTEST2_F(L0GfxCoreHelperTest, givenThreadsToBitmaskThenSameThreadsReturnedParsingBitmask, IsXe3Core) {
+    auto hwInfo = *NEO::defaultHwInfo.get();
+    MockExecutionEnvironment executionEnvironment;
+    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
+
+    hwInfo.gtSystemInfo.IsDynamicallyPopulated = true;
+    hwInfo.gtSystemInfo.MaxSlicesSupported = 4u;
+    hwInfo.gtSystemInfo.MaxSubSlicesSupported = 16u;
+    for (auto &sliceInfo : hwInfo.gtSystemInfo.SliceInfo) {
+        sliceInfo.Enabled = false;
+    }
+    // Only slices 2 and 3 are left enabled.
+    hwInfo.gtSystemInfo.SliceInfo[2].Enabled = true;
+    hwInfo.gtSystemInfo.SliceInfo[3].Enabled = true;
+
+    std::unique_ptr<uint8_t[]> bitmask;
+    size_t size = 0;
+
+    // ordering is important, byte0 of every EU is before byte1 of any EU
+    std::vector<EuThread::ThreadId> expectedThreads = {
+        {0, 0, 0, 0, 6},
+        {0, 0, 0, 0, 7},
+        {0, 0, 0, 1, 6},
+        {0, 0, 0, 1, 7},
+        {0, 0, 0, 0, 8},
+        {0, 0, 0, 0, 9},
+        {0, 0, 0, 1, 8},
+        {0, 0, 0, 1, 9},
+        {0, 0, 1, 0, 8},
+        {0, 0, 1, 0, 9},
+        {0, 0, 1, 1, 8},
+        {0, 0, 1, 1, 9},
+        {0, 1, 1, 3, 5},
+        {0, 1, 1, 6, 7},
+        {0, 1, 1, 0, 8},
+        {0, 1, 1, 0, 9},
+        {0, 1, 1, 1, 8},
+        {0, 1, 1, 1, 9},
+        {0, 1, 1, 2, 8},
+        {0, 1, 1, 4, 9},
+        {0, 2, 1, 0, 0},
+        {0, 2, 2, 3, 5}};
+
+    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(expectedThreads, hwInfo, bitmask, size);
+
+    auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size);
+
+    // Fatal on mismatch: without it a short (or long) decode would index `threads` out of
+    // bounds below or silently ignore extra threads.
+    ASSERT_EQ(expectedThreads.size(), threads.size());
+
+    for (uint32_t i = 0; i < expectedThreads.size(); i++) {
+        EXPECT_EQ(expectedThreads[i].slice, threads[i].slice);
+        EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice);
+        EXPECT_EQ(expectedThreads[i].eu, threads[i].eu);
+        EXPECT_EQ(expectedThreads[i].thread, threads[i].thread);
+
+        EXPECT_EQ(0u, threads[i].tileIndex);
+    }
+}
+
 HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
--- a/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp
+++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp
@ -20,7 +20,6 @@ namespace ult {
 HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_XE_HPG_CORE);
 HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
 HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
-HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
 HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);

 using L0GfxCoreHelperTestXeHpg = Test<DeviceFixture>;
--- a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp
+++ b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022-2024 Intel Corporation
+ * Copyright (C) 2022-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -5749,7 +5749,7 @@ HWTEST2_F(DebugApiLinuxTest, GivenErrorFromSynchronousAttScanWhenMultipleThreads
    l0GfxCoreHelperBackup.release();
 }

-TEST_F(DebugApiLinuxTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask) {
+HWTEST2_F(DebugApiLinuxTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask, IsAtMostXe2HpgCore) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;

--- a/shared/test/common/test_macros/header/common_matchers.h
+++ b/shared/test/common/test_macros/header/common_matchers.h
@ -33,6 +33,7 @@ using IsBeforeXeHpcCore = IsBeforeGfxCore<IGFX_XE_HPC_CORE>;
 using IsAtLeastXe2HpgCore = IsAtLeastGfxCore<IGFX_XE2_HPG_CORE>;
 using IsAtMostXe2HpgCore = IsAtMostGfxCore<IGFX_XE2_HPG_CORE>;
 using IsWithinXeHpCoreAndXe2HpgCore = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE2_HPG_CORE>;
+using IsXeHpcCoreOrXe2HpgCore = IsAnyGfxCores<IGFX_XE_HPC_CORE, IGFX_XE2_HPG_CORE>;
 using IsWithinXeHpCoreAndXe3Core = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE3_CORE>;

 using IsXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;