diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl index 95f5adbd51..5daeb345d8 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,67 +15,6 @@ #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" namespace L0 { - -template -void L0GfxCoreHelperHw::getAttentionBitmaskForSingleThreads(const std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const { - const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; - const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); - const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); - const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; - const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; - const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo); - - bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; - bitmask = std::make_unique(bitmaskSize); - - memset(bitmask.get(), 0, bitmaskSize); - - for (auto &thread : threads) { - uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice); - uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice); - uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu); - UNRECOVERABLE_IF(thread.thread > 7); - *euData |= (1 << thread.thread); - } -} - -template -std::vector L0GfxCoreHelperHw::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const { - const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; - const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; - const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); - const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; - const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; - const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu; - const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo); - - std::vector threads; - - for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) { - for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) { - for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) { - size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu; - - if (offset >= bitmaskSize) { - return threads; - } - - UNRECOVERABLE_IF(!bitmask); - for (uint32_t byte = 0; byte < bytesPerEu; byte++) { - std::bitset<8> bits(bitmask[offset + byte]); - for (uint32_t i = 0; i < 8; i++) { - if (bits.test(i)) { - threads.emplace_back(tile, slice, subslice, eu, i + 8 * byte); - } - } - } - } - } - } - - return threads; -} - template void L0GfxCoreHelperHw::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const { if (group.engineGroupType == NEO::EngineGroupType::linkedCopy) { diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl new file mode 100644 index 0000000000..4c35f77762 --- /dev/null +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2021-2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/device/device.h" +#include "shared/source/helpers/aligned_memory.h" +#include "shared/source/helpers/definitions/engine_group_types.h" +#include "shared/source/helpers/engine_node_helper.h" +#include "shared/source/helpers/hw_info.h" +#include "shared/source/helpers/ptr_math.h" + +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" + +namespace L0 { + +template +void L0GfxCoreHelperHw::getAttentionBitmaskForSingleThreads(const std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const { + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); + const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); + const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; + const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo); + + bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + bitmask = std::make_unique(bitmaskSize); + + memset(bitmask.get(), 0, bitmaskSize); + + for (auto &thread : threads) { + uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice); + uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice); + uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu); + UNRECOVERABLE_IF(thread.thread > 7); + *euData |= (1 << thread.thread); + } +} + +template +std::vector L0GfxCoreHelperHw::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const { + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; + const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); + const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; + const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu; + const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo); + + std::vector threads; + + for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) { + for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) { + for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) { + size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu; + + if (offset >= bitmaskSize) { + return threads; + } + + UNRECOVERABLE_IF(!bitmask); + for (uint32_t byte = 0; byte < bytesPerEu; byte++) { + std::bitset<8> bits(bitmask[offset + byte]); + for (uint32_t i = 0; i < 8; i++) { + if (bits.test(i)) { + threads.emplace_back(tile, slice, subslice, eu, i + 8 * byte); + } + } + } + } + } + } + + return threads; +} +} // namespace L0 diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe3_and_later.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe3_and_later.inl index be025d513b..0397691124 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe3_and_later.inl +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe3_and_later.inl @@ -9,6 +9,72 @@ namespace L0 { +template +void L0GfxCoreHelperHw::getAttentionBitmaskForSingleThreads(const std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const { + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); + const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); + const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; + const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo); + + bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + bitmask = std::make_unique(bitmaskSize); + + memset(bitmask.get(), 0, bitmaskSize); + + for (auto &thread : threads) { + uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice); + + uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice); + UNRECOVERABLE_IF(thread.thread > 9); + + auto euByteNum = (thread.thread / 8); + uint8_t *euData = ptrOffset(subsliceData, euByteNum * numEuPerSubslice + thread.eu); + + *euData |= 1 << ((thread.thread) % 8); + } +} + +template +std::vector L0GfxCoreHelperHw::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const { + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; + const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); + + const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; + const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu; + const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo); + + std::vector threads; + + for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) { + for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) { + for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) { + + size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu; + + if (offset >= bitmaskSize) { + return threads; + } + + UNRECOVERABLE_IF(!bitmask); + for (uint32_t byte = 0; byte < bytesPerEu; byte++) { + std::bitset<8> bits(bitmask[offset + byte]); + for (uint32_t i = 0; i < 8; i++) { + if (bits.test(i)) { + threads.emplace_back(tile, slice, subslice, (((eu % (numEuPerSubslice / bytesPerEu)) * bytesPerEu)) + byte, i + 8 * (eu / (numEuPerSubslice / bytesPerEu))); + } + } + } + } + } + } + + return threads; +} + template ze_rtas_format_exp_t L0GfxCoreHelperHw::getSupportedRTASFormat() const { return static_cast(RTASDeviceFormatInternal::version2); diff --git a/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp b/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp index fa00f735d4..77c77d9e75 100644 --- a/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp +++ b/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,6 +9,7 @@ #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl" diff --git a/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp b/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp index 498f22958b..23d264058a 100644 --- a/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp +++ b/level_zero/core/source/xe_hpc_core/l0_gfx_core_helper_xe_hpc_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,6 +9,7 @@ #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_and_xe_hpc.inl" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl" diff --git a/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp b/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp index 36c59a7f55..12e7cecc5e 100644 --- a/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp +++ b/level_zero/core/test/unit_tests/gen12lp/test_l0_gfx_core_helper_gen12lp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,7 +18,6 @@ namespace ult { HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE); -HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE); using L0GfxCoreHelperTestGen12Lp = Test; diff --git a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp index b94431a0c5..acce7697c5 100644 --- a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -87,7 +87,7 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenAskingForUsmCompressionSu EXPECT_FALSE(l0GfxCoreHelper.usmCompressionSupported(hwInfo)); } -HWTEST_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned) { +HWTEST2_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned, IsAtMostXe2HpgCore) { auto printAttentionBitmask = [](uint8_t *expected, uint8_t *actual, uint32_t maxSlices, uint32_t maxSubSlicesPerSlice, uint32_t maxEuPerSubslice, uint32_t threadsPerEu, bool printBitmask = false) { auto bytesPerThread = threadsPerEu > 8 ? 2u : 1u; @@ -215,7 +215,7 @@ HWTEST_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmask EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size)); } -HWTEST_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet) { +HWTEST2_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet, IsAtMostXe2HpgCore) { auto hwInfo = *NEO::defaultHwInfo.get(); MockExecutionEnvironment executionEnvironment; auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); @@ -239,6 +239,153 @@ HWTEST_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBit EXPECT_TRUE(memoryZeroed(&data[numBytesPerThread + 1], size - numBytesPerThread - 1)); } +HWTEST2_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet, IsXe3Core) { + auto hwInfo = *NEO::defaultHwInfo.get(); + MockExecutionEnvironment executionEnvironment; + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + + hwInfo.gtSystemInfo.IsDynamicallyPopulated = true; + hwInfo.gtSystemInfo.MaxSlicesSupported = 4u; + hwInfo.gtSystemInfo.MaxSubSlicesSupported = 16u; + for (auto &sliceInfo : hwInfo.gtSystemInfo.SliceInfo) { + sliceInfo.Enabled = false; + } + hwInfo.gtSystemInfo.SliceInfo[2].Enabled = true; + hwInfo.gtSystemInfo.SliceInfo[3].Enabled = true; + + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); + const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); + const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; + const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + + std::unique_ptr bitmask; + size_t size = 0; + + std::vector threads; + threads.push_back({0, 0, 0, 0, 8}); + threads.push_back({0, 0, 0, 1, 9}); + + threads.push_back({0, 1, 0, 0, 0}); + threads.push_back({0, 1, 0, 1, 1}); + threads.push_back({0, 1, 0, 0, 8}); + threads.push_back({0, 1, 0, 1, 9}); + + threads.push_back({0, 1, 1, 0, 0}); + threads.push_back({0, 1, 1, 1, 1}); + threads.push_back({0, 1, 1, 0, 8}); + threads.push_back({0, 1, 1, 1, 9}); + + threads.push_back({0, 2, 1, 0, 0}); + threads.push_back({0, 2, 1, 1, 1}); + threads.push_back({0, 2, 1, 0, 8}); + threads.push_back({0, 2, 1, 1, 9}); + + threads.push_back({0, 1, 2, 0, 0}); + threads.push_back({0, 1, 2, 1, 1}); + threads.push_back({0, 1, 2, 0, 8}); + threads.push_back({0, 1, 2, 1, 9}); + + auto maxSlice = hwInfo.gtSystemInfo.MaxSlicesSupported - 1; + threads.push_back({0, maxSlice, 2, 3, 0}); + threads.push_back({0, maxSlice, 2, 3, 1}); + threads.push_back({0, maxSlice, 2, 3, 8}); + threads.push_back({0, maxSlice, 2, 3, 9}); + + auto maxSubSlice = numSubslicesPerSlice - 1; + threads.push_back({0, 1, maxSubSlice, 3, 0}); + threads.push_back({0, 1, maxSubSlice, 3, 1}); + threads.push_back({0, 1, maxSubSlice, 3, 8}); + threads.push_back({0, 1, maxSubSlice, 3, 9}); + + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); + + auto data = bitmask.get(); + EXPECT_EQ(1u, data[8]); + EXPECT_EQ(1u << 1, data[9]); + + auto sliceOffset = threadsSizePerSlice; + EXPECT_EQ(1u, data[sliceOffset]); + EXPECT_EQ(1u << 1, data[sliceOffset + 1]); + + EXPECT_EQ(1u, data[sliceOffset + 8]); + EXPECT_EQ(1u << 1, data[sliceOffset + 9]); + + auto subSliceOffset = sliceOffset + numEuPerSubslice * bytesPerEu; + EXPECT_EQ(1u, data[subSliceOffset]); + EXPECT_EQ(1u << 1, data[subSliceOffset + 1]); + + EXPECT_EQ(1u, data[subSliceOffset + 8]); + EXPECT_EQ(1u << 1, data[subSliceOffset + 9]); + + size_t threadCount = 0; + for (size_t i = 0; i < size; i++) { + while (data[i]) { + if (data[i] & 0x01) { + threadCount++; + } + data[i] = data[i] >> 1; + } + } + EXPECT_EQ(threadCount, threads.size()); +} + +HWTEST2_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned, IsXe3Core) { + + auto hwInfo = *NEO::defaultHwInfo.get(); + const auto threadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); + + hwInfo.gtSystemInfo.MaxEuPerSubSlice = 16u; + hwInfo.gtSystemInfo.EUCount = hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.SubSliceCount; + hwInfo.gtSystemInfo.ThreadCount = hwInfo.gtSystemInfo.EUCount * threadsPerEu; + MockExecutionEnvironment executionEnvironment(&hwInfo); + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); + const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); + const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; + const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + auto sliceOffset = threadsSizePerSlice; + + std::unique_ptr bitmask; + size_t size = 0; + + std::vector threads; + threads.push_back({0, 0, 0, 0, 6}); + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); + + auto expectedBitmask = std::make_unique(size); + memset(expectedBitmask.get(), 0, size); + + auto returnedBitmask = bitmask.get(); + EXPECT_EQ(uint8_t(1u << 6), returnedBitmask[0]); + + threads.clear(); + threads.push_back({0, 0, 0, 1, 3}); + + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); + + returnedBitmask = bitmask.get(); + EXPECT_EQ(uint8_t(1u << 3), returnedBitmask[1]); + + threads.clear(); + threads.push_back({0, 0, 1, 1, 8}); + + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); + + returnedBitmask = bitmask.get(); + EXPECT_EQ(1u, returnedBitmask[25]); + + threads.clear(); + threads.push_back({0, 1, 0, 0, 8}); + + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); + + returnedBitmask = bitmask.get(); + EXPECT_EQ(1u, returnedBitmask[sliceOffset + 8]); +} + HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); MockExecutionEnvironment executionEnvironment; @@ -277,6 +424,48 @@ HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGe EXPECT_EQ(1u, threads[0].tileIndex); } +HWTEST2_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IsXe3Core) { + auto hwInfo = *NEO::defaultHwInfo.get(); + MockExecutionEnvironment executionEnvironment; + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + + std::unique_ptr bitmask; + size_t size = 0; + + uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + uint32_t subsliceID = subslicesPerSlice > 2 ? subslicesPerSlice - 2 : 0; + + uint32_t threadID = 8; + std::vector threadsWithAtt; + threadsWithAtt.push_back({0, 0, subsliceID, 0, threadID}); + + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); + + auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size); + + ASSERT_EQ(1u, threads.size()); + EXPECT_EQ(0u, threads[0].slice); + EXPECT_EQ(subsliceID, threads[0].subslice); + EXPECT_EQ(0u, threads[0].eu); + EXPECT_EQ(threadID, threads[0].thread); + + EXPECT_EQ(0u, threads[0].tileIndex); + + std::memset(bitmask.get(), 0, size); + threadsWithAtt.clear(); + threadID = 9; + threadsWithAtt.push_back({0, 0, 1, 5, threadID}); + + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); + threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size); + + ASSERT_EQ(1u, threads.size()); + EXPECT_EQ(0u, threads[0].slice); + EXPECT_EQ(1u, threads[0].subslice); + EXPECT_EQ(5u, threads[0].eu); + EXPECT_EQ(threadID, threads[0].thread); +} + HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); MockExecutionEnvironment executionEnvironment; @@ -340,7 +529,7 @@ HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingT } } -HWTEST_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) { +HWTEST2_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IsXeHpcCoreOrXe2HpgCore) { auto hwInfo = *NEO::defaultHwInfo.get(); MockExecutionEnvironment executionEnvironment; auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); @@ -374,6 +563,118 @@ HWTEST_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThe } } +HWTEST2_F(L0GfxCoreHelperTest, givenEu0To1Threads6To10BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IsXe3Core) { + auto hwInfo = *NEO::defaultHwInfo.get(); + MockExecutionEnvironment executionEnvironment; + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); + const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); + const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; + const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; + + auto subsliceOffset = numEuPerSubslice * bytesPerEu; + auto sliceOffset = threadsSizePerSlice; + + uint8_t data[1024] = {}; + data[0] = 0xC0; + data[1] = 0xC0; + data[8] = 0x03; + data[9] = 0x03; + data[subsliceOffset + 8] = 0x03; + data[subsliceOffset + 9] = 0x03; + data[sliceOffset + subsliceOffset + 8] = 0x03; + data[sliceOffset + subsliceOffset + 9] = 0x03; + + ze_device_thread_t expectedThreads[] = { + {0, 0, 0, 6}, + {0, 0, 0, 7}, + {0, 0, 1, 6}, + {0, 0, 1, 7}, + {0, 0, 0, 8}, + {0, 0, 0, 9}, + {0, 0, 1, 8}, + {0, 0, 1, 9}, + // subslice > 0 + {0, 1, 0, 8}, + {0, 1, 0, 9}, + {0, 1, 1, 8}, + {0, 1, 1, 9}, + // slice > 0 + {1, 1, 0, 8}, + {1, 1, 0, 9}, + {1, 1, 1, 8}, + {1, 1, 1, 9}}; + + auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, data, sizeof(data)); + ASSERT_EQ(16u, threads.size()); + + for (uint32_t i = 0; i < 16u; i++) { + EXPECT_EQ(expectedThreads[i].slice, threads[i].slice); + EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice); + EXPECT_EQ(expectedThreads[i].eu, threads[i].eu); + EXPECT_EQ(expectedThreads[i].thread, threads[i].thread); + EXPECT_EQ(0u, threads[i].tileIndex); + } +} + +HWTEST2_F(L0GfxCoreHelperTest, givenThreadsToBitmaskThenSameThreadsReturnedParsingBitmask, IsXe3Core) { + auto hwInfo = *NEO::defaultHwInfo.get(); + MockExecutionEnvironment executionEnvironment; + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + + hwInfo.gtSystemInfo.IsDynamicallyPopulated = true; + hwInfo.gtSystemInfo.MaxSlicesSupported = 4u; + hwInfo.gtSystemInfo.MaxSubSlicesSupported = 16u; + for (auto &sliceInfo : hwInfo.gtSystemInfo.SliceInfo) { + sliceInfo.Enabled = false; + } + hwInfo.gtSystemInfo.SliceInfo[2].Enabled = true; + hwInfo.gtSystemInfo.SliceInfo[3].Enabled = true; + + std::unique_ptr bitmask; + size_t size = 0; + + // ordering is important, byte0 of every EU is before byte1 of any EU + std::vector expectedThreads = { + {0, 0, 0, 0, 6}, + {0, 0, 0, 0, 7}, + {0, 0, 0, 1, 6}, + {0, 0, 0, 1, 7}, + {0, 0, 0, 0, 8}, + {0, 0, 0, 0, 9}, + {0, 0, 0, 1, 8}, + {0, 0, 0, 1, 9}, + {0, 0, 1, 0, 8}, + {0, 0, 1, 0, 9}, + {0, 0, 1, 1, 8}, + {0, 0, 1, 1, 9}, + {0, 1, 1, 3, 5}, + {0, 1, 1, 6, 7}, + {0, 1, 1, 0, 8}, + {0, 1, 1, 0, 9}, + {0, 1, 1, 1, 8}, + {0, 1, 1, 1, 9}, + {0, 1, 1, 2, 8}, + {0, 1, 1, 4, 9}, + {0, 2, 1, 0, 0}, + {0, 2, 2, 3, 5}}; + + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(expectedThreads, hwInfo, bitmask, size); + + auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size); + + for (uint32_t i = 0; i < expectedThreads.size(); i++) { + EXPECT_EQ(expectedThreads[i].slice, threads[i].slice); + EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice); + EXPECT_EQ(expectedThreads[i].eu, threads[i].eu); + EXPECT_EQ(expectedThreads[i].thread, threads[i].thread); + + EXPECT_EQ(0u, threads[i].tileIndex); + } +} + HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); MockExecutionEnvironment executionEnvironment; diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp index 26d54e02bf..0a4ca79665 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_l0_gfx_core_helper_xe_hpg_core.cpp @@ -20,7 +20,6 @@ namespace ult { HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); -HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); using L0GfxCoreHelperTestXeHpg = Test; diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp index 0292d283b4..80d36274bb 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -5749,7 +5749,7 @@ HWTEST2_F(DebugApiLinuxTest, GivenErrorFromSynchronousAttScanWhenMultipleThreads l0GfxCoreHelperBackup.release(); } -TEST_F(DebugApiLinuxTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask) { +HWTEST2_F(DebugApiLinuxTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask, IsAtMostXe2HpgCore) { zet_debug_config_t config = {}; config.pid = 0x1234; diff --git a/shared/test/common/test_macros/header/common_matchers.h b/shared/test/common/test_macros/header/common_matchers.h index 8c0f0b2282..40cc299ac6 100644 --- a/shared/test/common/test_macros/header/common_matchers.h +++ b/shared/test/common/test_macros/header/common_matchers.h @@ -33,6 +33,7 @@ using IsBeforeXeHpcCore = IsBeforeGfxCore; using IsAtLeastXe2HpgCore = IsAtLeastGfxCore; using IsAtMostXe2HpgCore = IsAtMostGfxCore; using IsWithinXeHpCoreAndXe2HpgCore = IsWithinGfxCore; +using IsXeHpcCoreOrXe2HpgCore = IsAnyGfxCores; using IsWithinXeHpCoreAndXe3Core = IsWithinGfxCore; using IsXeHpOrXeHpgCore = IsAnyGfxCores;