fix: Fix debugger bitmask parsing and setting for new platforms
Related-to: NEO-7799 Signed-off-by: Jemale Lockett <jemale.lockett@intel.com>
This commit is contained in:
parent
83af0b77ff
commit
272edeabe6
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -15,67 +15,6 @@
|
|||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <typename Family>
|
||||
void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
|
||||
|
||||
bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
|
||||
|
||||
memset(bitmask.get(), 0, bitmaskSize);
|
||||
|
||||
for (auto &thread : threads) {
|
||||
uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
|
||||
uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu);
|
||||
UNRECOVERABLE_IF(thread.thread > 7);
|
||||
*euData |= (1 << thread.thread);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
|
||||
|
||||
std::vector<EuThread::ThreadId> threads;
|
||||
|
||||
for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) {
|
||||
for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
|
||||
for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) {
|
||||
size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
|
||||
|
||||
if (offset >= bitmaskSize) {
|
||||
return threads;
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(!bitmask);
|
||||
for (uint32_t byte = 0; byte < bytesPerEu; byte++) {
|
||||
std::bitset<8> bits(bitmask[offset + byte]);
|
||||
for (uint32_t i = 0; i < 8; i++) {
|
||||
if (bits.test(i)) {
|
||||
threads.emplace_back(tile, slice, subslice, eu, i + 8 * byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return threads;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void L0GfxCoreHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const {
|
||||
if (group.engineGroupType == NEO::EngineGroupType::linkedCopy) {
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/definitions/engine_group_types.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <typename Family>
|
||||
void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
|
||||
|
||||
bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
|
||||
|
||||
memset(bitmask.get(), 0, bitmaskSize);
|
||||
|
||||
for (auto &thread : threads) {
|
||||
uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
|
||||
uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu);
|
||||
UNRECOVERABLE_IF(thread.thread > 7);
|
||||
*euData |= (1 << thread.thread);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
|
||||
|
||||
std::vector<EuThread::ThreadId> threads;
|
||||
|
||||
for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) {
|
||||
for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
|
||||
for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) {
|
||||
size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
|
||||
|
||||
if (offset >= bitmaskSize) {
|
||||
return threads;
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(!bitmask);
|
||||
for (uint32_t byte = 0; byte < bytesPerEu; byte++) {
|
||||
std::bitset<8> bits(bitmask[offset + byte]);
|
||||
for (uint32_t i = 0; i < 8; i++) {
|
||||
if (bits.test(i)) {
|
||||
threads.emplace_back(tile, slice, subslice, eu, i + 8 * byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return threads;
|
||||
}
|
||||
} // namespace L0
|
|
@ -9,6 +9,72 @@
|
|||
|
||||
namespace L0 {
|
||||
|
||||
template <typename Family>
|
||||
void L0GfxCoreHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
|
||||
|
||||
bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
|
||||
|
||||
memset(bitmask.get(), 0, bitmaskSize);
|
||||
|
||||
for (auto &thread : threads) {
|
||||
uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
|
||||
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
|
||||
UNRECOVERABLE_IF(thread.thread > 9);
|
||||
|
||||
auto euByteNum = (thread.thread / 8);
|
||||
uint8_t *euData = ptrOffset(subsliceData, euByteNum * numEuPerSubslice + thread.eu);
|
||||
|
||||
*euData |= 1 << ((thread.thread) % 8);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
std::vector<EuThread::ThreadId> L0GfxCoreHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
|
||||
const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo);
|
||||
|
||||
std::vector<EuThread::ThreadId> threads;
|
||||
|
||||
for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) {
|
||||
for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
|
||||
for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) {
|
||||
|
||||
size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;
|
||||
|
||||
if (offset >= bitmaskSize) {
|
||||
return threads;
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(!bitmask);
|
||||
for (uint32_t byte = 0; byte < bytesPerEu; byte++) {
|
||||
std::bitset<8> bits(bitmask[offset + byte]);
|
||||
for (uint32_t i = 0; i < 8; i++) {
|
||||
if (bits.test(i)) {
|
||||
threads.emplace_back(tile, slice, subslice, (((eu % (numEuPerSubslice / bytesPerEu)) * bytesPerEu)) + byte, i + 8 * (eu / (numEuPerSubslice / bytesPerEu)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return threads;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
ze_rtas_format_exp_t L0GfxCoreHelperHw<Family>::getSupportedRTASFormat() const {
|
||||
return static_cast<ze_rtas_format_exp_t>(RTASDeviceFormatInternal::version2);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xehp_and_later.inl"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_and_later.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_pvc_to_xe2.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_skl_to_pvc.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_and_xe_hpc.inl"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe_hpg_to_xe2_hpg.inl"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -18,7 +18,6 @@ namespace ult {
|
|||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_GEN12LP_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE);
|
||||
|
||||
using L0GfxCoreHelperTestGen12Lp = Test<DeviceFixture>;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -87,7 +87,7 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenAskingForUsmCompressionSu
|
|||
EXPECT_FALSE(l0GfxCoreHelper.usmCompressionSupported(hwInfo));
|
||||
}
|
||||
|
||||
HWTEST_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned) {
|
||||
HWTEST2_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned, IsAtMostXe2HpgCore) {
|
||||
|
||||
auto printAttentionBitmask = [](uint8_t *expected, uint8_t *actual, uint32_t maxSlices, uint32_t maxSubSlicesPerSlice, uint32_t maxEuPerSubslice, uint32_t threadsPerEu, bool printBitmask = false) {
|
||||
auto bytesPerThread = threadsPerEu > 8 ? 2u : 1u;
|
||||
|
@ -215,7 +215,7 @@ HWTEST_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmask
|
|||
EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size));
|
||||
}
|
||||
|
||||
HWTEST_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet) {
|
||||
HWTEST2_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet, IsAtMostXe2HpgCore) {
|
||||
auto hwInfo = *NEO::defaultHwInfo.get();
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
|
||||
|
@ -239,6 +239,153 @@ HWTEST_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBit
|
|||
EXPECT_TRUE(memoryZeroed(&data[numBytesPerThread + 1], size - numBytesPerThread - 1));
|
||||
}
|
||||
|
||||
// Checks, for Xe3, that getAttentionBitmaskForSingleThreads sets the expected bit for
// selected threads and that the total number of set bits equals the number of requested
// threads. Expected byte positions are derived from the locally computed topology values
// instead of literal offsets, so the test does not silently depend on 8 EUs per subslice.
HWTEST2_F(L0GfxCoreHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet, IsXe3Core) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

    // Only slices 2 and 3 are enabled, out of 4 slices / 16 subslices.
    hwInfo.gtSystemInfo.IsDynamicallyPopulated = true;
    hwInfo.gtSystemInfo.MaxSlicesSupported = 4u;
    hwInfo.gtSystemInfo.MaxSubSlicesSupported = 16u;
    for (auto &sliceInfo : hwInfo.gtSystemInfo.SliceInfo) {
        sliceInfo.Enabled = false;
    }
    hwInfo.gtSystemInfo.SliceInfo[2].Enabled = true;
    hwInfo.gtSystemInfo.SliceInfo[3].Enabled = true;

    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;

    std::unique_ptr<uint8_t[]> bitmask;
    size_t size = 0;

    std::vector<EuThread::ThreadId> threads;
    threads.push_back({0, 0, 0, 0, 8});
    threads.push_back({0, 0, 0, 1, 9});

    threads.push_back({0, 1, 0, 0, 0});
    threads.push_back({0, 1, 0, 1, 1});
    threads.push_back({0, 1, 0, 0, 8});
    threads.push_back({0, 1, 0, 1, 9});

    threads.push_back({0, 1, 1, 0, 0});
    threads.push_back({0, 1, 1, 1, 1});
    threads.push_back({0, 1, 1, 0, 8});
    threads.push_back({0, 1, 1, 1, 9});

    threads.push_back({0, 2, 1, 0, 0});
    threads.push_back({0, 2, 1, 1, 1});
    threads.push_back({0, 2, 1, 0, 8});
    threads.push_back({0, 2, 1, 1, 9});

    threads.push_back({0, 1, 2, 0, 0});
    threads.push_back({0, 1, 2, 1, 1});
    threads.push_back({0, 1, 2, 0, 8});
    threads.push_back({0, 1, 2, 1, 9});

    auto maxSlice = hwInfo.gtSystemInfo.MaxSlicesSupported - 1;
    threads.push_back({0, maxSlice, 2, 3, 0});
    threads.push_back({0, maxSlice, 2, 3, 1});
    threads.push_back({0, maxSlice, 2, 3, 8});
    threads.push_back({0, maxSlice, 2, 3, 9});

    auto maxSubSlice = numSubslicesPerSlice - 1;
    threads.push_back({0, 1, maxSubSlice, 3, 0});
    threads.push_back({0, 1, maxSubSlice, 3, 1});
    threads.push_back({0, 1, maxSubSlice, 3, 8});
    threads.push_back({0, 1, maxSubSlice, 3, 9});

    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);

    const uint8_t *data = bitmask.get();

    // Within a subslice, the bytes holding threads 8+ start after one byte per EU.
    const uint32_t upperThreadsOffset = numEuPerSubslice;

    // slice 0, subslice 0: EU0 thread 8 and EU1 thread 9
    EXPECT_EQ(1u, data[upperThreadsOffset]);
    EXPECT_EQ(1u << 1, data[upperThreadsOffset + 1]);

    // slice 1, subslice 0: EU0 threads 0 and 8, EU1 threads 1 and 9
    const auto sliceOffset = threadsSizePerSlice;
    EXPECT_EQ(1u, data[sliceOffset]);
    EXPECT_EQ(1u << 1, data[sliceOffset + 1]);

    EXPECT_EQ(1u, data[sliceOffset + upperThreadsOffset]);
    EXPECT_EQ(1u << 1, data[sliceOffset + upperThreadsOffset + 1]);

    // slice 1, subslice 1: same pattern, one subslice further
    const auto subSliceOffset = sliceOffset + numEuPerSubslice * bytesPerEu;
    EXPECT_EQ(1u, data[subSliceOffset]);
    EXPECT_EQ(1u << 1, data[subSliceOffset + 1]);

    EXPECT_EQ(1u, data[subSliceOffset + upperThreadsOffset]);
    EXPECT_EQ(1u << 1, data[subSliceOffset + upperThreadsOffset + 1]);

    // Every requested thread must contribute exactly one set bit; count without
    // modifying the mask.
    size_t threadCount = 0;
    for (size_t i = 0; i < size; i++) {
        for (uint8_t remaining = data[i]; remaining != 0; remaining >>= 1) {
            threadCount += remaining & 0x01;
        }
    }
    EXPECT_EQ(threads.size(), threadCount);
}
|
||||
|
||||
// Checks, for Xe3 with 16 EUs reported per subslice, that a single requested thread
// sets the expected bit at the expected byte. Byte positions are computed from the
// topology values below rather than hard-coded.
HWTEST2_F(L0GfxCoreHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned, IsXe3Core) {

    auto hwInfo = *NEO::defaultHwInfo.get();
    const auto threadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);

    // Keep the threads-per-EU ratio while raising the EU count per subslice to 16.
    hwInfo.gtSystemInfo.MaxEuPerSubSlice = 16u;
    hwInfo.gtSystemInfo.EUCount = hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.SubSliceCount;
    hwInfo.gtSystemInfo.ThreadCount = hwInfo.gtSystemInfo.EUCount * threadsPerEu;
    MockExecutionEnvironment executionEnvironment(&hwInfo);
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
    const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
    const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
    const uint32_t subsliceOffset = numEuPerSubslice * bytesPerEu;
    const uint32_t sliceOffset = numSubslicesPerSlice * subsliceOffset;
    // Within a subslice, the bytes holding threads 8+ start after one byte per EU.
    const uint32_t upperThreadsOffset = numEuPerSubslice;

    std::unique_ptr<uint8_t[]> bitmask;
    size_t size = 0;
    std::vector<EuThread::ThreadId> threads;

    // slice 0, subslice 0, EU 0, thread 6
    threads.push_back({0, 0, 0, 0, 6});
    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
    EXPECT_EQ(uint8_t(1u << 6), bitmask.get()[0]);

    // slice 0, subslice 0, EU 1, thread 3
    threads.clear();
    threads.push_back({0, 0, 0, 1, 3});
    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
    EXPECT_EQ(uint8_t(1u << 3), bitmask.get()[1]);

    // slice 0, subslice 1, EU 1, thread 8 (index 25 for 8 EUs and 2 bytes per EU)
    threads.clear();
    threads.push_back({0, 0, 1, 1, 8});
    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
    EXPECT_EQ(1u, bitmask.get()[subsliceOffset + upperThreadsOffset + 1]);

    // slice 1, subslice 0, EU 0, thread 8
    threads.clear();
    threads.push_back({0, 1, 0, 0, 8});
    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size);
    EXPECT_EQ(1u, bitmask.get()[sliceOffset + upperThreadsOffset]);
}
|
||||
|
||||
HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) {
|
||||
auto hwInfo = *NEO::defaultHwInfo.get();
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
|
@ -277,6 +424,48 @@ HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGe
|
|||
EXPECT_EQ(1u, threads[0].tileIndex);
|
||||
}
|
||||
|
||||
// Round-trip check for Xe3: encode one thread into an attention bitmask, decode the
// bitmask, and expect exactly that thread back. Done for thread 8 and for thread 9.
HWTEST2_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IsXe3Core) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

    std::unique_ptr<uint8_t[]> attentionMask;
    size_t attentionMaskSize = 0;

    // Second-to-last subslice of a slice when there are more than two, otherwise subslice 0.
    const uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
    uint32_t subsliceID = 0;
    if (subslicesPerSlice > 2) {
        subsliceID = subslicesPerSlice - 2;
    }

    // First round trip: slice 0, chosen subslice, EU 0, thread 8.
    uint32_t threadID = 8;
    std::vector<EuThread::ThreadId> requested;
    requested.push_back({0, 0, subsliceID, 0, threadID});

    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(requested, hwInfo, attentionMask, attentionMaskSize);
    auto decoded = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, attentionMask.get(), attentionMaskSize);

    ASSERT_EQ(1u, decoded.size());
    EXPECT_EQ(0u, decoded[0].slice);
    EXPECT_EQ(subsliceID, decoded[0].subslice);
    EXPECT_EQ(0u, decoded[0].eu);
    EXPECT_EQ(threadID, decoded[0].thread);
    EXPECT_EQ(0u, decoded[0].tileIndex);

    // Second round trip: slice 0, subslice 1, EU 5, thread 9.
    std::memset(attentionMask.get(), 0, attentionMaskSize);
    requested.clear();
    threadID = 9;
    requested.push_back({0, 0, 1, 5, threadID});

    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(requested, hwInfo, attentionMask, attentionMaskSize);
    decoded = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, attentionMask.get(), attentionMaskSize);

    ASSERT_EQ(1u, decoded.size());
    EXPECT_EQ(0u, decoded[0].slice);
    EXPECT_EQ(1u, decoded[0].subslice);
    EXPECT_EQ(5u, decoded[0].eu);
    EXPECT_EQ(threadID, decoded[0].thread);
}
|
||||
|
||||
HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) {
|
||||
auto hwInfo = *NEO::defaultHwInfo.get();
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
|
@ -340,7 +529,7 @@ HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingT
|
|||
}
|
||||
}
|
||||
|
||||
HWTEST_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) {
|
||||
HWTEST2_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IsXeHpcCoreOrXe2HpgCore) {
|
||||
auto hwInfo = *NEO::defaultHwInfo.get();
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();
|
||||
|
@ -374,6 +563,118 @@ HWTEST_F(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThe
|
|||
}
|
||||
}
|
||||
|
||||
// Decodes a hand-built Xe3 attention bitmask and checks that the reported threads,
// and their order, match the expected list.
HWTEST2_F(L0GfxCoreHelperTest, givenEu0To1Threads6To10BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IsXe3Core) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

    const uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
    const uint32_t eusPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
    const uint32_t threadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
    const uint32_t bytesPerEu = alignUp(threadsPerEu, 8) / 8;

    const auto subsliceOffset = eusPerSubslice * bytesPerEu;
    const auto sliceOffset = subslicesPerSlice * eusPerSubslice * bytesPerEu;

    uint8_t rawMask[1024] = {};
    // slice 0 / subslice 0: bits 6-7 in bytes 0 and 1, bits 0-1 in bytes 8 and 9
    rawMask[0] = 0xC0;
    rawMask[1] = 0xC0;
    rawMask[8] = 0x03;
    rawMask[9] = 0x03;
    // slice 0 / subslice 1: bits 0-1 in bytes 8 and 9 of that subslice
    rawMask[subsliceOffset + 8] = 0x03;
    rawMask[subsliceOffset + 9] = 0x03;
    // slice 1 / subslice 1: bits 0-1 in bytes 8 and 9 of that subslice
    rawMask[sliceOffset + subsliceOffset + 8] = 0x03;
    rawMask[sliceOffset + subsliceOffset + 9] = 0x03;

    // Fields: slice, subslice, eu, thread
    ze_device_thread_t expected[] = {
        {0, 0, 0, 6},
        {0, 0, 0, 7},
        {0, 0, 1, 6},
        {0, 0, 1, 7},
        {0, 0, 0, 8},
        {0, 0, 0, 9},
        {0, 0, 1, 8},
        {0, 0, 1, 9},
        // subslice > 0
        {0, 1, 0, 8},
        {0, 1, 0, 9},
        {0, 1, 1, 8},
        {0, 1, 1, 9},
        // slice > 0
        {1, 1, 0, 8},
        {1, 1, 0, 9},
        {1, 1, 1, 8},
        {1, 1, 1, 9}};

    auto decoded = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, rawMask, sizeof(rawMask));
    ASSERT_EQ(16u, decoded.size());

    uint32_t index = 0;
    while (index < 16u) {
        EXPECT_EQ(expected[index].slice, decoded[index].slice);
        EXPECT_EQ(expected[index].subslice, decoded[index].subslice);
        EXPECT_EQ(expected[index].eu, decoded[index].eu);
        EXPECT_EQ(expected[index].thread, decoded[index].thread);
        EXPECT_EQ(0u, decoded[index].tileIndex);
        index++;
    }
}
|
||||
|
||||
// Round-trip check for Xe3: encode a list of threads into an attention bitmask,
// decode it again, and expect the same threads in the same order.
HWTEST2_F(L0GfxCoreHelperTest, givenThreadsToBitmaskThenSameThreadsReturnedParsingBitmask, IsXe3Core) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    MockExecutionEnvironment executionEnvironment;
    auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper<L0GfxCoreHelper>();

    // Only slices 2 and 3 are enabled, out of 4 slices / 16 subslices.
    hwInfo.gtSystemInfo.IsDynamicallyPopulated = true;
    hwInfo.gtSystemInfo.MaxSlicesSupported = 4u;
    hwInfo.gtSystemInfo.MaxSubSlicesSupported = 16u;
    for (auto &sliceInfo : hwInfo.gtSystemInfo.SliceInfo) {
        sliceInfo.Enabled = false;
    }
    hwInfo.gtSystemInfo.SliceInfo[2].Enabled = true;
    hwInfo.gtSystemInfo.SliceInfo[3].Enabled = true;

    std::unique_ptr<uint8_t[]> bitmask;
    size_t size = 0;

    // ordering is important, byte0 of every EU is before byte1 of any EU
    std::vector<EuThread::ThreadId> expectedThreads = {
        {0, 0, 0, 0, 6},
        {0, 0, 0, 0, 7},
        {0, 0, 0, 1, 6},
        {0, 0, 0, 1, 7},
        {0, 0, 0, 0, 8},
        {0, 0, 0, 0, 9},
        {0, 0, 0, 1, 8},
        {0, 0, 0, 1, 9},
        {0, 0, 1, 0, 8},
        {0, 0, 1, 0, 9},
        {0, 0, 1, 1, 8},
        {0, 0, 1, 1, 9},
        {0, 1, 1, 3, 5},
        {0, 1, 1, 6, 7},
        {0, 1, 1, 0, 8},
        {0, 1, 1, 0, 9},
        {0, 1, 1, 1, 8},
        {0, 1, 1, 1, 9},
        {0, 1, 1, 2, 8},
        {0, 1, 1, 4, 9},
        {0, 2, 1, 0, 0},
        {0, 2, 2, 3, 5}};

    l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(expectedThreads, hwInfo, bitmask, size);

    auto threads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size);

    // Stop here on a count mismatch: the loop below indexes `threads` by position and
    // would otherwise read past its end, or silently ignore extra decoded threads.
    ASSERT_EQ(expectedThreads.size(), threads.size());

    for (size_t i = 0; i < expectedThreads.size(); i++) {
        EXPECT_EQ(expectedThreads[i].slice, threads[i].slice);
        EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice);
        EXPECT_EQ(expectedThreads[i].eu, threads[i].eu);
        EXPECT_EQ(expectedThreads[i].thread, threads[i].thread);

        EXPECT_EQ(0u, threads[i].tileIndex);
    }
}
|
||||
|
||||
HWTEST_F(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) {
|
||||
auto hwInfo = *NEO::defaultHwInfo.get();
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
|
|
|
@ -20,7 +20,6 @@ namespace ult {
|
|||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_XE_HPG_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(L0GfxCoreHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE);
|
||||
|
||||
using L0GfxCoreHelperTestXeHpg = Test<DeviceFixture>;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
* Copyright (C) 2022-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -5749,7 +5749,7 @@ HWTEST2_F(DebugApiLinuxTest, GivenErrorFromSynchronousAttScanWhenMultipleThreads
|
|||
l0GfxCoreHelperBackup.release();
|
||||
}
|
||||
|
||||
TEST_F(DebugApiLinuxTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask) {
|
||||
HWTEST2_F(DebugApiLinuxTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask, IsAtMostXe2HpgCore) {
|
||||
zet_debug_config_t config = {};
|
||||
config.pid = 0x1234;
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ using IsBeforeXeHpcCore = IsBeforeGfxCore<IGFX_XE_HPC_CORE>;
|
|||
using IsAtLeastXe2HpgCore = IsAtLeastGfxCore<IGFX_XE2_HPG_CORE>;
|
||||
using IsAtMostXe2HpgCore = IsAtMostGfxCore<IGFX_XE2_HPG_CORE>;
|
||||
using IsWithinXeHpCoreAndXe2HpgCore = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE2_HPG_CORE>;
|
||||
using IsXeHpcCoreOrXe2HpgCore = IsAnyGfxCores<IGFX_XE_HPC_CORE, IGFX_XE2_HPG_CORE>;
|
||||
using IsWithinXeHpCoreAndXe3Core = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE3_CORE>;
|
||||
|
||||
using IsXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;
|
||||
|
|
Loading…
Reference in New Issue