fix: change logic to calculate available thread count

Don't use a magic number; the value depends on the GRF size.

Related-To: HSD-18039369782
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2024-10-07 13:44:54 +00:00
committed by Compute-Runtime-Automation
parent b520c64775
commit 42ca656edb
7 changed files with 52 additions and 73 deletions

View File

@@ -796,6 +796,15 @@ bool GfxCoreHelperHw<GfxFamily>::usmCompressionSupported(const NEO::HardwareInfo
return false;
}
/// Computes how many hardware threads are available for the given GRF count.
///
/// The per-EU thread limit is 4 when grfCount equals GrfConfig::largeGrfNumber and 8 otherwise.
/// The result is that limit multiplied by the EU count, capped by the ThreadCount reported in
/// hwInfo.gtSystemInfo.
///
/// @param hwInfo   hardware description; gtSystemInfo.ThreadCount and gtSystemInfo.EUCount are read
/// @param grfCount GRF count to compute the limit for
/// @return the smaller of gtSystemInfo.ThreadCount and (per-EU limit * gtSystemInfo.EUCount)
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
    constexpr auto defaultThreadsPerEu = 8u;
    constexpr auto largeGrfThreadsPerEu = 4u;

    const auto threadsPerEu = (grfCount == GrfConfig::largeGrfNumber) ? largeGrfThreadsPerEu : defaultThreadsPerEu;
    const auto &gtSystemInfo = hwInfo.gtSystemInfo;

    return std::min(gtSystemInfo.ThreadCount, threadsPerEu * gtSystemInfo.EUCount);
}
template <typename Family>
uint32_t GfxCoreHelperHw<Family>::getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const {
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {

View File

@@ -54,11 +54,6 @@ bool GfxCoreHelperHw<GfxFamily>::makeResidentBeforeLockNeeded(bool precondition)
return precondition;
}
/// Returns the thread count reported in hwInfo.gtSystemInfo.
///
/// @param hwInfo   hardware description; only gtSystemInfo.ThreadCount is read
/// @param grfCount not consulted by this variant
/// @return gtSystemInfo.ThreadCount, unmodified
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
    const auto &gtSystemInfo = hwInfo.gtSystemInfo;
    return gtSystemInfo.ThreadCount;
}
template <typename GfxFamily>
inline uint32_t GfxCoreHelperHw<GfxFamily>::calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const {
return std::min(defaultMaxGroupSize, CommonConstants::maxWorkgroupSize);

View File

@@ -86,10 +86,4 @@ size_t GfxCoreHelperHw<Family>::getPaddingForISAAllocation() const {
return 0xE00;
}
/// Computes the available thread count as (1024 / grfCount) threads per EU times the EU count.
///
/// @param hwInfo   hardware description; only gtSystemInfo.EUCount is read
/// @param grfCount GRF count used as the divisor
/// @return (1024 / grfCount) * gtSystemInfo.EUCount
template <typename Family>
uint32_t GfxCoreHelperHw<Family>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
    // Unsigned integer division truncates, e.g. 1024 / 96 yields 10 threads per EU.
    // NOTE(review): grfCount == 0 would divide by zero here; callers presumably always pass a
    // valid, non-zero GRF count - confirm.
    const auto threadsPerEu = 1024u / grfCount;
    return hwInfo.gtSystemInfo.EUCount * threadsPerEu;
}
} // namespace NEO

View File

@@ -115,13 +115,6 @@ bool GfxCoreHelperHw<Family>::copyThroughLockedPtrEnabled(const HardwareInfo &hw
return this->isLocalMemoryEnabled(hwInfo) && !productHelper.isUnlockingLockedPtrNecessary(hwInfo);
}
/// Returns the reported thread count, halved when grfCount exceeds GrfConfig::defaultGrfNumber.
///
/// @param hwInfo   hardware description; only gtSystemInfo.ThreadCount is read
/// @param grfCount GRF count compared against GrfConfig::defaultGrfNumber
/// @return ThreadCount / 2 when grfCount is above the default GRF number, ThreadCount otherwise
template <>
uint32_t GfxCoreHelperHw<Family>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
    const auto reportedThreadCount = hwInfo.gtSystemInfo.ThreadCount;
    const bool aboveDefaultGrf = grfCount > GrfConfig::defaultGrfNumber;
    return aboveDefaultGrf ? reportedThreadCount / 2u : reportedThreadCount;
}
template <>
void GfxCoreHelperHw<Family>::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) const {

View File

@@ -31,6 +31,7 @@ using IsAtMostXeHpcCore = IsAtMostGfxCore<IGFX_XE_HPC_CORE>;
// Core-family matcher aliases; some of them are passed as the trailing argument of HWTEST2_F tests.
// NOTE(review): the exact matching rules come from IsAtMostGfxCore / IsBeforeGfxCore /
// IsAtLeastGfxCore / IsAnyGfxCores, which are defined elsewhere - confirm there.
using IsBeforeXeHpcCore = IsBeforeGfxCore<IGFX_XE_HPC_CORE>;
using IsAtLeastXe2HpgCore = IsAtLeastGfxCore<IGFX_XE2_HPG_CORE>;
using IsAtMostXe2HpgCore = IsAtMostGfxCore<IGFX_XE2_HPG_CORE>;
using IsXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;
using IsXeHpOrXeHpcCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;

View File

@@ -1398,33 +1398,6 @@ HWTEST2_F(GfxCoreHelperTest, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllo
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 1u);
}
// Checks calculateAvailableThreadCount against the default hardware info for several GRF counts:
// the result must equal the expected threads-per-EU value multiplied by the EU count.
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesAndXeHpOrXeHpgCoreWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsXeHpOrXeHpgCore) {
    // Each entry is {GRF count, expected threads per EU}.
    std::array<std::pair<uint32_t, uint32_t>, 3> grfToThreadsPerEu = {{{64, 8},
                                                                       {128, 8},
                                                                       {256, 4}}};
    const auto &hwInfo = *defaultHwInfo;
    const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();

    for (const auto &testCase : grfToThreadsPerEu) {
        const auto grfCount = testCase.first;
        const auto threadsPerEu = testCase.second;

        auto expectedThreadCount = threadsPerEu * hwInfo.gtSystemInfo.EUCount;
        auto availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount);
        EXPECT_EQ(expectedThreadCount, availableThreadCount);
    }
}
// Overrides ThreadCount on a local copy of the fixture's hardware info and checks the value
// returned by calculateAvailableThreadCount for each (thread count, GRF count) combination.
HWTEST2_F(GfxCoreHelperTest, GivenModifiedGtSystemInfoAndXeHpOrXeHpgCoreWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsXeHpOrXeHpgCore) {
    // Each entry is {ThreadCount to set, GRF count, expected available thread count}.
    std::array<std::tuple<uint32_t, uint32_t, uint32_t>, 3> scenarios = {{{1, 64, 1},
                                                                          {5, 128, 5},
                                                                          {8, 256, 4}}};
    const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
    auto modifiedHwInfo = hardwareInfo;

    for (const auto &scenario : scenarios) {
        const auto reportedThreadCount = std::get<0>(scenario);
        const auto grfCount = std::get<1>(scenario);
        const auto expectedAvailable = std::get<2>(scenario);

        modifiedHwInfo.gtSystemInfo.ThreadCount = reportedThreadCount;
        auto availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(modifiedHwInfo, grfCount);
        EXPECT_EQ(expectedAvailable, availableThreadCount);
    }
}
HWTEST2_F(GfxCoreHelperTest, givenAtMostGen12lpPlatformWhenGettingMinimalScratchSpaceSizeThen1024IsReturned, IsGen12LP) {
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
EXPECT_EQ(1024U, gfxCoreHelper.getMinimalScratchSpaceSize());
@@ -1848,3 +1821,45 @@ HWTEST_F(GfxCoreHelperTest, whenEncodeAdditionalTimestampOffsetsThenNothingEncod
GenCmdList storeRegMemList = hwParser.getCommandsList<MI_STORE_REGISTER_MEM>();
EXPECT_EQ(0u, storeRegMemList.size());
}
// When the reported ThreadCount exceeds (threads per EU * EU count), the per-EU based value
// must be returned.
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountAndThreadCountAvailableIsBiggerThenCorrectValueIsReturned, IsAtMostXe2HpgCore) {
    // Each entry is {GRF count, expected threads per EU}.
    std::array<std::pair<uint32_t, uint32_t>, 2> grfToThreadsPerEu = {{{128, 8},
                                                                       {256, 4}}};
    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();

    for (const auto &[grfCount, threadsPerEu] : grfToThreadsPerEu) {
        auto euLimitedThreadCount = threadsPerEu * hardwareInfo.gtSystemInfo.EUCount;

        // Always force the available thread count to be bigger than the per-EU based limit.
        hardwareInfo.gtSystemInfo.ThreadCount = 2 * euLimitedThreadCount;

        auto availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount);
        EXPECT_EQ(euLimitedThreadCount, availableThreadCount);
    }
}
// When the reported ThreadCount is below (threads per EU * EU count), the reported ThreadCount
// itself must be returned.
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountAndThreadCountAvailableIsSmallerThenThreadCountIsReturned, IsAtMostXe2HpgCore) {
    // Each entry is {GRF count, threads per EU used to derive the per-EU based limit}.
    std::array<std::pair<uint32_t, uint32_t>, 2> grfToThreadsPerEu = {{
        {128, 8},
        {256, 4},
    }};
    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();

    for (const auto &[grfCount, threadsPerEu] : grfToThreadsPerEu) {
        auto euLimitedThreadCount = threadsPerEu * hardwareInfo.gtSystemInfo.EUCount;

        // Report only half of the per-EU based limit so that ThreadCount is the smaller value.
        hardwareInfo.gtSystemInfo.ThreadCount = euLimitedThreadCount / 2;

        auto availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount);
        EXPECT_EQ(hardwareInfo.gtSystemInfo.ThreadCount, availableThreadCount);
    }
}
// Varies EUCount on a local copy of the fixture's hardware info, with ThreadCount fixed at 1024,
// and checks the value returned for a GRF count of 256.
HWTEST2_F(GfxCoreHelperTest, GivenModifiedGtSystemInfoWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtMostXe2HpgCore) {
    // Each entry is {EU count, expected available thread count}.
    std::array<std::pair<uint32_t, uint32_t>, 2> euCountToExpected = {{{64, 256},
                                                                       {128, 512}}};
    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
    auto modifiedHwInfo = hardwareInfo;

    // Keep the reported thread count above every expected value in the table.
    modifiedHwInfo.gtSystemInfo.ThreadCount = 1024;

    for (const auto &entry : euCountToExpected) {
        modifiedHwInfo.gtSystemInfo.EUCount = entry.first;
        auto availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(modifiedHwInfo, 256);
        EXPECT_EQ(entry.second, availableThreadCount);
    }
}

View File

@@ -53,34 +53,6 @@ HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenRenderEngineWhenRemapCalledThenUseC
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_BCS, rootDeviceEnvironment));
}
// Checks calculateAvailableThreadCount for a range of GRF counts: the result must equal the
// expected threads-per-EU value multiplied by the fixture's EU count.
HWTEST2_F(GfxCoreHelperTestPvcAndLater, GivenVariousValuesAndPvcAndLaterPlatformsWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) {
    // Each entry is {GRF count, expected threads per EU}; every expected value equals
    // 1024 / GRF count rounded down.
    std::array<std::pair<uint32_t, uint32_t>, 6> grfToThreadsPerEu = {{{64, 16},
                                                                       {96, 10},
                                                                       {128, 8},
                                                                       {160, 6},
                                                                       {192, 5},
                                                                       {256, 4}}};
    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();

    for (const auto &testCase : grfToThreadsPerEu) {
        const auto grfCount = testCase.first;
        const auto threadsPerEu = testCase.second;

        auto expectedThreadCount = threadsPerEu * hardwareInfo.gtSystemInfo.EUCount;
        auto availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount);
        EXPECT_EQ(expectedThreadCount, availableThreadCount);
    }
}
// Varies EUCount on a local copy of the fixture's hardware info and checks the value returned
// for a GRF count of 256.
HWTEST2_F(GfxCoreHelperTestPvcAndLater, GivenModifiedGtSystemInfoAndPvcAndLaterPlatformsWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) {
    // Each entry is {EU count, expected available thread count}.
    std::array<std::pair<uint32_t, uint32_t>, 3> euCountToExpected = {{{64, 256},
                                                                       {96, 384},
                                                                       {128, 512}}};
    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
    auto modifiedHwInfo = hardwareInfo;

    for (const auto &entry : euCountToExpected) {
        modifiedHwInfo.gtSystemInfo.EUCount = entry.first;
        auto availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(modifiedHwInfo, 256);
        EXPECT_EQ(entry.second, availableThreadCount);
    }
}
HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenGfxCoreHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) {
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();