diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index e448ddf41e..4decbc8b11 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -796,6 +796,15 @@ bool GfxCoreHelperHw::usmCompressionSupported(const NEO::HardwareInfo return false; } +template +uint32_t GfxCoreHelperHw::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const { + auto maxThreadsPerEuCount = 8u; + if (grfCount == GrfConfig::largeGrfNumber) { + maxThreadsPerEuCount = 4; + } + return std::min(hwInfo.gtSystemInfo.ThreadCount, maxThreadsPerEuCount * hwInfo.gtSystemInfo.EUCount); +} + template uint32_t GfxCoreHelperHw::getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const { if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) { diff --git a/shared/source/helpers/gfx_core_helper_bdw_and_later.inl b/shared/source/helpers/gfx_core_helper_bdw_and_later.inl index 6f992bb6a2..a42543a5b7 100644 --- a/shared/source/helpers/gfx_core_helper_bdw_and_later.inl +++ b/shared/source/helpers/gfx_core_helper_bdw_and_later.inl @@ -54,11 +54,6 @@ bool GfxCoreHelperHw::makeResidentBeforeLockNeeded(bool precondition) return precondition; } -template -uint32_t GfxCoreHelperHw::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const { - return hwInfo.gtSystemInfo.ThreadCount; -} - template inline uint32_t GfxCoreHelperHw::calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const { return std::min(defaultMaxGroupSize, CommonConstants::maxWorkgroupSize); diff --git a/shared/source/helpers/gfx_core_helper_pvc_and_later.inl b/shared/source/helpers/gfx_core_helper_pvc_and_later.inl index 77dae69b1d..be88544c63 100644 --- a/shared/source/helpers/gfx_core_helper_pvc_and_later.inl +++ b/shared/source/helpers/gfx_core_helper_pvc_and_later.inl @@ -86,10 +86,4 @@ size_t 
GfxCoreHelperHw::getPaddingForISAAllocation() const { return 0xE00; } -template -uint32_t GfxCoreHelperHw::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const { - auto maxThreadsPerEuCount = 1024u / grfCount; - return maxThreadsPerEuCount * hwInfo.gtSystemInfo.EUCount; -} - } // namespace NEO diff --git a/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp b/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp index c649eef161..b262ad4e74 100644 --- a/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp @@ -115,13 +115,6 @@ bool GfxCoreHelperHw::copyThroughLockedPtrEnabled(const HardwareInfo &hw return this->isLocalMemoryEnabled(hwInfo) && !productHelper.isUnlockingLockedPtrNecessary(hwInfo); } -template <> -uint32_t GfxCoreHelperHw::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const { - if (grfCount > GrfConfig::defaultGrfNumber) { - return hwInfo.gtSystemInfo.ThreadCount / 2u; - } - return hwInfo.gtSystemInfo.ThreadCount; -} template <> void GfxCoreHelperHw::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) const { diff --git a/shared/test/common/test_macros/header/common_matchers.h b/shared/test/common/test_macros/header/common_matchers.h index 8addbdcc55..45e828e092 100644 --- a/shared/test/common/test_macros/header/common_matchers.h +++ b/shared/test/common/test_macros/header/common_matchers.h @@ -31,6 +31,7 @@ using IsAtMostXeHpcCore = IsAtMostGfxCore; using IsBeforeXeHpcCore = IsBeforeGfxCore; using IsAtLeastXe2HpgCore = IsAtLeastGfxCore; +using IsAtMostXe2HpgCore = IsAtMostGfxCore; using IsXeHpOrXeHpgCore = IsAnyGfxCores; using IsXeHpOrXeHpcCore = IsAnyGfxCores; diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index 3ee956959a..cff9460eaf 100644 
--- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -1398,33 +1398,6 @@ HWTEST2_F(GfxCoreHelperTest, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllo EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 1u); } -HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesAndXeHpOrXeHpgCoreWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsXeHpOrXeHpgCore) { - std::array, 3> grfTestInputs = {{{64, 8}, - {128, 8}, - {256, 4}}}; - - const auto &hwInfo = *defaultHwInfo; - const auto &gfxCoreHelper = getHelper(); - for (const auto &[grfCount, expectedThreadCountPerEu] : grfTestInputs) { - auto expected = expectedThreadCountPerEu * hwInfo.gtSystemInfo.EUCount; - auto result = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount); - EXPECT_EQ(expected, result); - } -} - -HWTEST2_F(GfxCoreHelperTest, GivenModifiedGtSystemInfoAndXeHpOrXeHpgCoreWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsXeHpOrXeHpgCore) { - std::array, 3> testInputs = {{{1, 64, 1}, - {5, 128, 5}, - {8, 256, 4}}}; - const auto &gfxCoreHelper = getHelper(); - auto hwInfo = hardwareInfo; - for (const auto &[threadCount, grfCount, expectedThreadCount] : testInputs) { - hwInfo.gtSystemInfo.ThreadCount = threadCount; - auto result = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount); - EXPECT_EQ(expectedThreadCount, result); - } -} - HWTEST2_F(GfxCoreHelperTest, givenAtMostGen12lpPlatformWhenGettingMinimalScratchSpaceSizeThen1024IsReturned, IsGen12LP) { const auto &gfxCoreHelper = getHelper(); EXPECT_EQ(1024U, gfxCoreHelper.getMinimalScratchSpaceSize()); @@ -1848,3 +1821,45 @@ HWTEST_F(GfxCoreHelperTest, whenEncodeAdditionalTimestampOffsetsThenNothingEncod GenCmdList storeRegMemList = hwParser.getCommandsList(); EXPECT_EQ(0u, storeRegMemList.size()); } + +HWTEST2_F(GfxCoreHelperTest, 
GivenVariousValuesWhenCallingCalculateAvailableThreadCountAndThreadCountAvailableIsBiggerThenCorrectValueIsReturned, IsAtMostXe2HpgCore) { + std::array, 2> grfTestInputs = {{{128, 8}, + {256, 4}}}; + auto &gfxCoreHelper = getHelper(); + for (const auto &[grfCount, expectedThreadCountPerEu] : grfTestInputs) { + auto expected = expectedThreadCountPerEu * hardwareInfo.gtSystemInfo.EUCount; + // force always bigger Thread Count available + hardwareInfo.gtSystemInfo.ThreadCount = 2 * expected; + auto result = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount); + EXPECT_EQ(expected, result); + } +} + +HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountAndThreadCountAvailableIsSmallerThenThreadCountIsReturned, IsAtMostXe2HpgCore) { + std::array, 2> grfTestInputs = {{ + {128, 8}, + {256, 4}, + }}; + auto &gfxCoreHelper = getHelper(); + for (const auto &[grfCount, expectedThreadCountPerEu] : grfTestInputs) { + auto calculatedThreadCount = expectedThreadCountPerEu * hardwareInfo.gtSystemInfo.EUCount; + // force thread count smaller than calculation + hardwareInfo.gtSystemInfo.ThreadCount = calculatedThreadCount / 2; + auto result = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount); + EXPECT_EQ(hardwareInfo.gtSystemInfo.ThreadCount, result); + } +} + +HWTEST2_F(GfxCoreHelperTest, GivenModifiedGtSystemInfoWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtMostXe2HpgCore) { + std::array, 2> testInputs = {{{64, 256}, + {128, 512}}}; + auto &gfxCoreHelper = getHelper(); + auto hwInfo = hardwareInfo; + for (const auto &[euCount, expectedThreadCount] : testInputs) { + // force thread count bigger than expected + hwInfo.gtSystemInfo.ThreadCount = 1024; + hwInfo.gtSystemInfo.EUCount = euCount; + auto result = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, 256); + EXPECT_EQ(expectedThreadCount, result); + } +} \ No newline at end of file diff --git
a/shared/test/unit_test/helpers/gfx_core_helper_tests_pvc_and_later.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests_pvc_and_later.cpp index a6a3c79758..4c779200da 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests_pvc_and_later.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests_pvc_and_later.cpp @@ -53,34 +53,6 @@ HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenRenderEngineWhenRemapCalledThenUseC EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_BCS, rootDeviceEnvironment)); } -HWTEST2_F(GfxCoreHelperTestPvcAndLater, GivenVariousValuesAndPvcAndLaterPlatformsWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) { - std::array, 6> grfTestInputs = {{{64, 16}, - {96, 10}, - {128, 8}, - {160, 6}, - {192, 5}, - {256, 4}}}; - auto &gfxCoreHelper = getHelper(); - for (const auto &[grfCount, expectedThreadCountPerEu] : grfTestInputs) { - auto expected = expectedThreadCountPerEu * hardwareInfo.gtSystemInfo.EUCount; - auto result = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount); - EXPECT_EQ(expected, result); - } -} - -HWTEST2_F(GfxCoreHelperTestPvcAndLater, GivenModifiedGtSystemInfoAndPvcAndLaterPlatformsWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) { - std::array, 3> testInputs = {{{64, 256}, - {96, 384}, - {128, 512}}}; - auto &gfxCoreHelper = getHelper(); - auto hwInfo = hardwareInfo; - for (const auto &[euCount, expectedThreadCount] : testInputs) { - hwInfo.gtSystemInfo.EUCount = euCount; - auto result = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, 256); - EXPECT_EQ(expectedThreadCount, result); - } -} - HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenGfxCoreHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) { auto &gfxCoreHelper = getHelper();