mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-18 22:08:53 +08:00
fix: change logic to calculate available thread count
Don't use a magic number; the value depends on the GRF size.
Related-To: HSD-18039369782
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b520c64775
commit
42ca656edb
@@ -796,6 +796,15 @@ bool GfxCoreHelperHw<GfxFamily>::usmCompressionSupported(const NEO::HardwareInfo
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t GfxCoreHelperHw<GfxFamily>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
|
||||
auto maxThreadsPerEuCount = 8u;
|
||||
if (grfCount == GrfConfig::largeGrfNumber) {
|
||||
maxThreadsPerEuCount = 4;
|
||||
}
|
||||
return std::min(hwInfo.gtSystemInfo.ThreadCount, maxThreadsPerEuCount * hwInfo.gtSystemInfo.EUCount);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t GfxCoreHelperHw<Family>::getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const {
|
||||
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
|
||||
|
||||
@@ -54,11 +54,6 @@ bool GfxCoreHelperHw<GfxFamily>::makeResidentBeforeLockNeeded(bool precondition)
|
||||
return precondition;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t GfxCoreHelperHw<GfxFamily>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
|
||||
return hwInfo.gtSystemInfo.ThreadCount;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline uint32_t GfxCoreHelperHw<GfxFamily>::calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const {
|
||||
return std::min(defaultMaxGroupSize, CommonConstants::maxWorkgroupSize);
|
||||
|
||||
@@ -86,10 +86,4 @@ size_t GfxCoreHelperHw<Family>::getPaddingForISAAllocation() const {
|
||||
return 0xE00;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t GfxCoreHelperHw<Family>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
|
||||
auto maxThreadsPerEuCount = 1024u / grfCount;
|
||||
return maxThreadsPerEuCount * hwInfo.gtSystemInfo.EUCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -115,13 +115,6 @@ bool GfxCoreHelperHw<Family>::copyThroughLockedPtrEnabled(const HardwareInfo &hw
|
||||
|
||||
return this->isLocalMemoryEnabled(hwInfo) && !productHelper.isUnlockingLockedPtrNecessary(hwInfo);
|
||||
}
|
||||
template <>
|
||||
uint32_t GfxCoreHelperHw<Family>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
|
||||
if (grfCount > GrfConfig::defaultGrfNumber) {
|
||||
return hwInfo.gtSystemInfo.ThreadCount / 2u;
|
||||
}
|
||||
return hwInfo.gtSystemInfo.ThreadCount;
|
||||
}
|
||||
|
||||
template <>
|
||||
void GfxCoreHelperHw<Family>::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
|
||||
@@ -31,6 +31,7 @@ using IsAtMostXeHpcCore = IsAtMostGfxCore<IGFX_XE_HPC_CORE>;
|
||||
// Core-family selector aliases. Each one binds a generic selector template
// (IsBeforeGfxCore, IsAtLeastGfxCore, IsAtMostGfxCore, IsAnyGfxCores) to specific
// IGFX_* core identifiers.
// NOTE(review): these appear to be passed as the last argument of HWTEST2_F to restrict
// which cores a test runs on - confirm against the test macro definition.
using IsBeforeXeHpcCore = IsBeforeGfxCore<IGFX_XE_HPC_CORE>;
|
||||
|
||||
using IsAtLeastXe2HpgCore = IsAtLeastGfxCore<IGFX_XE2_HPG_CORE>;
|
||||
using IsAtMostXe2HpgCore = IsAtMostGfxCore<IGFX_XE2_HPG_CORE>;
|
||||
|
||||
using IsXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;
|
||||
using IsXeHpOrXeHpcCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
|
||||
|
||||
@@ -1398,33 +1398,6 @@ HWTEST2_F(GfxCoreHelperTest, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllo
|
||||
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 1u);
|
||||
}
|
||||
|
||||
// Using the default hardware info, verifies that each GRF count yields
// (expected threads per EU) * EUCount: 64 -> 8, 128 -> 8, 256 -> 4.
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesAndXeHpOrXeHpgCoreWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsXeHpOrXeHpgCore) {
    const auto &coreHelper = getHelper<GfxCoreHelper>();
    const auto &defaultInfo = *defaultHwInfo;

    // Each entry: {grfCount, expected threads per EU}.
    const std::array<std::pair<uint32_t, uint32_t>, 3> cases = {{{64, 8},
                                                                 {128, 8},
                                                                 {256, 4}}};
    for (const auto &testCase : cases) {
        const auto expectedThreads = testCase.second * defaultInfo.gtSystemInfo.EUCount;
        const auto actualThreads = coreHelper.calculateAvailableThreadCount(defaultInfo, testCase.first);
        EXPECT_EQ(expectedThreads, actualThreads);
    }
}
|
||||
|
||||
// Overrides ThreadCount on a copy of the fixture's hardware info and verifies the value
// returned for each (ThreadCount, grfCount) combination.
HWTEST2_F(GfxCoreHelperTest, GivenModifiedGtSystemInfoAndXeHpOrXeHpgCoreWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsXeHpOrXeHpgCore) {
    const auto &coreHelper = getHelper<GfxCoreHelper>();
    auto modifiedHwInfo = hardwareInfo;

    // Each entry: {ThreadCount written to gtSystemInfo, grfCount, expected result}.
    const std::array<std::tuple<uint32_t, uint32_t, uint32_t>, 3> cases = {{{1, 64, 1},
                                                                            {5, 128, 5},
                                                                            {8, 256, 4}}};
    for (const auto &[threadsInHwInfo, grf, expectedThreads] : cases) {
        modifiedHwInfo.gtSystemInfo.ThreadCount = threadsInHwInfo;
        const auto actualThreads = coreHelper.calculateAvailableThreadCount(modifiedHwInfo, grf);
        EXPECT_EQ(expectedThreads, actualThreads);
    }
}
|
||||
|
||||
HWTEST2_F(GfxCoreHelperTest, givenAtMostGen12lpPlatformWhenGettingMinimalScratchSpaceSizeThen1024IsReturned, IsGen12LP) {
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
EXPECT_EQ(1024U, gfxCoreHelper.getMinimalScratchSpaceSize());
|
||||
@@ -1848,3 +1821,45 @@ HWTEST_F(GfxCoreHelperTest, whenEncodeAdditionalTimestampOffsetsThenNothingEncod
|
||||
GenCmdList storeRegMemList = hwParser.getCommandsList<MI_STORE_REGISTER_MEM>();
|
||||
EXPECT_EQ(0u, storeRegMemList.size());
|
||||
}
|
||||
|
||||
// When gtSystemInfo.ThreadCount is larger than (threads per EU) * EUCount, the EU-based
// value is expected to be returned: 128 GRF -> 8 per EU, 256 GRF -> 4 per EU.
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountAndThreadCountAvailableIsBiggerThenCorrectValueIsReturned, IsAtMostXe2HpgCore) {
    auto &coreHelper = getHelper<GfxCoreHelper>();
    auto &gtInfo = hardwareInfo.gtSystemInfo;

    // Each entry: {grfCount, expected threads per EU}.
    const std::array<std::pair<uint32_t, uint32_t>, 2> cases = {{{128, 8},
                                                                 {256, 4}}};
    for (const auto &testCase : cases) {
        const auto euBasedThreads = testCase.second * gtInfo.EUCount;
        // Always report twice as many threads as the EU-based value, so the EU-based value is the smaller one.
        gtInfo.ThreadCount = 2 * euBasedThreads;
        const auto actualThreads = coreHelper.calculateAvailableThreadCount(hardwareInfo, testCase.first);
        EXPECT_EQ(euBasedThreads, actualThreads);
    }
}
|
||||
|
||||
// When gtSystemInfo.ThreadCount is smaller than (threads per EU) * EUCount, ThreadCount
// itself is expected to be returned.
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountAndThreadCountAvailableIsSmallerThenThreadCountIsReturned, IsAtMostXe2HpgCore) {
    auto &coreHelper = getHelper<GfxCoreHelper>();
    auto &gtInfo = hardwareInfo.gtSystemInfo;

    // Each entry: {grfCount, threads per EU used to build the EU-based value}.
    const std::array<std::pair<uint32_t, uint32_t>, 2> cases = {{
        {128, 8},
        {256, 4},
    }};
    for (const auto &testCase : cases) {
        const auto euBasedThreads = testCase.second * gtInfo.EUCount;
        // Report only half of the EU-based value, so ThreadCount is the smaller one.
        gtInfo.ThreadCount = euBasedThreads / 2;
        const auto actualThreads = coreHelper.calculateAvailableThreadCount(hardwareInfo, testCase.first);
        EXPECT_EQ(gtInfo.ThreadCount, actualThreads);
    }
}
|
||||
|
||||
// Overrides EUCount on a copy of the fixture's hardware info and verifies the result for a
// GRF count of 256: 64 EUs -> 256 threads, 128 EUs -> 512 threads.
HWTEST2_F(GfxCoreHelperTest, GivenModifiedGtSystemInfoWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtMostXe2HpgCore) {
    auto &coreHelper = getHelper<GfxCoreHelper>();
    auto modifiedHwInfo = hardwareInfo;
    // Keep the reported thread count above every expected value in the table below.
    modifiedHwInfo.gtSystemInfo.ThreadCount = 1024;

    // Each entry: {EUCount written to gtSystemInfo, expected result}.
    const std::array<std::pair<uint32_t, uint32_t>, 2> cases = {{{64, 256},
                                                                 {128, 512}}};
    for (const auto &testCase : cases) {
        modifiedHwInfo.gtSystemInfo.EUCount = testCase.first;
        const auto actualThreads = coreHelper.calculateAvailableThreadCount(modifiedHwInfo, 256);
        EXPECT_EQ(testCase.second, actualThreads);
    }
}
|
||||
@@ -53,34 +53,6 @@ HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenRenderEngineWhenRemapCalledThenUseC
|
||||
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_BCS, rootDeviceEnvironment));
|
||||
}
|
||||
|
||||
// Verifies that each GRF count yields (expected threads per EU) * EUCount for the
// fixture's hardware info: 64 -> 16, 96 -> 10, 128 -> 8, 160 -> 6, 192 -> 5, 256 -> 4.
HWTEST2_F(GfxCoreHelperTestPvcAndLater, GivenVariousValuesAndPvcAndLaterPlatformsWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) {
    auto &coreHelper = getHelper<GfxCoreHelper>();

    // Each entry: {grfCount, expected threads per EU}.
    const std::array<std::pair<uint32_t, uint32_t>, 6> cases = {{{64, 16},
                                                                 {96, 10},
                                                                 {128, 8},
                                                                 {160, 6},
                                                                 {192, 5},
                                                                 {256, 4}}};
    for (const auto &testCase : cases) {
        const auto expectedThreads = testCase.second * hardwareInfo.gtSystemInfo.EUCount;
        const auto actualThreads = coreHelper.calculateAvailableThreadCount(hardwareInfo, testCase.first);
        EXPECT_EQ(expectedThreads, actualThreads);
    }
}
|
||||
|
||||
// Overrides EUCount on a copy of the fixture's hardware info and verifies the result for a
// GRF count of 256: 64 EUs -> 256, 96 EUs -> 384, 128 EUs -> 512.
HWTEST2_F(GfxCoreHelperTestPvcAndLater, GivenModifiedGtSystemInfoAndPvcAndLaterPlatformsWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) {
    auto &coreHelper = getHelper<GfxCoreHelper>();
    auto modifiedHwInfo = hardwareInfo;

    // Each entry: {EUCount written to gtSystemInfo, expected result}.
    const std::array<std::pair<uint32_t, uint32_t>, 3> cases = {{{64, 256},
                                                                 {96, 384},
                                                                 {128, 512}}};
    for (const auto &testCase : cases) {
        modifiedHwInfo.gtSystemInfo.EUCount = testCase.first;
        const auto actualThreads = coreHelper.calculateAvailableThreadCount(modifiedHwInfo, 256);
        EXPECT_EQ(testCase.second, actualThreads);
    }
}
|
||||
|
||||
HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenGfxCoreHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) {
|
||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user