mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
fix: change logic to calculate available thread count
Don't use a magic number; the value depends on the GRF size. Related-To: HSD-18039369782 Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b520c64775
commit
42ca656edb
@@ -796,6 +796,15 @@ bool GfxCoreHelperHw<GfxFamily>::usmCompressionSupported(const NEO::HardwareInfo
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
uint32_t GfxCoreHelperHw<GfxFamily>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
|
||||||
|
auto maxThreadsPerEuCount = 8u;
|
||||||
|
if (grfCount == GrfConfig::largeGrfNumber) {
|
||||||
|
maxThreadsPerEuCount = 4;
|
||||||
|
}
|
||||||
|
return std::min(hwInfo.gtSystemInfo.ThreadCount, maxThreadsPerEuCount * hwInfo.gtSystemInfo.EUCount);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
uint32_t GfxCoreHelperHw<Family>::getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const {
|
uint32_t GfxCoreHelperHw<Family>::getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const {
|
||||||
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
|
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
|
||||||
|
|||||||
@@ -54,11 +54,6 @@ bool GfxCoreHelperHw<GfxFamily>::makeResidentBeforeLockNeeded(bool precondition)
|
|||||||
return precondition;
|
return precondition;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
|
||||||
uint32_t GfxCoreHelperHw<GfxFamily>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
|
|
||||||
return hwInfo.gtSystemInfo.ThreadCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
inline uint32_t GfxCoreHelperHw<GfxFamily>::calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const {
|
inline uint32_t GfxCoreHelperHw<GfxFamily>::calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const {
|
||||||
return std::min(defaultMaxGroupSize, CommonConstants::maxWorkgroupSize);
|
return std::min(defaultMaxGroupSize, CommonConstants::maxWorkgroupSize);
|
||||||
|
|||||||
@@ -86,10 +86,4 @@ size_t GfxCoreHelperHw<Family>::getPaddingForISAAllocation() const {
|
|||||||
return 0xE00;
|
return 0xE00;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Family>
|
|
||||||
uint32_t GfxCoreHelperHw<Family>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
|
|
||||||
auto maxThreadsPerEuCount = 1024u / grfCount;
|
|
||||||
return maxThreadsPerEuCount * hwInfo.gtSystemInfo.EUCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -115,13 +115,6 @@ bool GfxCoreHelperHw<Family>::copyThroughLockedPtrEnabled(const HardwareInfo &hw
|
|||||||
|
|
||||||
return this->isLocalMemoryEnabled(hwInfo) && !productHelper.isUnlockingLockedPtrNecessary(hwInfo);
|
return this->isLocalMemoryEnabled(hwInfo) && !productHelper.isUnlockingLockedPtrNecessary(hwInfo);
|
||||||
}
|
}
|
||||||
template <>
|
|
||||||
uint32_t GfxCoreHelperHw<Family>::calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const {
|
|
||||||
if (grfCount > GrfConfig::defaultGrfNumber) {
|
|
||||||
return hwInfo.gtSystemInfo.ThreadCount / 2u;
|
|
||||||
}
|
|
||||||
return hwInfo.gtSystemInfo.ThreadCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void GfxCoreHelperHw<Family>::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
void GfxCoreHelperHw<Family>::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ using IsAtMostXeHpcCore = IsAtMostGfxCore<IGFX_XE_HPC_CORE>;
|
|||||||
using IsBeforeXeHpcCore = IsBeforeGfxCore<IGFX_XE_HPC_CORE>;
|
using IsBeforeXeHpcCore = IsBeforeGfxCore<IGFX_XE_HPC_CORE>;
|
||||||
|
|
||||||
using IsAtLeastXe2HpgCore = IsAtLeastGfxCore<IGFX_XE2_HPG_CORE>;
|
using IsAtLeastXe2HpgCore = IsAtLeastGfxCore<IGFX_XE2_HPG_CORE>;
|
||||||
|
using IsAtMostXe2HpgCore = IsAtMostGfxCore<IGFX_XE2_HPG_CORE>;
|
||||||
|
|
||||||
using IsXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;
|
using IsXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;
|
||||||
using IsXeHpOrXeHpcCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
|
using IsXeHpOrXeHpcCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
|
||||||
|
|||||||
@@ -1398,33 +1398,6 @@ HWTEST2_F(GfxCoreHelperTest, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllo
|
|||||||
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 1u);
|
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 1u);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesAndXeHpOrXeHpgCoreWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsXeHpOrXeHpgCore) {
|
|
||||||
std::array<std::pair<uint32_t, uint32_t>, 3> grfTestInputs = {{{64, 8},
|
|
||||||
{128, 8},
|
|
||||||
{256, 4}}};
|
|
||||||
|
|
||||||
const auto &hwInfo = *defaultHwInfo;
|
|
||||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
|
||||||
for (const auto &[grfCount, expectedThreadCountPerEu] : grfTestInputs) {
|
|
||||||
auto expected = expectedThreadCountPerEu * hwInfo.gtSystemInfo.EUCount;
|
|
||||||
auto result = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount);
|
|
||||||
EXPECT_EQ(expected, result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(GfxCoreHelperTest, GivenModifiedGtSystemInfoAndXeHpOrXeHpgCoreWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsXeHpOrXeHpgCore) {
|
|
||||||
std::array<std::tuple<uint32_t, uint32_t, uint32_t>, 3> testInputs = {{{1, 64, 1},
|
|
||||||
{5, 128, 5},
|
|
||||||
{8, 256, 4}}};
|
|
||||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
|
||||||
auto hwInfo = hardwareInfo;
|
|
||||||
for (const auto &[threadCount, grfCount, expectedThreadCount] : testInputs) {
|
|
||||||
hwInfo.gtSystemInfo.ThreadCount = threadCount;
|
|
||||||
auto result = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount);
|
|
||||||
EXPECT_EQ(expectedThreadCount, result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(GfxCoreHelperTest, givenAtMostGen12lpPlatformWhenGettingMinimalScratchSpaceSizeThen1024IsReturned, IsGen12LP) {
|
HWTEST2_F(GfxCoreHelperTest, givenAtMostGen12lpPlatformWhenGettingMinimalScratchSpaceSizeThen1024IsReturned, IsGen12LP) {
|
||||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||||
EXPECT_EQ(1024U, gfxCoreHelper.getMinimalScratchSpaceSize());
|
EXPECT_EQ(1024U, gfxCoreHelper.getMinimalScratchSpaceSize());
|
||||||
@@ -1848,3 +1821,45 @@ HWTEST_F(GfxCoreHelperTest, whenEncodeAdditionalTimestampOffsetsThenNothingEncod
|
|||||||
GenCmdList storeRegMemList = hwParser.getCommandsList<MI_STORE_REGISTER_MEM>();
|
GenCmdList storeRegMemList = hwParser.getCommandsList<MI_STORE_REGISTER_MEM>();
|
||||||
EXPECT_EQ(0u, storeRegMemList.size());
|
EXPECT_EQ(0u, storeRegMemList.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountAndThreadCountAvailableIsBiggerThenCorrectValueIsReturned, IsAtMostXe2HpgCore) {
|
||||||
|
std::array<std::pair<uint32_t, uint32_t>, 2> grfTestInputs = {{{128, 8},
|
||||||
|
{256, 4}}};
|
||||||
|
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||||
|
for (const auto &[grfCount, expectedThreadCountPerEu] : grfTestInputs) {
|
||||||
|
auto expected = expectedThreadCountPerEu * hardwareInfo.gtSystemInfo.EUCount;
|
||||||
|
// force the available thread count to always be bigger than the expected value
|
||||||
|
hardwareInfo.gtSystemInfo.ThreadCount = 2 * expected;
|
||||||
|
auto result = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount);
|
||||||
|
EXPECT_EQ(expected, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(GfxCoreHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountAndThreadCountAvailableIsSmallerThenThreadCountIsReturned, IsAtMostXe2HpgCore) {
|
||||||
|
std::array<std::pair<uint32_t, uint32_t>, 2> grfTestInputs = {{
|
||||||
|
{128, 8},
|
||||||
|
{256, 4},
|
||||||
|
}};
|
||||||
|
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||||
|
for (const auto &[grfCount, expectedThreadCountPerEu] : grfTestInputs) {
|
||||||
|
auto calculatedThreadCount = expectedThreadCountPerEu * hardwareInfo.gtSystemInfo.EUCount;
|
||||||
|
// force thread count smaller than calculation
|
||||||
|
hardwareInfo.gtSystemInfo.ThreadCount = calculatedThreadCount / 2;
|
||||||
|
auto result = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount);
|
||||||
|
EXPECT_EQ(hardwareInfo.gtSystemInfo.ThreadCount, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(GfxCoreHelperTest, GivenModifiedGtSystemInfoWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtMostXe2HpgCore) {
|
||||||
|
std::array<std::pair<uint32_t, uint32_t>, 2> testInputs = {{{64, 256},
|
||||||
|
{128, 512}}};
|
||||||
|
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||||
|
auto hwInfo = hardwareInfo;
|
||||||
|
for (const auto &[euCount, expectedThreadCount] : testInputs) {
|
||||||
|
// force thread count bigger than expected
|
||||||
|
hwInfo.gtSystemInfo.ThreadCount = 1024;
|
||||||
|
hwInfo.gtSystemInfo.EUCount = euCount;
|
||||||
|
auto result = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, 256);
|
||||||
|
EXPECT_EQ(expectedThreadCount, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -53,34 +53,6 @@ HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenRenderEngineWhenRemapCalledThenUseC
|
|||||||
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_BCS, rootDeviceEnvironment));
|
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_BCS, rootDeviceEnvironment));
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(GfxCoreHelperTestPvcAndLater, GivenVariousValuesAndPvcAndLaterPlatformsWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) {
|
|
||||||
std::array<std::pair<uint32_t, uint32_t>, 6> grfTestInputs = {{{64, 16},
|
|
||||||
{96, 10},
|
|
||||||
{128, 8},
|
|
||||||
{160, 6},
|
|
||||||
{192, 5},
|
|
||||||
{256, 4}}};
|
|
||||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
|
||||||
for (const auto &[grfCount, expectedThreadCountPerEu] : grfTestInputs) {
|
|
||||||
auto expected = expectedThreadCountPerEu * hardwareInfo.gtSystemInfo.EUCount;
|
|
||||||
auto result = gfxCoreHelper.calculateAvailableThreadCount(hardwareInfo, grfCount);
|
|
||||||
EXPECT_EQ(expected, result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(GfxCoreHelperTestPvcAndLater, GivenModifiedGtSystemInfoAndPvcAndLaterPlatformsWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) {
|
|
||||||
std::array<std::pair<uint32_t, uint32_t>, 3> testInputs = {{{64, 256},
|
|
||||||
{96, 384},
|
|
||||||
{128, 512}}};
|
|
||||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
|
||||||
auto hwInfo = hardwareInfo;
|
|
||||||
for (const auto &[euCount, expectedThreadCount] : testInputs) {
|
|
||||||
hwInfo.gtSystemInfo.EUCount = euCount;
|
|
||||||
auto result = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, 256);
|
|
||||||
EXPECT_EQ(expectedThreadCount, result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenGfxCoreHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) {
|
HWTEST2_F(GfxCoreHelperTestPvcAndLater, givenGfxCoreHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) {
|
||||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user