Revert "fix: correct limitation for num threads per thread group"

This reverts commit 6ad4ad41b1.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2025-05-23 02:11:46 +02:00
committed by Compute-Runtime-Automation
parent d6849a5605
commit 593c9e76f2
26 changed files with 223 additions and 166 deletions

View File

@@ -1649,16 +1649,18 @@ HWTEST_F(GfxCoreHelperTest, givenNumGrfAndSimdSizeWhenAdjustingMaxWorkGroupSizeT
constexpr auto defaultMaxGroupSize = 1024u;
uint32_t simdSize = 16u;
uint32_t isHwLocalIdGeneration = true;
uint32_t numGrfRequired = GrfConfig::largeGrfNumber;
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
simdSize = 32u;
numGrfRequired = GrfConfig::largeGrfNumber;
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
simdSize = 16u;
isHwLocalIdGeneration = false;
numGrfRequired = GrfConfig::defaultGrfNumber;
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
}
HWTEST2_F(GfxCoreHelperTest, givenParamsWhenCalculateNumThreadsPerThreadGroupThenMethodReturnProperValue, IsAtMostXeHpcCore) {
@@ -1676,7 +1678,7 @@ HWTEST2_F(GfxCoreHelperTest, givenParamsWhenCalculateNumThreadsPerThreadGroupThe
}};
for (auto &[simtSize, totalWgSize, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, 32u, rootDeviceEnvironment));
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, 32u, true, rootDeviceEnvironment));
}
}
@@ -1686,19 +1688,19 @@ HWTEST_F(GfxCoreHelperTest, givenFlagRemoveRestrictionsOnNumberOfThreadsInGpgpuT
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
std::array<std::array<uint32_t, 4>, 8> values = {{
{32u, 32u, 128u, 1u}, // SIMT Size, totalWorkItems,Grf size, Max Num of threads
{32u, 64u, 32u, 2u},
{32u, 128u, 256u, 4u},
{32u, 1024u, 128u, 32u},
{16u, 32u, 32u, 2u},
{16u, 64u, 256u, 4u},
{16u, 128u, 128u, 8u},
{16u, 1024u, 256u, 64u},
std::array<std::array<uint32_t, 5>, 8> values = {{
{32u, 32u, 128u, 1, 1u}, // SIMT Size, totalWorkItems, Grf size, Hw local id generation, Max Num of threads
{32u, 64u, 32u, 1, 2u},
{32u, 128u, 256u, 1, 4u},
{32u, 1024u, 128u, 1, 32u},
{16u, 32u, 32u, 0, 2u},
{16u, 64u, 256u, 0, 4u},
{16u, 128u, 128u, 0, 8u},
{16u, 1024u, 256u, 0, 64u},
}};
for (auto &[simtSize, totalWgSize, grfsize, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfsize, rootDeviceEnvironment));
for (auto &[simtSize, totalWgSize, grfsize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfsize, isHwLocalIdGeneration, rootDeviceEnvironment));
}
}

View File

@@ -81,7 +81,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
NEO::MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
auto localIdsSize = alignUp(PerThreadDataHelper::getPerThreadDataSizeTotal(implicitArgs.v0.simdWidth, 32u /* grfSize */, GrfConfig::defaultGrfNumber /* numGrf */, 3u /* num channels */, totalWorkgroupSize, rootDeviceEnvironment), MemoryConstants::cacheLineSize);
auto localIdsSize = alignUp(PerThreadDataHelper::getPerThreadDataSizeTotal(implicitArgs.v0.simdWidth, 32u /* grfSize */, GrfConfig::defaultGrfNumber /* numGrf */, 3u /* num channels */, totalWorkgroupSize, false, rootDeviceEnvironment), MemoryConstants::cacheLineSize);
EXPECT_EQ(localIdsSize + ImplicitArgsV0::getAlignedSize(), ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, false, rootDeviceEnvironment));
}

View File

@@ -807,17 +807,23 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenNumGrfAndSimdSizeWhenAdjus
auto defaultMaxWorkGroupSize = 2048u;
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
std::array<std::array<uint32_t, 3>, 6> values = {{
{GrfConfig::defaultGrfNumber, 16u, 1024u}, // Grf Size, SIMT Size, Max Num of threads
{GrfConfig::defaultGrfNumber, 32u, 1024u},
{GrfConfig::largeGrfNumber, 16u, 512u},
{GrfConfig::largeGrfNumber, 32u, 1024u},
{GrfConfig::defaultGrfNumber, 1u, 32u},
{GrfConfig::largeGrfNumber, 1u, 32u},
std::array<std::array<uint32_t, 4>, 12> values = {{
{GrfConfig::defaultGrfNumber, 16u, 0u, 1024u}, // Grf Size, SIMT Size, HW local-id generation, Max work group size
{GrfConfig::defaultGrfNumber, 16u, 1u, 1024u},
{GrfConfig::defaultGrfNumber, 32u, 1u, 1024u},
{GrfConfig::defaultGrfNumber, 32u, 0u, 2048u},
{GrfConfig::largeGrfNumber, 16u, 0u, 512u},
{GrfConfig::largeGrfNumber, 16u, 1u, 512u},
{GrfConfig::largeGrfNumber, 32u, 0u, 1024u},
{GrfConfig::largeGrfNumber, 32u, 1u, 1024u},
{GrfConfig::defaultGrfNumber, 1u, 1u, 32u},
{GrfConfig::defaultGrfNumber, 1u, 0u, 64u},
{GrfConfig::largeGrfNumber, 1u, 0u, 32u},
{GrfConfig::largeGrfNumber, 1u, 1u, 32u},
}};
for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, defaultMaxWorkGroupSize, rootDeviceEnvironment));
for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, isHwLocalIdGeneration, defaultMaxWorkGroupSize, rootDeviceEnvironment));
}
}
@@ -825,17 +831,23 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenParamsWhenCalculateNumThre
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
auto totalWgSize = 2048u;
std::array<std::array<uint32_t, 3>, 6> values = {{
{GrfConfig::defaultGrfNumber, 16u, 64u}, // Grf Size, SIMT Size, Max Num of threads
{GrfConfig::defaultGrfNumber, 32u, 32u},
{GrfConfig::defaultGrfNumber, 1u, 32u},
{GrfConfig::largeGrfNumber, 16u, 32u},
{GrfConfig::largeGrfNumber, 32u, 32u},
{GrfConfig::largeGrfNumber, 1u, 32u},
std::array<std::array<uint32_t, 4>, 12> values = {{
{GrfConfig::defaultGrfNumber, 16u, 0u, 64u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
{GrfConfig::defaultGrfNumber, 16u, 1u, 64u},
{GrfConfig::defaultGrfNumber, 32u, 1u, 32u},
{GrfConfig::defaultGrfNumber, 32u, 0u, 64u},
{GrfConfig::defaultGrfNumber, 1u, 1u, 32u},
{GrfConfig::defaultGrfNumber, 1u, 0u, 64u},
{GrfConfig::largeGrfNumber, 16u, 0u, 32u},
{GrfConfig::largeGrfNumber, 16u, 1u, 32u},
{GrfConfig::largeGrfNumber, 32u, 0u, 32u},
{GrfConfig::largeGrfNumber, 32u, 1u, 32u},
{GrfConfig::largeGrfNumber, 1u, 0u, 32u},
{GrfConfig::largeGrfNumber, 1u, 1u, 32u},
}};
for (auto &[grfSize, simtSize, expectedNumThdreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThdreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, rootDeviceEnvironment));
for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThdreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThdreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, isHwLocalIdGeneration, rootDeviceEnvironment));
}
}

View File

@@ -754,26 +754,41 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenNumGrfAndSimdSizeWhenAdjustingMax
auto defaultMaxWorkGroupSize = 2048u;
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
std::array<std::array<uint32_t, 3>, 15> values = {{
{128u, 16u, 1024u}, // Grf Size, SIMT Size, Max Num of threads
{128u, 32u, 1024u},
{160u, 16u, 768u},
{160u, 32u, 1024u},
{192u, 16u, 640u},
{192u, 32u, 1024u},
{256u, 16u, 512u},
{256u, 32u, 1024u},
{512u, 16u, 256u},
{512u, 32u, 512u},
{128u, 1u, 32u},
{160u, 1u, 32u},
{192u, 1u, 32u},
{256u, 1u, 32u},
{512u, 1u, 16u},
std::array<std::array<uint32_t, 4>, 30> values = {{
{128u, 16u, 0u, 1024u}, // Grf Size, SIMT Size, HW local-id generation, Max work group size
{128u, 16u, 1u, 1024u},
{128u, 32u, 1u, 1024u},
{128u, 32u, 0u, 2048u},
{160u, 16u, 0u, 768u},
{160u, 16u, 1u, 768u},
{160u, 32u, 1u, 1024u},
{160u, 32u, 0u, 1536u},
{192u, 16u, 0u, 640u},
{192u, 16u, 1u, 640u},
{192u, 32u, 1u, 1024u},
{192u, 32u, 0u, 1280u},
{256u, 16u, 0u, 512u},
{256u, 16u, 1u, 512u},
{256u, 32u, 1u, 1024u},
{256u, 32u, 0u, 1024u},
{512u, 16u, 0u, 256u},
{512u, 16u, 1u, 256u},
{512u, 32u, 1u, 512u},
{512u, 32u, 0u, 512u},
{128u, 1u, 1u, 32u},
{128u, 1u, 0u, 64u},
{160u, 1u, 1u, 32u},
{160u, 1u, 0u, 48u},
{192u, 1u, 1u, 32u},
{192u, 1u, 0u, 40u},
{256u, 1u, 1u, 32u},
{256u, 1u, 0u, 32u},
{512u, 1u, 1u, 16u},
{512u, 1u, 0u, 16u},
}};
for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, defaultMaxWorkGroupSize, rootDeviceEnvironment));
for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, isHwLocalIdGeneration, defaultMaxWorkGroupSize, rootDeviceEnvironment));
}
}
@@ -786,26 +801,41 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenParamsWhenCalculateNumThreadsPerT
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
auto totalWgSize = 2048u;
std::array<std::array<uint32_t, 3>, 15> values = {{
{128u, 16u, 64u}, // Grf Size, SIMT Size, Max Num of threads
{128u, 32u, 32u},
{128u, 1u, 32u},
{160u, 16u, 48u},
{160u, 32u, 32u},
{160u, 1u, 32u},
{192u, 16u, 40u},
{192u, 32u, 32u},
{192u, 1u, 32u},
{256u, 16u, 32u},
{256u, 32u, 32u},
{256u, 1u, 32u},
{512u, 16u, 16u},
{512u, 32u, 16u},
{512u, 1u, 16u},
std::array<std::array<uint32_t, 4>, 30> values = {{
{128u, 16u, 0u, 64u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
{128u, 16u, 1u, 64u},
{128u, 32u, 1u, 32u},
{128u, 32u, 0u, 64u},
{128u, 1u, 1u, 32u},
{128u, 1u, 0u, 64u},
{160u, 16u, 0u, 48u},
{160u, 16u, 1u, 48u},
{160u, 32u, 1u, 32u},
{160u, 32u, 0u, 48u},
{160u, 1u, 1u, 32u},
{160u, 1u, 0u, 48u},
{192u, 16u, 0u, 40u},
{192u, 16u, 1u, 40u},
{192u, 32u, 1u, 32u},
{192u, 32u, 0u, 40u},
{192u, 1u, 1u, 32u},
{192u, 1u, 0u, 40u},
{256u, 16u, 0u, 32u},
{256u, 16u, 1u, 32u},
{256u, 32u, 1u, 32u},
{256u, 32u, 0u, 32u},
{256u, 1u, 1u, 32u},
{256u, 1u, 0u, 32u},
{512u, 16u, 0u, 16u},
{512u, 16u, 1u, 16u},
{512u, 32u, 1u, 16u},
{512u, 32u, 0u, 16u},
{512u, 1u, 1u, 16u},
{512u, 1u, 0u, 16u},
}};
for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, rootDeviceEnvironment));
for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, isHwLocalIdGeneration, rootDeviceEnvironment));
}
}