mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 18:06:32 +08:00
Revert "fix: correct limitation for num threads per thread group"
This reverts commit 6ad4ad41b1.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d6849a5605
commit
593c9e76f2
@@ -1649,16 +1649,18 @@ HWTEST_F(GfxCoreHelperTest, givenNumGrfAndSimdSizeWhenAdjustingMaxWorkGroupSizeT
|
||||
constexpr auto defaultMaxGroupSize = 1024u;
|
||||
|
||||
uint32_t simdSize = 16u;
|
||||
uint32_t isHwLocalIdGeneration = true;
|
||||
uint32_t numGrfRequired = GrfConfig::largeGrfNumber;
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
|
||||
|
||||
simdSize = 32u;
|
||||
numGrfRequired = GrfConfig::largeGrfNumber;
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
|
||||
|
||||
simdSize = 16u;
|
||||
isHwLocalIdGeneration = false;
|
||||
numGrfRequired = GrfConfig::defaultGrfNumber;
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
|
||||
}
|
||||
|
||||
HWTEST2_F(GfxCoreHelperTest, givenParamsWhenCalculateNumThreadsPerThreadGroupThenMethodReturnProperValue, IsAtMostXeHpcCore) {
|
||||
@@ -1676,7 +1678,7 @@ HWTEST2_F(GfxCoreHelperTest, givenParamsWhenCalculateNumThreadsPerThreadGroupThe
|
||||
}};
|
||||
|
||||
for (auto &[simtSize, totalWgSize, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, 32u, rootDeviceEnvironment));
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, 32u, true, rootDeviceEnvironment));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1686,19 +1688,19 @@ HWTEST_F(GfxCoreHelperTest, givenFlagRemoveRestrictionsOnNumberOfThreadsInGpgpuT
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
|
||||
std::array<std::array<uint32_t, 4>, 8> values = {{
|
||||
{32u, 32u, 128u, 1u}, // SIMT Size, totalWorkItems,Grf size, Max Num of threads
|
||||
{32u, 64u, 32u, 2u},
|
||||
{32u, 128u, 256u, 4u},
|
||||
{32u, 1024u, 128u, 32u},
|
||||
{16u, 32u, 32u, 2u},
|
||||
{16u, 64u, 256u, 4u},
|
||||
{16u, 128u, 128u, 8u},
|
||||
{16u, 1024u, 256u, 64u},
|
||||
std::array<std::array<uint32_t, 5>, 8> values = {{
|
||||
{32u, 32u, 128u, 1, 1u}, // SIMT Size, totalWorkItems, Grf size, Hw local id generation, Max Num of threads
|
||||
{32u, 64u, 32u, 1, 2u},
|
||||
{32u, 128u, 256u, 1, 4u},
|
||||
{32u, 1024u, 128u, 1, 32u},
|
||||
{16u, 32u, 32u, 0, 2u},
|
||||
{16u, 64u, 256u, 0, 4u},
|
||||
{16u, 128u, 128u, 0, 8u},
|
||||
{16u, 1024u, 256u, 0, 64u},
|
||||
}};
|
||||
|
||||
for (auto &[simtSize, totalWgSize, grfsize, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfsize, rootDeviceEnvironment));
|
||||
for (auto &[simtSize, totalWgSize, grfsize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfsize, isHwLocalIdGeneration, rootDeviceEnvironment));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
|
||||
|
||||
NEO::MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
|
||||
auto localIdsSize = alignUp(PerThreadDataHelper::getPerThreadDataSizeTotal(implicitArgs.v0.simdWidth, 32u /* grfSize */, GrfConfig::defaultGrfNumber /* numGrf */, 3u /* num channels */, totalWorkgroupSize, rootDeviceEnvironment), MemoryConstants::cacheLineSize);
|
||||
auto localIdsSize = alignUp(PerThreadDataHelper::getPerThreadDataSizeTotal(implicitArgs.v0.simdWidth, 32u /* grfSize */, GrfConfig::defaultGrfNumber /* numGrf */, 3u /* num channels */, totalWorkgroupSize, false, rootDeviceEnvironment), MemoryConstants::cacheLineSize);
|
||||
EXPECT_EQ(localIdsSize + ImplicitArgsV0::getAlignedSize(), ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, false, rootDeviceEnvironment));
|
||||
}
|
||||
|
||||
|
||||
@@ -807,17 +807,23 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenNumGrfAndSimdSizeWhenAdjus
|
||||
auto defaultMaxWorkGroupSize = 2048u;
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
std::array<std::array<uint32_t, 3>, 6> values = {{
|
||||
{GrfConfig::defaultGrfNumber, 16u, 1024u}, // Grf Size, SIMT Size, Max Num of threads
|
||||
{GrfConfig::defaultGrfNumber, 32u, 1024u},
|
||||
{GrfConfig::largeGrfNumber, 16u, 512u},
|
||||
{GrfConfig::largeGrfNumber, 32u, 1024u},
|
||||
{GrfConfig::defaultGrfNumber, 1u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 1u, 32u},
|
||||
std::array<std::array<uint32_t, 4>, 12> values = {{
|
||||
{GrfConfig::defaultGrfNumber, 16u, 0u, 1024u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
|
||||
{GrfConfig::defaultGrfNumber, 16u, 1u, 1024u},
|
||||
{GrfConfig::defaultGrfNumber, 32u, 1u, 1024u},
|
||||
{GrfConfig::defaultGrfNumber, 32u, 0u, 2048u},
|
||||
{GrfConfig::largeGrfNumber, 16u, 0u, 512u},
|
||||
{GrfConfig::largeGrfNumber, 16u, 1u, 512u},
|
||||
{GrfConfig::largeGrfNumber, 32u, 0u, 1024u},
|
||||
{GrfConfig::largeGrfNumber, 32u, 1u, 1024u},
|
||||
{GrfConfig::defaultGrfNumber, 1u, 1u, 32u},
|
||||
{GrfConfig::defaultGrfNumber, 1u, 0u, 64u},
|
||||
{GrfConfig::largeGrfNumber, 1u, 0u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 1u, 1u, 32u},
|
||||
}};
|
||||
|
||||
for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, defaultMaxWorkGroupSize, rootDeviceEnvironment));
|
||||
for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, isHwLocalIdGeneration, defaultMaxWorkGroupSize, rootDeviceEnvironment));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -825,17 +831,23 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenParamsWhenCalculateNumThre
|
||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
auto totalWgSize = 2048u;
|
||||
std::array<std::array<uint32_t, 3>, 6> values = {{
|
||||
{GrfConfig::defaultGrfNumber, 16u, 64u}, // Grf Size, SIMT Size, Max Num of threads
|
||||
{GrfConfig::defaultGrfNumber, 32u, 32u},
|
||||
{GrfConfig::defaultGrfNumber, 1u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 16u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 32u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 1u, 32u},
|
||||
std::array<std::array<uint32_t, 4>, 12> values = {{
|
||||
{GrfConfig::defaultGrfNumber, 16u, 0u, 64u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
|
||||
{GrfConfig::defaultGrfNumber, 16u, 1u, 64u},
|
||||
{GrfConfig::defaultGrfNumber, 32u, 1u, 32u},
|
||||
{GrfConfig::defaultGrfNumber, 32u, 0u, 64u},
|
||||
{GrfConfig::defaultGrfNumber, 1u, 1u, 32u},
|
||||
{GrfConfig::defaultGrfNumber, 1u, 0u, 64u},
|
||||
{GrfConfig::largeGrfNumber, 16u, 0u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 16u, 1u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 32u, 0u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 32u, 1u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 1u, 0u, 32u},
|
||||
{GrfConfig::largeGrfNumber, 1u, 1u, 32u},
|
||||
}};
|
||||
|
||||
for (auto &[grfSize, simtSize, expectedNumThdreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThdreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, rootDeviceEnvironment));
|
||||
for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThdreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThdreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, isHwLocalIdGeneration, rootDeviceEnvironment));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -754,26 +754,41 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenNumGrfAndSimdSizeWhenAdjustingMax
|
||||
auto defaultMaxWorkGroupSize = 2048u;
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
std::array<std::array<uint32_t, 3>, 15> values = {{
|
||||
{128u, 16u, 1024u}, // Grf Size, SIMT Size, Max Num of threads
|
||||
{128u, 32u, 1024u},
|
||||
{160u, 16u, 768u},
|
||||
{160u, 32u, 1024u},
|
||||
{192u, 16u, 640u},
|
||||
{192u, 32u, 1024u},
|
||||
{256u, 16u, 512u},
|
||||
{256u, 32u, 1024u},
|
||||
{512u, 16u, 256u},
|
||||
{512u, 32u, 512u},
|
||||
{128u, 1u, 32u},
|
||||
{160u, 1u, 32u},
|
||||
{192u, 1u, 32u},
|
||||
{256u, 1u, 32u},
|
||||
{512u, 1u, 16u},
|
||||
std::array<std::array<uint32_t, 4>, 30> values = {{
|
||||
{128u, 16u, 0u, 1024u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
|
||||
{128u, 16u, 1u, 1024u},
|
||||
{128u, 32u, 1u, 1024u},
|
||||
{128u, 32u, 0u, 2048u},
|
||||
{160u, 16u, 0u, 768u},
|
||||
{160u, 16u, 1u, 768u},
|
||||
{160u, 32u, 1u, 1024u},
|
||||
{160u, 32u, 0u, 1536u},
|
||||
{192u, 16u, 0u, 640u},
|
||||
{192u, 16u, 1u, 640u},
|
||||
{192u, 32u, 1u, 1024u},
|
||||
{192u, 32u, 0u, 1280u},
|
||||
{256u, 16u, 0u, 512u},
|
||||
{256u, 16u, 1u, 512u},
|
||||
{256u, 32u, 1u, 1024u},
|
||||
{256u, 32u, 0u, 1024u},
|
||||
{512u, 16u, 0u, 256u},
|
||||
{512u, 16u, 1u, 256u},
|
||||
{512u, 32u, 1u, 512u},
|
||||
{512u, 32u, 0u, 512u},
|
||||
{128u, 1u, 1u, 32u},
|
||||
{128u, 1u, 0u, 64u},
|
||||
{160u, 1u, 1u, 32u},
|
||||
{160u, 1u, 0u, 48u},
|
||||
{192u, 1u, 1u, 32u},
|
||||
{192u, 1u, 0u, 40u},
|
||||
{256u, 1u, 1u, 32u},
|
||||
{256u, 1u, 0u, 32u},
|
||||
{512u, 1u, 1u, 16u},
|
||||
{512u, 1u, 0u, 16u},
|
||||
}};
|
||||
|
||||
for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, defaultMaxWorkGroupSize, rootDeviceEnvironment));
|
||||
for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, isHwLocalIdGeneration, defaultMaxWorkGroupSize, rootDeviceEnvironment));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -786,26 +801,41 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenParamsWhenCalculateNumThreadsPerT
|
||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
auto totalWgSize = 2048u;
|
||||
std::array<std::array<uint32_t, 3>, 15> values = {{
|
||||
{128u, 16u, 64u}, // Grf Size, SIMT Size, Max Num of threads
|
||||
{128u, 32u, 32u},
|
||||
{128u, 1u, 32u},
|
||||
{160u, 16u, 48u},
|
||||
{160u, 32u, 32u},
|
||||
{160u, 1u, 32u},
|
||||
{192u, 16u, 40u},
|
||||
{192u, 32u, 32u},
|
||||
{192u, 1u, 32u},
|
||||
{256u, 16u, 32u},
|
||||
{256u, 32u, 32u},
|
||||
{256u, 1u, 32u},
|
||||
{512u, 16u, 16u},
|
||||
{512u, 32u, 16u},
|
||||
{512u, 1u, 16u},
|
||||
std::array<std::array<uint32_t, 4>, 30> values = {{
|
||||
{128u, 16u, 0u, 64u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
|
||||
{128u, 16u, 1u, 64u},
|
||||
{128u, 32u, 1u, 32u},
|
||||
{128u, 32u, 0u, 64u},
|
||||
{128u, 1u, 1u, 32u},
|
||||
{128u, 1u, 0u, 64u},
|
||||
{160u, 16u, 0u, 48u},
|
||||
{160u, 16u, 1u, 48u},
|
||||
{160u, 32u, 1u, 32u},
|
||||
{160u, 32u, 0u, 48u},
|
||||
{160u, 1u, 1u, 32u},
|
||||
{160u, 1u, 0u, 48u},
|
||||
{192u, 16u, 0u, 40u},
|
||||
{192u, 16u, 1u, 40u},
|
||||
{192u, 32u, 1u, 32u},
|
||||
{192u, 32u, 0u, 40u},
|
||||
{192u, 1u, 1u, 32u},
|
||||
{192u, 1u, 0u, 40u},
|
||||
{256u, 16u, 0u, 32u},
|
||||
{256u, 16u, 1u, 32u},
|
||||
{256u, 32u, 1u, 32u},
|
||||
{256u, 32u, 0u, 32u},
|
||||
{256u, 1u, 1u, 32u},
|
||||
{256u, 1u, 0u, 32u},
|
||||
{512u, 16u, 0u, 16u},
|
||||
{512u, 16u, 1u, 16u},
|
||||
{512u, 32u, 1u, 16u},
|
||||
{512u, 32u, 0u, 16u},
|
||||
{512u, 1u, 1u, 16u},
|
||||
{512u, 1u, 0u, 16u},
|
||||
}};
|
||||
|
||||
for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, rootDeviceEnvironment));
|
||||
for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
|
||||
EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, isHwLocalIdGeneration, rootDeviceEnvironment));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user