Revert "fix: correct limitation for num threads per thread group"

This reverts commit 6ad4ad41b1.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2025-05-23 02:11:46 +02:00
committed by Compute-Runtime-Automation
parent d6849a5605
commit 593c9e76f2
26 changed files with 223 additions and 166 deletions

View File

@@ -255,8 +255,28 @@ uint32_t GfxCoreHelperHw<Family>::overrideMaxWorkGroupSize(uint32_t maxWG) const
}
template <>
uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, rootDeviceEnvironment);
// Computes how many threads a thread group gets for the given SIMD width and
// work-item count, clamped to a cap chosen from the GRF count.
//
// simd                  - SIMD width; a width for which isSimd1() is true is treated as 32 when picking the cap.
// totalWorkItems        - number of work items, forwarded to getThreadsPerWG().
// grfCount              - GRF count used to select the cap.
// isHwLocalIdGeneration - when true, the raised caps are not applied for an effective width of 32.
// rootDeviceEnvironment - not read by this specialization.
//
// Returns the unclamped getThreadsPerWG() result when the debug flag
// RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup equals 1; otherwise
// the smaller of that result and the selected cap.
uint32_t GfxCoreHelperHw<Family>::calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) const {
    const uint32_t requestedThreads = getThreadsPerWG(simd, totalWorkItems);

    // Debug override: skip every cap below.
    if (debugManager.flags.RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup.get() == 1) {
        return requestedThreads;
    }

    const uint32_t effectiveSimd = isSimd1(simd) ? 32u : simd;

    // The raised caps are only available for width 16, or for width 32
    // when local ids are not generated by hardware.
    const bool raisedCapAllowed = (effectiveSimd == 16u) || (effectiveSimd == 32u && !isHwLocalIdGeneration);

    uint32_t threadCap = 32u; // cap used when no case below matches
    if (grfCount == 512) {
        // This GRF count always gets the lowest cap, regardless of SIMD width.
        threadCap = 16u;
    } else if (raisedCapAllowed) {
        if (grfCount == 192) {
            threadCap = 40u;
        } else if (grfCount == 160) {
            threadCap = 48u;
        } else if (grfCount <= 128) {
            threadCap = 64u;
        }
    }

    return std::min(requestedThreads, threadCap);
}
// Returns a work-group size derived from defaultMaxGroupSize: the thread count
// that calculateNumThreadsPerThreadGroup() yields for defaultMaxGroupSize work
// items (given simd, grfCount and isHwLocalGeneration), multiplied by simd.
template <>
uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, bool isHwLocalGeneration, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
    const uint32_t cappedThreadCount =
        calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, isHwLocalGeneration, rootDeviceEnvironment);

    // Convert the thread count back into a work-item count.
    return cappedThreadCount * simd;
}
} // namespace NEO