mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 22:43:00 +08:00
Revert "fix: correct limitation for num threads per thread group"
This reverts commit 6ad4ad41b1.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d6849a5605
commit
593c9e76f2
@@ -255,8 +255,28 @@ uint32_t GfxCoreHelperHw<Family>::overrideMaxWorkGroupSize(uint32_t maxWG) const
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, rootDeviceEnvironment);
|
||||
// Returns the number of threads per thread group for the given SIMD size and
// work-item count, clamped to a limit chosen from the GRF count.
//
// simd                   - SIMD width of the kernel; SIMD1 is treated as 32 when picking the limit.
// totalWorkItems         - work-item count forwarded to getThreadsPerWG().
// grfCount               - GRF count used to select the per-group thread limit.
// isHwLocalIdGeneration  - when true, the raised limits are not applied for an effective SIMD of 32.
// rootDeviceEnvironment  - not read in this body.
//
// When the RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup debug flag is 1,
// the value from getThreadsPerWG() is returned without clamping.
uint32_t GfxCoreHelperHw<Family>::calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) const {
    const uint32_t requestedThreads = getThreadsPerWG(simd, totalWorkItems);

    const bool restrictionsRemoved = debugManager.flags.RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup.get() == 1;
    if (restrictionsRemoved) {
        return requestedThreads;
    }

    const uint32_t effectiveSimd = isSimd1(simd) ? 32u : simd;

    // The limits above 32 apply to SIMD16 always, and to SIMD32 only when
    // local ids are not generated by hardware.
    const bool raisedLimitAllowed = (effectiveSimd == 16u) || (effectiveSimd == 32u && !isHwLocalIdGeneration);

    uint32_t threadLimit = 32u;
    if (grfCount == 512) {
        // 512 GRF lowers the limit regardless of SIMD width.
        threadLimit = 16u;
    } else if (raisedLimitAllowed) {
        if (grfCount == 192) {
            threadLimit = 40u;
        } else if (grfCount == 160) {
            threadLimit = 48u;
        } else if (grfCount <= 128) {
            threadLimit = 64u;
        }
    }

    return std::min(requestedThreads, threadLimit);
}
|
||||
|
||||
template <>
|
||||
uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, bool isHwLocalGeneration, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, isHwLocalGeneration, rootDeviceEnvironment);
|
||||
return (threadsPerThreadGroup * simd);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user