mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 17:13:29 +08:00
fix: correct limitation for num threads per thread group
taking into account the max work group limit.
Resolves: NEO-14922
Related-To: NEO-11881
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5f80490385
commit
6ad4ad41b1
@@ -73,7 +73,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const
|
||||
}
|
||||
|
||||
auto itemsInGroup = Math::computeTotalElementsCount(localWorkSize);
|
||||
localIdsSize = static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, 3u, itemsInGroup, isHwLocalIdGeneration, rootDeviceEnvironment));
|
||||
localIdsSize = static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, 3u, itemsInGroup, rootDeviceEnvironment));
|
||||
localIdsSize = alignUp(localIdsSize, MemoryConstants::cacheLineSize);
|
||||
}
|
||||
return implicitArgsStructSize + localIdsSize;
|
||||
|
||||
Reference in New Issue
Block a user