fix: correct the limit on the number of threads per thread group

Take the max work group limit into account when limiting the number of threads per thread group.

Resolves: NEO-14922
Related-To: NEO-11881
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2025-05-20 14:41:49 +00:00
committed by Compute-Runtime-Automation
parent 5f80490385
commit 6ad4ad41b1
26 changed files with 171 additions and 228 deletions

View File

@@ -73,7 +73,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const
}
auto itemsInGroup = Math::computeTotalElementsCount(localWorkSize);
localIdsSize = static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, 3u, itemsInGroup, isHwLocalIdGeneration, rootDeviceEnvironment));
localIdsSize = static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, 3u, itemsInGroup, rootDeviceEnvironment));
localIdsSize = alignUp(localIdsSize, MemoryConstants::cacheLineSize);
}
return implicitArgsStructSize + localIdsSize;