fix: correct the limit on the number of threads per thread group

Take the max work group limit into account.

Resolves: NEO-14922
Related-To: NEO-11881
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2025-05-20 14:41:49 +00:00
committed by Compute-Runtime-Automation
parent 5f80490385
commit 6ad4ad41b1
26 changed files with 171 additions and 228 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -42,7 +42,7 @@ size_t LocalIdsCache::getLocalIdsSizeForGroup(const Vec3<uint16_t> &group, const
return static_cast<size_t>(numElementsInGroup * localIdsSizePerThread);
}
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
const auto numberOfThreads = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simdSize, numElementsInGroup, grfCount, false, rootDeviceEnvironment);
const auto numberOfThreads = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simdSize, numElementsInGroup, grfCount, rootDeviceEnvironment);
return static_cast<size_t>(numberOfThreads * localIdsSizePerThread);
}