mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
fix: align thread group to dss size if kernel uses slm
Related-To: NEO-12133
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b17fabb120
commit
9d6d6e85f1
@@ -54,16 +54,16 @@ uint32_t KernelHelper::getMaxWorkGroupCount(const RootDeviceEnvironment &rootDev
|
||||
UNRECOVERABLE_IF(workGroupSize == 0);
|
||||
auto numThreadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simdSize));
|
||||
auto maxWorkGroupsCount = availableThreadCount / numThreadsPerThreadGroup;
|
||||
|
||||
if (barrierCount > 0) {
|
||||
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / barrierCount);
|
||||
if (barrierCount > 0 || usedSlmSize > 0) {
|
||||
helper.alignThreadGroupCountToDssSize(maxWorkGroupsCount, dssCount, availableThreadCount / dssCount, numThreadsPerThreadGroup);
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
|
||||
}
|
||||
|
||||
if (usedSlmSize > 0) {
|
||||
auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
|
||||
if (barrierCount > 0) {
|
||||
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / barrierCount);
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
|
||||
}
|
||||
if (usedSlmSize > 0) {
|
||||
auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
|
||||
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
|
||||
}
|
||||
}
|
||||
|
||||
maxWorkGroupsCount = helper.adjustMaxWorkGroupCount(maxWorkGroupsCount, engineGroupType, rootDeviceEnvironment);
|
||||
|
||||
Reference in New Issue
Block a user