Mirror of https://github.com/intel/compute-runtime.git (synced 2025-11-10 05:49:51 +08:00)
Add adjustMaxWorkGroupCount helper
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Committed by: Compute-Runtime-Automation
Parent: 8d60fb2a07
Commit: de1e4e0074
@@ -1056,15 +1056,18 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
 hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
 
 auto barrierCount = kernelDescriptor.kernelAttributes.barrierCount;
-return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(),
-availableThreadCount,
-dssCount,
-dssCount * KB * hardwareInfo.capabilityTable.slmSize,
-hwHelper.alignSlmSize(slmTotalSize),
-static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
-hwHelper.getBarriersCountFromHasBarriers(barrierCount),
-workDim,
-localWorkSize);
+auto maxWorkGroupCount = KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(),
+availableThreadCount,
+dssCount,
+dssCount * KB * hardwareInfo.capabilityTable.slmSize,
+hwHelper.alignSlmSize(slmTotalSize),
+static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
+hwHelper.getBarriersCountFromHasBarriers(barrierCount),
+workDim,
+localWorkSize);
+auto isEngineInstanced = commandQueue->getCommandStreamReceiver(false).getOsContext().isEngineInstanced();
+maxWorkGroupCount = hwHelper.adjustMaxWorkGroupCount(maxWorkGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
+return maxWorkGroupCount;
 }
 
 inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) {
Reference in New Issue
Block a user