improve lws algorithm

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2021-05-25 14:42:50 +00:00
committed by Compute-Runtime-Automation
parent c2dbaee128
commit 128c994821

View File

@ -423,7 +423,8 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
setSpecialWorkgroupSize(workGroupSize);
} else if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
WorkSizeInfo wsInfo(dispatchInfo);
if (wsInfo.slmTotalSize == 0 && !wsInfo.hasBarriers && !wsInfo.imgUsed && hwHelper.preferSmallWorkgroupSizeForKernel(kernel->getKernelInfo().heapInfo.KernelUnpaddedSize)) {
// Cap the maximum work-group size at two SIMD widths for kernels that use no
// SLM, no barriers and no images, when the HW helper prefers small work-groups
// for a kernel of this unpadded size. The cap is applied only to 1-D dispatches
// whose global size in x is a multiple of that cap.
// The divisor is parenthesised on purpose: `%` and `*` share precedence and
// associate left-to-right, so `x % simdSize * 2 == 0` would only test
// divisibility by simdSize instead of by simdSize * 2.
if (wsInfo.slmTotalSize == 0 && !wsInfo.hasBarriers && !wsInfo.imgUsed && hwHelper.preferSmallWorkgroupSizeForKernel(kernel->getKernelInfo().heapInfo.KernelUnpaddedSize) &&
    ((dispatchInfo.getDim() == 1) && (dispatchInfo.getGWS().x % (wsInfo.simdSize * 2) == 0))) {
    wsInfo.maxWorkGroupSize = wsInfo.simdSize * 2;
}