mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
improve lws algorithm
Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
c2dbaee128
commit
128c994821
@@ -423,7 +423,8 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
|
||||
setSpecialWorkgroupSize(workGroupSize);
|
||||
} else if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
|
||||
WorkSizeInfo wsInfo(dispatchInfo);
|
||||
if (wsInfo.slmTotalSize == 0 && !wsInfo.hasBarriers && !wsInfo.imgUsed && hwHelper.preferSmallWorkgroupSizeForKernel(kernel->getKernelInfo().heapInfo.KernelUnpaddedSize)) {
|
||||
if (wsInfo.slmTotalSize == 0 && !wsInfo.hasBarriers && !wsInfo.imgUsed && hwHelper.preferSmallWorkgroupSizeForKernel(kernel->getKernelInfo().heapInfo.KernelUnpaddedSize) &&
|
||||
((dispatchInfo.getDim() == 1) && (dispatchInfo.getGWS().x % wsInfo.simdSize * 2 == 0))) {
|
||||
wsInfo.maxWorkGroupSize = wsInfo.simdSize * 2;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user