Refactor setting max workgroup size for Kernel

Change-Id: I2a489d60a3ec9ee363c10e3a5f12b6c7ba4e8dd8
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-2946
This commit is contained in:
Dunajski, Bartosz
2019-09-13 14:09:49 +02:00
committed by sys_ocldev
parent f7f420b066
commit ca94628058
3 changed files with 14 additions and 10 deletions

View File

@@ -72,7 +72,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
numWorkGroupsX(&Kernel::dummyPatchLocation),
numWorkGroupsY(&Kernel::dummyPatchLocation),
numWorkGroupsZ(&Kernel::dummyPatchLocation),
-maxWorkGroupSize(&Kernel::dummyPatchLocation),
+maxWorkGroupSizeForCrossThreadData(&Kernel::dummyPatchLocation),
workDim(&Kernel::dummyPatchLocation),
dataParameterSimdSize(&Kernel::dummyPatchLocation),
parentEventOffset(&Kernel::dummyPatchLocation),
@@ -96,6 +96,8 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
usingSharedObjArgs(false) {
program->retain();
imageTransformer.reset(new ImageTransformer);
+maxKernelWorkGroupSize = static_cast<uint32_t>(device.getDeviceInfo().maxWorkGroupSize);
}
Kernel::~Kernel() {
@@ -186,6 +188,8 @@ cl_int Kernel::initialize() {
const auto &heapInfo = kernelInfo.heapInfo;
const auto &patchInfo = kernelInfo.patchInfo;
+reconfigureKernel();
crossThreadDataSize = patchInfo.dataParameterStream
? patchInfo.dataParameterStream->DataParameterStreamSize
: 0;
@@ -225,13 +229,13 @@ cl_int Kernel::initialize() {
numWorkGroupsY = workloadInfo.numWorkGroupsOffset[1] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[1]) : numWorkGroupsY;
numWorkGroupsZ = workloadInfo.numWorkGroupsOffset[2] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[2]) : numWorkGroupsZ;
-maxWorkGroupSize = workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.maxWorkGroupSizeOffset) : maxWorkGroupSize;
+maxWorkGroupSizeForCrossThreadData = workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.maxWorkGroupSizeOffset) : maxWorkGroupSizeForCrossThreadData;
workDim = workloadInfo.workDimOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.workDimOffset) : workDim;
dataParameterSimdSize = workloadInfo.simdSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.simdSizeOffset) : dataParameterSimdSize;
parentEventOffset = workloadInfo.parentEventOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.parentEventOffset) : parentEventOffset;
preferredWkgMultipleOffset = workloadInfo.preferredWkgMultipleOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.preferredWkgMultipleOffset) : preferredWkgMultipleOffset;
-*maxWorkGroupSize = static_cast<uint32_t>(device.getDeviceInfo().maxWorkGroupSize);
+*maxWorkGroupSizeForCrossThreadData = maxKernelWorkGroupSize;
*dataParameterSimdSize = getKernelInfo().getMaxSimdSize();
*preferredWkgMultipleOffset = getKernelInfo().getMaxSimdSize();
*parentEventOffset = WorkloadInfo::invalidParentEvent;
@@ -370,8 +374,6 @@ cl_int Kernel::initialize() {
program->allocateBlockPrivateSurfaces();
}
-reconfigureKernel();
retVal = CL_SUCCESS;
} while (false);