mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
Refactor setting max workgroup size for Kernel
Change-Id: I2a489d60a3ec9ee363c10e3a5f12b6c7ba4e8dd8 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com> Related-To: NEO-2946
This commit is contained in:
committed by
sys_ocldev
parent
f7f420b066
commit
ca94628058
@@ -72,7 +72,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
|
||||
numWorkGroupsX(&Kernel::dummyPatchLocation),
|
||||
numWorkGroupsY(&Kernel::dummyPatchLocation),
|
||||
numWorkGroupsZ(&Kernel::dummyPatchLocation),
|
||||
maxWorkGroupSize(&Kernel::dummyPatchLocation),
|
||||
maxWorkGroupSizeForCrossThreadData(&Kernel::dummyPatchLocation),
|
||||
workDim(&Kernel::dummyPatchLocation),
|
||||
dataParameterSimdSize(&Kernel::dummyPatchLocation),
|
||||
parentEventOffset(&Kernel::dummyPatchLocation),
|
||||
@@ -96,6 +96,8 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
|
||||
usingSharedObjArgs(false) {
|
||||
program->retain();
|
||||
imageTransformer.reset(new ImageTransformer);
|
||||
|
||||
maxKernelWorkGroupSize = static_cast<uint32_t>(device.getDeviceInfo().maxWorkGroupSize);
|
||||
}
|
||||
|
||||
Kernel::~Kernel() {
|
||||
@@ -186,6 +188,8 @@ cl_int Kernel::initialize() {
|
||||
const auto &heapInfo = kernelInfo.heapInfo;
|
||||
const auto &patchInfo = kernelInfo.patchInfo;
|
||||
|
||||
reconfigureKernel();
|
||||
|
||||
crossThreadDataSize = patchInfo.dataParameterStream
|
||||
? patchInfo.dataParameterStream->DataParameterStreamSize
|
||||
: 0;
|
||||
@@ -225,13 +229,13 @@ cl_int Kernel::initialize() {
|
||||
numWorkGroupsY = workloadInfo.numWorkGroupsOffset[1] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[1]) : numWorkGroupsY;
|
||||
numWorkGroupsZ = workloadInfo.numWorkGroupsOffset[2] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[2]) : numWorkGroupsZ;
|
||||
|
||||
maxWorkGroupSize = workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.maxWorkGroupSizeOffset) : maxWorkGroupSize;
|
||||
maxWorkGroupSizeForCrossThreadData = workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.maxWorkGroupSizeOffset) : maxWorkGroupSizeForCrossThreadData;
|
||||
workDim = workloadInfo.workDimOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.workDimOffset) : workDim;
|
||||
dataParameterSimdSize = workloadInfo.simdSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.simdSizeOffset) : dataParameterSimdSize;
|
||||
parentEventOffset = workloadInfo.parentEventOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.parentEventOffset) : parentEventOffset;
|
||||
preferredWkgMultipleOffset = workloadInfo.preferredWkgMultipleOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.preferredWkgMultipleOffset) : preferredWkgMultipleOffset;
|
||||
|
||||
*maxWorkGroupSize = static_cast<uint32_t>(device.getDeviceInfo().maxWorkGroupSize);
|
||||
*maxWorkGroupSizeForCrossThreadData = maxKernelWorkGroupSize;
|
||||
*dataParameterSimdSize = getKernelInfo().getMaxSimdSize();
|
||||
*preferredWkgMultipleOffset = getKernelInfo().getMaxSimdSize();
|
||||
*parentEventOffset = WorkloadInfo::invalidParentEvent;
|
||||
@@ -370,8 +374,6 @@ cl_int Kernel::initialize() {
|
||||
program->allocateBlockPrivateSurfaces();
|
||||
}
|
||||
|
||||
reconfigureKernel();
|
||||
|
||||
retVal = CL_SUCCESS;
|
||||
|
||||
} while (false);
|
||||
|
||||
Reference in New Issue
Block a user