mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-20 13:11:34 +08:00
Store device specific kernel members per root device
Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
8d2cfd87ae
commit
aa1fc85257
@ -132,7 +132,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
||||
",", globalWorkSizeIn[2],
|
||||
",SIMD:, ", kernelInfo.getMaxSimdSize());
|
||||
|
||||
if (totalWorkItems > kernel.maxKernelWorkGroupSize) {
|
||||
if (totalWorkItems > kernel.getMaxKernelWorkGroupSize(rootDeviceIndex)) {
|
||||
return CL_INVALID_WORK_GROUP_SIZE;
|
||||
}
|
||||
|
||||
|
@ -96,31 +96,13 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20);
|
||||
|
||||
// Patch our kernel constants
|
||||
*scheduler.globalWorkOffsetX = 0;
|
||||
*scheduler.globalWorkOffsetY = 0;
|
||||
*scheduler.globalWorkOffsetZ = 0;
|
||||
|
||||
*scheduler.globalWorkSizeX = (uint32_t)scheduler.getGws();
|
||||
*scheduler.globalWorkSizeY = 1;
|
||||
*scheduler.globalWorkSizeZ = 1;
|
||||
|
||||
*scheduler.localWorkSizeX = (uint32_t)scheduler.getLws();
|
||||
*scheduler.localWorkSizeY = 1;
|
||||
*scheduler.localWorkSizeZ = 1;
|
||||
|
||||
*scheduler.localWorkSizeX2 = (uint32_t)scheduler.getLws();
|
||||
*scheduler.localWorkSizeY2 = 1;
|
||||
*scheduler.localWorkSizeZ2 = 1;
|
||||
|
||||
*scheduler.enqueuedLocalWorkSizeX = (uint32_t)scheduler.getLws();
|
||||
*scheduler.enqueuedLocalWorkSizeY = 1;
|
||||
*scheduler.enqueuedLocalWorkSizeZ = 1;
|
||||
|
||||
*scheduler.numWorkGroupsX = (uint32_t)(scheduler.getGws() / scheduler.getLws());
|
||||
*scheduler.numWorkGroupsY = 0;
|
||||
*scheduler.numWorkGroupsZ = 0;
|
||||
|
||||
*scheduler.workDim = 1;
|
||||
scheduler.setGlobalWorkOffsetValues(rootDeviceIndex, 0, 0, 0);
|
||||
scheduler.setGlobalWorkSizeValues(rootDeviceIndex, static_cast<uint32_t>(scheduler.getGws()), 1, 1);
|
||||
scheduler.setLocalWorkSizeValues(rootDeviceIndex, static_cast<uint32_t>(scheduler.getLws()), 1, 1);
|
||||
scheduler.setLocalWorkSize2Values(rootDeviceIndex, static_cast<uint32_t>(scheduler.getLws()), 1, 1);
|
||||
scheduler.setEnqueuedLocalWorkSizeValues(rootDeviceIndex, static_cast<uint32_t>(scheduler.getLws()), 1, 1);
|
||||
scheduler.setNumWorkGroupsValues(rootDeviceIndex, static_cast<uint32_t>(scheduler.getGws() / scheduler.getLws()), 0, 0);
|
||||
scheduler.setWorkDim(rootDeviceIndex, 1);
|
||||
|
||||
// Send our indirect object data
|
||||
size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1};
|
||||
|
@ -196,36 +196,23 @@ void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQ
|
||||
|
||||
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
|
||||
|
||||
auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();
|
||||
// Patch our kernel constants
|
||||
*kernel.globalWorkOffsetX = static_cast<uint32_t>(offset.x);
|
||||
*kernel.globalWorkOffsetY = static_cast<uint32_t>(offset.y);
|
||||
*kernel.globalWorkOffsetZ = static_cast<uint32_t>(offset.z);
|
||||
kernel.setGlobalWorkOffsetValues(rootDeviceIndex, static_cast<uint32_t>(offset.x), static_cast<uint32_t>(offset.y), static_cast<uint32_t>(offset.z));
|
||||
kernel.setGlobalWorkSizeValues(rootDeviceIndex, static_cast<uint32_t>(gws.x), static_cast<uint32_t>(gws.y), static_cast<uint32_t>(gws.z));
|
||||
|
||||
*kernel.globalWorkSizeX = static_cast<uint32_t>(gws.x);
|
||||
*kernel.globalWorkSizeY = static_cast<uint32_t>(gws.y);
|
||||
*kernel.globalWorkSizeZ = static_cast<uint32_t>(gws.z);
|
||||
|
||||
if (isMainKernel || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) {
|
||||
*kernel.localWorkSizeX = static_cast<uint32_t>(lws.x);
|
||||
*kernel.localWorkSizeY = static_cast<uint32_t>(lws.y);
|
||||
*kernel.localWorkSizeZ = static_cast<uint32_t>(lws.z);
|
||||
if (isMainKernel || (!kernel.isLocalWorkSize2Patched(rootDeviceIndex))) {
|
||||
kernel.setLocalWorkSizeValues(rootDeviceIndex, static_cast<uint32_t>(lws.x), static_cast<uint32_t>(lws.y), static_cast<uint32_t>(lws.z));
|
||||
}
|
||||
|
||||
*kernel.localWorkSizeX2 = static_cast<uint32_t>(lws.x);
|
||||
*kernel.localWorkSizeY2 = static_cast<uint32_t>(lws.y);
|
||||
*kernel.localWorkSizeZ2 = static_cast<uint32_t>(lws.z);
|
||||
|
||||
*kernel.enqueuedLocalWorkSizeX = static_cast<uint32_t>(elws.x);
|
||||
*kernel.enqueuedLocalWorkSizeY = static_cast<uint32_t>(elws.y);
|
||||
*kernel.enqueuedLocalWorkSizeZ = static_cast<uint32_t>(elws.z);
|
||||
kernel.setLocalWorkSize2Values(rootDeviceIndex, static_cast<uint32_t>(lws.x), static_cast<uint32_t>(lws.y), static_cast<uint32_t>(lws.z));
|
||||
kernel.setEnqueuedLocalWorkSizeValues(rootDeviceIndex, static_cast<uint32_t>(elws.x), static_cast<uint32_t>(elws.y), static_cast<uint32_t>(elws.z));
|
||||
|
||||
if (isMainKernel) {
|
||||
*kernel.numWorkGroupsX = static_cast<uint32_t>(totalNumberOfWorkgroups.x);
|
||||
*kernel.numWorkGroupsY = static_cast<uint32_t>(totalNumberOfWorkgroups.y);
|
||||
*kernel.numWorkGroupsZ = static_cast<uint32_t>(totalNumberOfWorkgroups.z);
|
||||
kernel.setNumWorkGroupsValues(rootDeviceIndex, static_cast<uint32_t>(totalNumberOfWorkgroups.x), static_cast<uint32_t>(totalNumberOfWorkgroups.y), static_cast<uint32_t>(totalNumberOfWorkgroups.z));
|
||||
}
|
||||
|
||||
*kernel.workDim = dim;
|
||||
kernel.setWorkDim(rootDeviceIndex, dim);
|
||||
|
||||
// Send our indirect object data
|
||||
size_t localWorkSizes[3] = {lws.x, lws.y, lws.z};
|
||||
|
@ -427,7 +427,7 @@ Vec3<size_t> computeWorkgroupSize(const DispatchInfo &dispatchInfo) {
|
||||
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
|
||||
computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim());
|
||||
} else {
|
||||
auto maxWorkGroupSize = kernel->maxKernelWorkGroupSize;
|
||||
auto maxWorkGroupSize = kernel->getMaxKernelWorkGroupSize(rootDeviceIndex);
|
||||
auto simd = kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize();
|
||||
size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z};
|
||||
if (dispatchInfo.getDim() == 1) {
|
||||
|
Reference in New Issue
Block a user