Respect KernelExecutionType in enqueue kernel calls

Change-Id: I9de07f9e3b77c4a44f6a0127e0ae3bd7e1ab97f8
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2020-01-24 15:06:55 +01:00
committed by sys_ocldev
parent d6f4520599
commit 07c4682668
4 changed files with 116 additions and 32 deletions

View File

@@ -3143,7 +3143,8 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
         return retVal;
     }
 
-    if (pKernel->getKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr) {
+    if ((pKernel->getExecutionType() != KernelExecutionType::Default) ||
+        pKernel->isUsingSyncBuffer()) {
         retVal = CL_INVALID_KERNEL;
         TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
         return retVal;
@@ -5330,16 +5331,28 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
     }
 
     size_t globalWorkSize[3];
-    size_t requestedNumberOfWorkgroups = 1;
     for (size_t i = 0; i < workDim; i++) {
         globalWorkSize[i] = workgroupCount[i] * localWorkSize[i];
-        requestedNumberOfWorkgroups *= workgroupCount[i];
     }
 
-    size_t maximalNumberOfWorkgroupsAllowed = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
-    if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) {
-        retVal = CL_INVALID_VALUE;
-        return retVal;
+    if (pKernel->getExecutionType() == KernelExecutionType::Concurrent) {
+        size_t requestedNumberOfWorkgroups = 1;
+        for (size_t i = 0; i < workDim; i++) {
+            requestedNumberOfWorkgroups *= workgroupCount[i];
+        }
+        size_t maximalNumberOfWorkgroupsAllowed = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
+        if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) {
+            retVal = CL_INVALID_VALUE;
+            return retVal;
+        }
+    }
+
+    if (pKernel->isUsingSyncBuffer()) {
+        if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) {
+            retVal = CL_INVALID_KERNEL;
+            return retVal;
+        }
+        platform()->clDeviceMap[&pCommandQueue->getDevice()]->allocateSyncBufferHandler();
     }
 
     TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
@@ -5347,8 +5360,6 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
         gtpinNotifyKernelSubmit(kernel, pCommandQueue);
     }
 
-    platform()->clDeviceMap[&pCommandQueue->getDevice()]->allocateSyncBufferHandler();
-
     retVal = pCommandQueue->enqueueKernel(
         kernel,
         workDim,