Add clEnqueueNDRangeKernelINTEL API

Related-To: NEO-2712

Change-Id: If1d16d9d626871a9dc4b19282f9edc5786ffa398
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2019-11-19 16:54:47 +01:00
committed by sys_ocldev
parent 7be937c226
commit 82bc594af0
26 changed files with 764 additions and 14 deletions

View File

@@ -3135,10 +3135,11 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
"event", DebugManager.getEvents(reinterpret_cast<const uintptr_t *>(event), 1));
CommandQueue *pCommandQueue = nullptr;
Kernel *pKernel = nullptr;
retVal = validateObjects(
WithCastToInternal(commandQueue, &pCommandQueue),
kernel,
WithCastToInternal(kernel, &pKernel),
EventWaitList(numEventsInWaitList, eventWaitList));
if (CL_SUCCESS != retVal) {
@@ -3146,7 +3147,12 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
return retVal;
}
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
if (pKernel->getKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr) {
retVal = CL_INVALID_KERNEL;
TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
return retVal;
}
TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
if (gtpinIsGTPinInitialized()) {
gtpinNotifyKernelSubmit(kernel, pCommandQueue);
@@ -3947,6 +3953,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetExecutionInfoINTEL);
RETURN_FUNC_PTR_IF_EXIST(clEnqueueNDRangeKernelINTEL);
void *ret = sharingFactory.getExtensionFunctionAddress(funcName);
if (ret != nullptr) {
@@ -5197,3 +5204,70 @@ cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
return retVal;
}
// Enqueues a kernel whose launch size is expressed as a number of work-groups
// per dimension (workgroupCount) instead of a global work size.
//
// The global work size passed on to CommandQueue::enqueueKernel is computed as
// workgroupCount[i] * localWorkSize[i] for each of the workDim dimensions.
//
// Returns:
//   - the error produced by validateObjects when the command queue, kernel or
//     event wait list is rejected;
//   - CL_INVALID_WORK_DIMENSION when workDim is not in the range 1..3;
//   - CL_INVALID_VALUE when workgroupCount or localWorkSize is null, or when
//     the total number of requested work-groups exceeds the value returned by
//     Kernel::getMaxWorkGroupCount for the given local work size;
//   - otherwise the value returned by CommandQueue::enqueueKernel.
cl_int CL_API_CALL clEnqueueNDRangeKernelINTEL(cl_command_queue commandQueue,
                                               cl_kernel kernel,
                                               cl_uint workDim,
                                               const size_t *globalWorkOffset,
                                               const size_t *workgroupCount,
                                               const size_t *localWorkSize,
                                               cl_uint numEventsInWaitList,
                                               const cl_event *eventWaitList,
                                               cl_event *event) {
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel,
                   "globalWorkOffset[0]", DebugManager.getInput(globalWorkOffset, 0),
                   "globalWorkOffset[1]", DebugManager.getInput(globalWorkOffset, 1),
                   "globalWorkOffset[2]", DebugManager.getInput(globalWorkOffset, 2),
                   "workgroupCount", DebugManager.getSizes(workgroupCount, workDim, false),
                   "localWorkSize", DebugManager.getSizes(localWorkSize, workDim, true),
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", DebugManager.getEvents(reinterpret_cast<const uintptr_t *>(eventWaitList), numEventsInWaitList),
                   "event", DebugManager.getEvents(reinterpret_cast<const uintptr_t *>(event), 1));

    CommandQueue *pCommandQueue = nullptr;
    Kernel *pKernel = nullptr;

    retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(kernel, &pKernel),
        EventWaitList(numEventsInWaitList, eventWaitList));

    if (CL_SUCCESS != retVal) {
        return retVal;
    }

    // workDim is caller-controlled and is used below as the bound of a loop that
    // writes into a fixed 3-element array; reject out-of-range values first so
    // the loop can never write past the end of globalWorkSize.
    constexpr cl_uint maxWorkDim = 3;
    if (workDim == 0 || workDim > maxWorkDim) {
        retVal = CL_INVALID_WORK_DIMENSION;
        return retVal;
    }

    // Both arrays are dereferenced to derive the global work size, so neither
    // of them may be null for this entry point.
    if (workgroupCount == nullptr || localWorkSize == nullptr) {
        retVal = CL_INVALID_VALUE;
        return retVal;
    }

    // Zero-initialised so that dimensions beyond workDim never hold
    // indeterminate values.
    size_t globalWorkSize[maxWorkDim] = {};
    size_t requestedNumberOfWorkgroups = 1;
    for (size_t i = 0; i < workDim; i++) {
        globalWorkSize[i] = workgroupCount[i] * localWorkSize[i];
        requestedNumberOfWorkgroups *= workgroupCount[i];
    }

    // Refuse launches that ask for more work-groups than the kernel reports as
    // its maximum for this local work size.
    size_t maximalNumberOfWorkgroupsAllowed = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
    if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) {
        retVal = CL_INVALID_VALUE;
        return retVal;
    }

    // Kernel ownership is taken only when GTPin is initialized (second argument).
    TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
    if (gtpinIsGTPinInitialized()) {
        gtpinNotifyKernelSubmit(kernel, pCommandQueue);
    }

    // NOTE(review): presumably prepares the device-side sync buffer needed by
    // kernels launched through this entry point - confirm against Device.
    pCommandQueue->getDevice().allocateSyncBufferHandler();

    retVal = pCommandQueue->enqueueKernel(
        kernel,
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event);

    DBG_LOG_INPUTS("event", DebugManager.getEvents(reinterpret_cast<const uintptr_t *>(event), 1u));
    return retVal;
}