Add clGetKernelMaxConcurrentWorkGroupCountINTEL

clGetKernelMaxConcurrentWorkGroupCountINTEL replaces the
clGetExecutionInfoINTEL function.

Change-Id: I7e3461695de7ee4c0e43c3e9770724b025c0e2be
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2020-01-22 12:47:58 +01:00
committed by sys_ocldev
parent 2c568542f1
commit 9c16c1a425
10 changed files with 123 additions and 113 deletions

View File

@@ -3977,8 +3977,8 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemAdviseINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetKernelMaxConcurrentWorkGroupCountINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetKernelSuggestedLocalWorkSizeINTEL);
RETURN_FUNC_PTR_IF_EXIST(clGetExecutionInfoINTEL);
RETURN_FUNC_PTR_IF_EXIST(clEnqueueNDCountKernelINTEL);
void *ret = sharingFactory.getExtensionFunctionAddress(funcName);
@@ -5252,15 +5252,12 @@ cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(cl_command_queue comma
return retVal;
}
cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
cl_kernel kernel,
cl_uint workDim,
const size_t *globalWorkOffset,
const size_t *localWorkSize,
cl_execution_info_intel paramName,
size_t paramValueSize,
void *paramValue,
size_t *paramValueSizeRet) {
cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(cl_command_queue commandQueue,
cl_kernel kernel,
cl_uint workDim,
const size_t *globalWorkOffset,
const size_t *localWorkSize,
size_t *suggestedWorkGroupCount) {
cl_int retVal = CL_SUCCESS;
API_ENTER(&retVal);
@@ -5269,8 +5266,7 @@ cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
"globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1),
"globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2),
"localWorkSize", NEO::FileLoggerInstance().getSizes(localWorkSize, workDim, true),
"paramName", paramName, "paramValueSize", paramValueSize,
"paramValue", paramValue, "paramValueSizeRet", paramValueSizeRet);
"suggestedWorkGroupCount", suggestedWorkGroupCount);
retVal = validateObjects(commandQueue, kernel);
@@ -5278,28 +5274,34 @@ cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
return retVal;
}
if ((workDim == 0) || (workDim > 3)) {
retVal = CL_INVALID_WORK_DIMENSION;
return retVal;
}
if (globalWorkOffset == nullptr) {
retVal = CL_INVALID_GLOBAL_OFFSET;
return retVal;
}
if (localWorkSize == nullptr) {
retVal = CL_INVALID_WORK_GROUP_SIZE;
return retVal;
}
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
if (!pKernel->isPatched()) {
retVal = CL_INVALID_KERNEL;
return retVal;
}
TakeOwnershipWrapper<Kernel> kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
switch (paramName) {
case CL_EXECUTION_INFO_MAX_WORKGROUP_COUNT_INTEL:
if ((paramValueSize < sizeof(uint32_t)) || (paramValue == nullptr)) {
retVal = CL_INVALID_VALUE;
return retVal;
}
*reinterpret_cast<uint32_t *>(paramValue) = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
if (paramValueSizeRet != nullptr) {
*paramValueSizeRet = sizeof(uint32_t);
}
break;
default:
if (suggestedWorkGroupCount == nullptr) {
retVal = CL_INVALID_VALUE;
return retVal;
}
*suggestedWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize);
return retVal;
}

View File

@@ -1034,16 +1034,13 @@ cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(
const size_t *globalWorkSize,
size_t *suggestedLocalWorkSize);
cl_int CL_API_CALL clGetExecutionInfoINTEL(
cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(
cl_command_queue commandQueue,
cl_kernel kernel,
cl_uint workDim,
const size_t *globalWorkOffset,
const size_t *localWorkSize,
cl_execution_info_intel paramName,
size_t paramValueSize,
void *paramValue,
size_t *paramValueSizeRet);
size_t *suggestedWorkGroupCount);
cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(
cl_command_queue commandQueue,