fix: add missing checks to return proper OpenCL error from API
Changes affect clEnqueueNDCountKernelINTEL and clGetKernelMaxConcurrentWorkGroupCountINTEL.

Related-To: NEO-9688
Signed-off-by: Aleksander Czerwionka <aleksander.czerwionka@intel.com>
This commit is contained in:
parent
8a60257aac
commit
4667f55ea2
|
@ -6066,6 +6066,12 @@ cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(cl_command_queue
|
||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < workDim; i++) {
|
||||||
|
if (localWorkSize[i] == 0) {
|
||||||
|
return CL_INVALID_WORK_GROUP_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
withCastToInternal(commandQueue, &pCommandQueue);
|
withCastToInternal(commandQueue, &pCommandQueue);
|
||||||
*suggestedWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue);
|
*suggestedWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue);
|
||||||
|
|
||||||
|
@ -6112,6 +6118,9 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
|
||||||
pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
|
pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
|
||||||
size_t globalWorkSize[3];
|
size_t globalWorkSize[3];
|
||||||
for (size_t i = 0; i < workDim; i++) {
|
for (size_t i = 0; i < workDim; i++) {
|
||||||
|
if (localWorkSize[i] == 0) {
|
||||||
|
return CL_INVALID_WORK_GROUP_SIZE;
|
||||||
|
}
|
||||||
globalWorkSize[i] = workgroupCount[i] * localWorkSize[i];
|
globalWorkSize[i] = workgroupCount[i] * localWorkSize[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,10 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenInvalidInputWhenCalling
|
||||||
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim,
|
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim,
|
||||||
globalWorkOffset, nullptr, &suggestedWorkGroupCount);
|
globalWorkOffset, nullptr, &suggestedWorkGroupCount);
|
||||||
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal);
|
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal);
|
||||||
|
|
||||||
|
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim,
|
||||||
|
globalWorkOffset, localWorkSize, &suggestedWorkGroupCount);
|
||||||
|
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGettingMaxConcurrentWorkGroupCountThenCorrectValuesAreReturned) {
|
TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGettingMaxConcurrentWorkGroupCountThenCorrectValuesAreReturned) {
|
||||||
|
|
|
@ -391,6 +391,20 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernel
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(EnqueueKernelTest, givenLocalWorkSizeEqualZeroThenClEnqueueNDCountKernelINTELReturnsError) {
|
||||||
|
size_t workgroupCount[3] = {1, 1, 1};
|
||||||
|
size_t localWorkSize[3] = {0, 1, 1};
|
||||||
|
cl_int retVal = CL_SUCCESS;
|
||||||
|
std::unique_ptr<MultiDeviceKernel> pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), retVal));
|
||||||
|
|
||||||
|
retVal = clEnqueueNDCountKernelINTEL(pCmdQ, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr);
|
||||||
|
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal);
|
||||||
|
|
||||||
|
pMultiDeviceKernel.get()->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL);
|
||||||
|
retVal = clEnqueueNDCountKernelINTEL(pCmdQ, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr);
|
||||||
|
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalledTwiceThenClEnqueueNDCountKernelINTELReturnsError) {
|
TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalledTwiceThenClEnqueueNDCountKernelINTELReturnsError) {
|
||||||
const size_t n = 512;
|
const size_t n = 512;
|
||||||
size_t workgroupCount[3] = {2, 1, 1};
|
size_t workgroupCount[3] = {2, 1, 1};
|
||||||
|
|
|
@ -45,7 +45,7 @@ uint32_t KernelHelper::getMaxWorkGroupCount(const RootDeviceEnvironment &rootDev
|
||||||
for (uint32_t i = 1; i < workDim; i++) {
|
for (uint32_t i = 1; i < workDim; i++) {
|
||||||
workGroupSize *= localWorkSize[i];
|
workGroupSize *= localWorkSize[i];
|
||||||
}
|
}
|
||||||
|
UNRECOVERABLE_IF(workGroupSize == 0);
|
||||||
auto numThreadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, kernelDescriptor.kernelAttributes.simdSize));
|
auto numThreadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, kernelDescriptor.kernelAttributes.simdSize));
|
||||||
auto maxWorkGroupsCount = availableThreadCount / numThreadsPerThreadGroup;
|
auto maxWorkGroupsCount = availableThreadCount / numThreadsPerThreadGroup;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue