From 4667f55ea20361631f569696ea6d16fe0ec6824b Mon Sep 17 00:00:00 2001 From: Aleksander Czerwionka Date: Fri, 26 Apr 2024 15:37:43 +0000 Subject: [PATCH] fix: add missing checks to return proper OpenCL error from API changes affect clEnqueueNDCountKernelINTEL and clGetKernelMaxConcurrentWorkGroupCountINTEL Related-To: NEO-9688 Signed-off-by: Aleksander Czerwionka --- opencl/source/api/api.cpp | 9 +++++++++ ...max_concurrent_work_group_count_intel_tests.inl | 4 ++++ .../command_queue/enqueue_kernel_1_tests.cpp | 14 ++++++++++++++ shared/source/helpers/kernel_helpers.cpp | 2 +- 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 308b3e0ad5..c051c6a334 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -6066,6 +6066,12 @@ cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(cl_command_queue return retVal; } + for (size_t i = 0; i < workDim; i++) { + if (localWorkSize[i] == 0) { + return CL_INVALID_WORK_GROUP_SIZE; + } + } + withCastToInternal(commandQueue, &pCommandQueue); *suggestedWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue); @@ -6112,6 +6118,9 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue, pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); size_t globalWorkSize[3]; for (size_t i = 0; i < workDim; i++) { + if (localWorkSize[i] == 0) { + return CL_INVALID_WORK_GROUP_SIZE; + } globalWorkSize[i] = workgroupCount[i] * localWorkSize[i]; } diff --git a/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl index e39e6a07e2..e86cabdbd5 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl @@ -52,6 +52,10 @@ 
TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenInvalidInputWhenCalling retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, nullptr, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); + + retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, + globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); + EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); } TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGettingMaxConcurrentWorkGroupCountThenCorrectValuesAreReturned) { diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 41bcaa5802..068e0084a2 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -391,6 +391,20 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernel EXPECT_EQ(CL_SUCCESS, retVal); } +TEST_F(EnqueueKernelTest, givenLocalWorkSizeEqualZeroThenClEnqueueNDCountKernelINTELReturnsError) { + size_t workgroupCount[3] = {1, 1, 1}; + size_t localWorkSize[3] = {0, 1, 1}; + cl_int retVal = CL_SUCCESS; + std::unique_ptr<MultiDeviceKernel> pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), retVal)); + + retVal = clEnqueueNDCountKernelINTEL(pCmdQ, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); + + pMultiDeviceKernel.get()->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL); + retVal = clEnqueueNDCountKernelINTEL(pCmdQ, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); +} + TEST_F(EnqueueKernelTest, 
givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalledTwiceThenClEnqueueNDCountKernelINTELReturnsError) { const size_t n = 512; size_t workgroupCount[3] = {2, 1, 1}; diff --git a/shared/source/helpers/kernel_helpers.cpp b/shared/source/helpers/kernel_helpers.cpp index f8afac43dd..7c45f70003 100644 --- a/shared/source/helpers/kernel_helpers.cpp +++ b/shared/source/helpers/kernel_helpers.cpp @@ -45,7 +45,7 @@ uint32_t KernelHelper::getMaxWorkGroupCount(const RootDeviceEnvironment &rootDev for (uint32_t i = 1; i < workDim; i++) { workGroupSize *= localWorkSize[i]; } - + UNRECOVERABLE_IF(workGroupSize == 0); auto numThreadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, kernelDescriptor.kernelAttributes.simdSize)); auto maxWorkGroupsCount = availableThreadCount / numThreadsPerThreadGroup;