From 074fc1d60f1336c8010a23e228c49ae75e3c6b8e Mon Sep 17 00:00:00 2001 From: Konstanty Misiak Date: Mon, 10 May 2021 16:46:41 +0000 Subject: [PATCH] Fix clGetKernelSuggestedLocalWorkSizeINTEL Related-To: NEO-5456 Signed-off-by: Konstanty Misiak --- opencl/source/api/api.cpp | 5 +++- opencl/source/kernel/kernel.cpp | 17 ++++++++--- ..._suggested_local_work_size_intel_tests.inl | 28 ++++++++++++++++++- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 668e80700d..e315761da9 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -5796,7 +5796,10 @@ cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(cl_command_queue comma return retVal; } - if (globalWorkSize == nullptr) { + if (globalWorkSize == nullptr || + globalWorkSize[0] == 0 || + (workDim > 1 && globalWorkSize[1] == 0) || + (workDim > 2 && globalWorkSize[2] == 0)) { retVal = CL_INVALID_GLOBAL_WORK_SIZE; return retVal; } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index fc9a12c2f4..3c3059d70b 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1011,8 +1011,8 @@ void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *glob Vec3 elws{0, 0, 0}; Vec3 gws{ globalWorkSize[0], - (workDim > 1) ? globalWorkSize[1] : 0, - (workDim > 2) ? globalWorkSize[2] : 0}; + (workDim > 1) ? globalWorkSize[1] : 1, + (workDim > 2) ? globalWorkSize[2] : 1}; Vec3 offset{0, 0, 0}; if (globalWorkOffset) { offset.x = globalWorkOffset[0]; @@ -1024,8 +1024,17 @@ void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *glob } } - const DispatchInfo dispatchInfo{&clDevice, this, workDim, gws, elws, offset}; - auto suggestedLws = computeWorkgroupSize(dispatchInfo); + Vec3 suggestedLws{0, 0, 0}; + + if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] != 0) { + suggestedLws.x = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; + suggestedLws.y = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; + suggestedLws.z = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; + } else { + uint32_t dispatchWorkDim = std::max(1U, std::max(gws.getSimplifiedDim(), offset.getSimplifiedDim())); + const DispatchInfo dispatchInfo{&clDevice, this, dispatchWorkDim, gws, elws, offset}; + suggestedLws = computeWorkgroupSize(dispatchInfo); + } localWorkSize[0] = suggestedLws.x; if (workDim > 1) diff --git a/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl index de0b4f4ffa..a5e3646b1b 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl @@ -19,7 +19,7 @@ namespace ULT { TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKernelSuggestedLocalWorkSizeThenErrorIsReturned) { size_t globalWorkOffset[3]; - size_t globalWorkSize[3]; + size_t globalWorkSize[3] = {1, 1, 1}; size_t suggestedLocalWorkSize[3]; cl_uint workDim = 1; @@ -52,6 +52,14 @@ TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKe retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, nullptr, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal); + + for (size_t i = 0; i < 3; ++i) { + globalWorkSize[i] = 0; + retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 3, + globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); + EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal); + globalWorkSize[i] = 1; + } } TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) { @@ -96,6 +104,24 @@ TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSugge EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]); } +TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithReqdWorkGroupSizeWhenGettingSuggestedLocalWorkSizeThenRequiredWorkSizeIsReturned) { + size_t globalWorkOffset[] = {0, 0, 0}; + size_t globalWorkSize[] = {128, 128, 128}; + size_t suggestedLocalWorkSize[] = {0, 0, 0}; + uint16_t regdLocalWorkSize[] = {32, 32, 32}; + + MockKernelWithInternals mockKernel(*pDevice); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = regdLocalWorkSize[0]; + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = regdLocalWorkSize[1]; + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = regdLocalWorkSize[2]; + + retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, mockKernel.mockMultiDeviceKernel, 3, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(regdLocalWorkSize[0], suggestedLocalWorkSize[0]); + EXPECT_EQ(regdLocalWorkSize[1], suggestedLocalWorkSize[1]); + EXPECT_EQ(regdLocalWorkSize[2], suggestedLocalWorkSize[2]); +} + TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) { auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram); auto kernelInfos = MockKernel::toKernelInfoContainer(pKernelWithExecutionEnvironmentPatch->getKernelInfo(), testedRootDeviceIndex);