Fix clGetKernelSuggestedLocalWorkSizeINTEL

Related-To: NEO-5456

Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
Konstanty Misiak 2021-05-10 16:46:41 +00:00 committed by Compute-Runtime-Automation
parent 754fb1fffc
commit 074fc1d60f
3 changed files with 44 additions and 6 deletions

View File

@ -5796,7 +5796,10 @@ cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(cl_command_queue comma
return retVal;
}
if (globalWorkSize == nullptr) {
if (globalWorkSize == nullptr ||
globalWorkSize[0] == 0 ||
(workDim > 1 && globalWorkSize[1] == 0) ||
(workDim > 2 && globalWorkSize[2] == 0)) {
retVal = CL_INVALID_GLOBAL_WORK_SIZE;
return retVal;
}

View File

@ -1011,8 +1011,8 @@ void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *glob
Vec3<size_t> elws{0, 0, 0};
Vec3<size_t> gws{
globalWorkSize[0],
(workDim > 1) ? globalWorkSize[1] : 0,
(workDim > 2) ? globalWorkSize[2] : 0};
(workDim > 1) ? globalWorkSize[1] : 1,
(workDim > 2) ? globalWorkSize[2] : 1};
Vec3<size_t> offset{0, 0, 0};
if (globalWorkOffset) {
offset.x = globalWorkOffset[0];
@ -1024,8 +1024,17 @@ void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *glob
}
}
const DispatchInfo dispatchInfo{&clDevice, this, workDim, gws, elws, offset};
auto suggestedLws = computeWorkgroupSize(dispatchInfo);
Vec3<size_t> suggestedLws{0, 0, 0};
if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] != 0) {
suggestedLws.x = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
suggestedLws.y = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
suggestedLws.z = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
} else {
uint32_t dispatchWorkDim = std::max(1U, std::max(gws.getSimplifiedDim(), offset.getSimplifiedDim()));
const DispatchInfo dispatchInfo{&clDevice, this, dispatchWorkDim, gws, elws, offset};
suggestedLws = computeWorkgroupSize(dispatchInfo);
}
localWorkSize[0] = suggestedLws.x;
if (workDim > 1)

View File

@ -19,7 +19,7 @@ namespace ULT {
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKernelSuggestedLocalWorkSizeThenErrorIsReturned) {
size_t globalWorkOffset[3];
size_t globalWorkSize[3];
size_t globalWorkSize[3] = {1, 1, 1};
size_t suggestedLocalWorkSize[3];
cl_uint workDim = 1;
@ -52,6 +52,14 @@ TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKe
retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, workDim,
globalWorkOffset, nullptr, suggestedLocalWorkSize);
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal);
for (size_t i = 0; i < 3; ++i) {
globalWorkSize[i] = 0;
retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 3,
globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal);
globalWorkSize[i] = 1;
}
}
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
@ -96,6 +104,24 @@ TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSugge
EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]);
}
// Verifies that a kernel whose descriptor carries a required work-group size
// gets exactly that size back as the suggested local work size.
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithReqdWorkGroupSizeWhenGettingSuggestedLocalWorkSizeThenRequiredWorkSizeIsReturned) {
    uint16_t requiredLws[3] = {32, 32, 32};
    size_t offsets[3] = {0, 0, 0};
    size_t globalSizes[3] = {128, 128, 128};
    size_t suggestedLws[3] = {0, 0, 0};

    // Stamp the required work-group size onto the mock kernel, one dimension at a time.
    MockKernelWithInternals kernelWithReqdSize(*pDevice);
    auto &kernelAttributes = kernelWithReqdSize.kernelInfo.kernelDescriptor.kernelAttributes;
    for (size_t dim = 0; dim < 3; ++dim) {
        kernelAttributes.requiredWorkgroupSize[dim] = requiredLws[dim];
    }

    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, kernelWithReqdSize.mockMultiDeviceKernel, 3, offsets, globalSizes, suggestedLws);

    EXPECT_EQ(CL_SUCCESS, retVal);
    // Every dimension of the suggestion must equal the required size.
    for (size_t dim = 0; dim < 3; ++dim) {
        EXPECT_EQ(requiredLws[dim], suggestedLws[dim]);
    }
}
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram);
auto kernelInfos = MockKernel::toKernelInfoContainer(pKernelWithExecutionEnvironmentPatch->getKernelInfo(), testedRootDeviceIndex);