Fix clGetKernelSuggestedLocalWorkSizeINTEL
Related-To: NEO-5456
Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
parent
754fb1fffc
commit
074fc1d60f
|
@ -5796,7 +5796,10 @@ cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(cl_command_queue comma
|
|||
return retVal;
|
||||
}
|
||||
|
||||
if (globalWorkSize == nullptr) {
|
||||
if (globalWorkSize == nullptr ||
|
||||
globalWorkSize[0] == 0 ||
|
||||
(workDim > 1 && globalWorkSize[1] == 0) ||
|
||||
(workDim > 2 && globalWorkSize[2] == 0)) {
|
||||
retVal = CL_INVALID_GLOBAL_WORK_SIZE;
|
||||
return retVal;
|
||||
}
|
||||
|
|
|
@ -1011,8 +1011,8 @@ void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *glob
|
|||
Vec3<size_t> elws{0, 0, 0};
|
||||
Vec3<size_t> gws{
|
||||
globalWorkSize[0],
|
||||
(workDim > 1) ? globalWorkSize[1] : 0,
|
||||
(workDim > 2) ? globalWorkSize[2] : 0};
|
||||
(workDim > 1) ? globalWorkSize[1] : 1,
|
||||
(workDim > 2) ? globalWorkSize[2] : 1};
|
||||
Vec3<size_t> offset{0, 0, 0};
|
||||
if (globalWorkOffset) {
|
||||
offset.x = globalWorkOffset[0];
|
||||
|
@ -1024,8 +1024,17 @@ void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *glob
|
|||
}
|
||||
}
|
||||
|
||||
const DispatchInfo dispatchInfo{&clDevice, this, workDim, gws, elws, offset};
|
||||
auto suggestedLws = computeWorkgroupSize(dispatchInfo);
|
||||
Vec3<size_t> suggestedLws{0, 0, 0};
|
||||
|
||||
if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] != 0) {
|
||||
suggestedLws.x = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
suggestedLws.y = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
suggestedLws.z = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
} else {
|
||||
uint32_t dispatchWorkDim = std::max(1U, std::max(gws.getSimplifiedDim(), offset.getSimplifiedDim()));
|
||||
const DispatchInfo dispatchInfo{&clDevice, this, dispatchWorkDim, gws, elws, offset};
|
||||
suggestedLws = computeWorkgroupSize(dispatchInfo);
|
||||
}
|
||||
|
||||
localWorkSize[0] = suggestedLws.x;
|
||||
if (workDim > 1)
|
||||
|
|
|
@ -19,7 +19,7 @@ namespace ULT {
|
|||
|
||||
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKernelSuggestedLocalWorkSizeThenErrorIsReturned) {
|
||||
size_t globalWorkOffset[3];
|
||||
size_t globalWorkSize[3];
|
||||
size_t globalWorkSize[3] = {1, 1, 1};
|
||||
size_t suggestedLocalWorkSize[3];
|
||||
cl_uint workDim = 1;
|
||||
|
||||
|
@ -52,6 +52,14 @@ TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKe
|
|||
retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, workDim,
|
||||
globalWorkOffset, nullptr, suggestedLocalWorkSize);
|
||||
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal);
|
||||
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
globalWorkSize[i] = 0;
|
||||
retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 3,
|
||||
globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
|
||||
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal);
|
||||
globalWorkSize[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
|
||||
|
@ -96,6 +104,24 @@ TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSugge
|
|||
EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]);
|
||||
}
|
||||
|
||||
// Checks that when a kernel declares a required work-group size, the suggested local work size
// reported by clGetKernelSuggestedLocalWorkSizeINTEL equals that required size in every dimension.
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithReqdWorkGroupSizeWhenGettingSuggestedLocalWorkSizeThenRequiredWorkSizeIsReturned) {
    constexpr cl_uint dimensions = 3;
    const uint16_t reqdWorkGroupSize[dimensions] = {32, 32, 32};
    size_t offsets[dimensions] = {0, 0, 0};
    size_t gws[dimensions] = {128, 128, 128};
    // Zero-initialised so a call that leaves the output untouched cannot pass by accident.
    size_t lws[dimensions] = {0, 0, 0};

    // Stamp the required work-group size onto the mock kernel before querying.
    MockKernelWithInternals mockKernel(*pDevice);
    auto &kernelAttributes = mockKernel.kernelInfo.kernelDescriptor.kernelAttributes;
    for (cl_uint dim = 0; dim < dimensions; ++dim) {
        kernelAttributes.requiredWorkgroupSize[dim] = reqdWorkGroupSize[dim];
    }

    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, mockKernel.mockMultiDeviceKernel, dimensions, offsets, gws, lws);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (cl_uint dim = 0; dim < dimensions; ++dim) {
        EXPECT_EQ(reqdWorkGroupSize[dim], lws[dim]);
    }
}
|
||||
|
||||
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
|
||||
auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram);
|
||||
auto kernelInfos = MockKernel::toKernelInfoContainer(pKernelWithExecutionEnvironmentPatch->getKernelInfo(), testedRootDeviceIndex);
|
||||
|
|
Loading…
Reference in New Issue