Add check for local work group size in clEnqueueNDRangeKernel call.

- Incoming local work group size cannot exceed device capabilities.

Change-Id: I89a7503155c71443e3ebc630debb5d5b466c6cb5
This commit is contained in:
Mrozek, Michal
2018-04-20 07:58:48 +02:00
parent 5ed13d7c88
commit ce8c44cae3
6 changed files with 34 additions and 18 deletions

View File

@ -71,6 +71,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
}
size_t remainder = 0;
size_t totalWorkItems = 1u;
const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr;
for (auto i = 0u; i < workDim; i++) {
@ -86,6 +87,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
}
}
workGroupSize[i] = localWorkSizeIn[i];
totalWorkItems *= localWorkSizeIn[i];
}
remainder += region[i] % workGroupSize[i];
@ -126,6 +128,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
",", globalWorkSizeIn[2],
",SIMD:, ", kernel.getKernelInfo().getMaxSimdSize());
if (totalWorkItems > this->getDevice().getDeviceInfo().maxWorkGroupSize) {
return CL_INVALID_WORK_GROUP_SIZE;
}
enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(
surfaces,
false,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -78,12 +78,12 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) {
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(*pKernel->localWorkSizeX, 16u);
EXPECT_EQ(*pKernel->localWorkSizeY, 8u);
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
EXPECT_EQ(*pKernel->localWorkSizeY, 4u);
EXPECT_EQ(*pKernel->localWorkSizeZ, 4u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u);
}
@ -91,7 +91,7 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) {
TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
size_t globalWorkOffset[3] = {0, 0, 0};
size_t globalWorkSize[3] = {32, 32, 32};
size_t localWorkSize[3] = {16, 8, 4};
size_t localWorkSize[3] = {8, 4, 4};
auto retVal = pCmdQ->enqueueKernel(
pKernel,
@ -105,12 +105,12 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u);
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u);
EXPECT_EQ(*pKernel->localWorkSizeX, 16u);
EXPECT_EQ(*pKernel->localWorkSizeY, 8u);
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
EXPECT_EQ(*pKernel->localWorkSizeY, 4u);
EXPECT_EQ(*pKernel->localWorkSizeZ, 4u);
}
@ -118,7 +118,7 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
TEST_F(EnqueueKernelRequiredWorkSize, givenKernelRequiringLocalWorkgroupSizeWhen1DimensionIsPassedThatIsCorrectThenNdRangeIsSuccesful) {
size_t globalWorkOffset[1] = {0};
size_t globalWorkSize[1] = {32};
size_t localWorkSize[1] = {16};
size_t localWorkSize[1] = {8};
auto retVal = pCmdQ->enqueueKernel(
pKernel,

View File

@ -1547,3 +1547,13 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu
clReleaseCommandQueue(pCmdQ2);
}
TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) {
auto maxWorkgroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
size_t globalWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
size_t localWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
MockKernelWithInternals mockKernel(*pDevice);
auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -764,10 +764,10 @@ TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEn
}
badSizeDimension = GetParam();
if (badSizeDimension == 0) {
if (localWorkGroupSize[badSizeDimension] > 1) {
localWorkGroupSize[badSizeDimension] /= 2;
} else {
localWorkGroupSize[badSizeDimension] *= 2;
localWorkGroupSize[0] /= 2;
}
retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -240,7 +240,7 @@ TEST_F(ProgramNonUniformTest, ExecuteKernelNonUniform21) {
ASSERT_NE(nullptr, pKernel);
size_t globalWorkSize[3] = {12, 12, 12};
size_t localWorkSize[3] = {11, 12, 12};
size_t localWorkSize[3] = {11, 12, 1};
retVal = pCmdQ->enqueueKernel(
pKernel,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
__kernel __attribute__((reqd_work_group_size(16, 8, 4)))
__kernel __attribute__((reqd_work_group_size(8, 4, 4)))
void CopyBuffer(
__global unsigned int *src,
__global unsigned int *dst)