// Patch summary (explanatory preamble; everything from the first "diff --git" header onward is unchanged).
//
// Purpose: make enqueueKernel() reject a caller-supplied local work size whose total number of
// work-items exceeds the device limit, and adjust the unit tests / test kernel to the new check.
//
// runtime/command_queue/enqueue_kernel.h
//   * Adds `size_t totalWorkItems = 1u;` and multiplies it by `localWorkSizeIn[i]` right after
//     `workGroupSize[i] = localWorkSizeIn[i];` inside the per-dimension loop.
//   * Immediately before the `enqueueHandler(` call, returns CL_INVALID_WORK_GROUP_SIZE when
//     `totalWorkItems > this->getDevice().getDeviceInfo().maxWorkGroupSize`.
//   * NOTE(review): the product is a plain size_t multiplication with no overflow guard visible in
//     these hunks; confirm that earlier validation bounds each dimension so the product cannot wrap.
//   * NOTE(review): the accumulation presumably runs only when localWorkSizeIn is non-null (the
//     enclosing condition is outside the hunk) - verify; otherwise totalWorkItems stays 1.
//
// unit_tests/test_files/required_work_group.cl
//   * reqd_work_group_size changes from (16, 8, 4) to (8, 4, 4).
//
// unit_tests/command_queue/enqueue_kernel_local_work_size_tests.cpp
//   * Expected local / enqueued local sizes change from 16x8x4 to 8x4x4, the explicit
//     localWorkSize arrays change from {16, 8, 4} to {8, 4, 4} and from {16} to {8}.
//
// unit_tests/command_queue/enqueue_kernel_tests.cpp
//   * Adds a test that passes a local work size of `maxWorkGroupSize * 2` in dimension 0
//     (workDim == 1) and expects CL_INVALID_WORK_GROUP_SIZE.
//
// unit_tests/context/driver_diagnostics_enqueue_tests.cpp
//   * The "bad size" setup no longer doubles a local size: it halves
//     localWorkGroupSize[badSizeDimension] when that value is > 1, otherwise halves
//     localWorkGroupSize[0]. (Before: halve for dimension 0, double for the others.)
//
// unit_tests/program/program_nonuniform.cpp
//   * ExecuteKernelNonUniform21 uses localWorkSize {11, 12, 1} instead of {11, 12, 12}
//     (presumably so the product stays within the device limit - confirm against the target devices).
//
// Copyright headers in four of the touched files change from "2017" to "2017 - 2018".
//
// NOTE(review): the hunks below appear with their line breaks collapsed; restore one diff line per
// text line before handing this to patch tooling.
diff --git a/runtime/command_queue/enqueue_kernel.h b/runtime/command_queue/enqueue_kernel.h index 5b2ca74469..13ee4e6d5a 100644 --- a/runtime/command_queue/enqueue_kernel.h +++ b/runtime/command_queue/enqueue_kernel.h @@ -71,6 +71,7 @@ cl_int CommandQueueHw::enqueueKernel( } size_t remainder = 0; + size_t totalWorkItems = 1u; const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr; for (auto i = 0u; i < workDim; i++) { @@ -86,6 +87,7 @@ cl_int CommandQueueHw::enqueueKernel( } } workGroupSize[i] = localWorkSizeIn[i]; + totalWorkItems *= localWorkSizeIn[i]; } remainder += region[i] % workGroupSize[i]; @@ -126,6 +128,10 @@ cl_int CommandQueueHw::enqueueKernel( ",", globalWorkSizeIn[2], ",SIMD:, ", kernel.getKernelInfo().getMaxSimdSize()); + if (totalWorkItems > this->getDevice().getDeviceInfo().maxWorkGroupSize) { + return CL_INVALID_WORK_GROUP_SIZE; + } + enqueueHandler( surfaces, false, diff --git a/unit_tests/command_queue/enqueue_kernel_local_work_size_tests.cpp b/unit_tests/command_queue/enqueue_kernel_local_work_size_tests.cpp index 28c9113604..e5f83448de 100644 --- a/unit_tests/command_queue/enqueue_kernel_local_work_size_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_local_work_size_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -78,12 +78,12 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) { EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(*pKernel->localWorkSizeX, 16u); - EXPECT_EQ(*pKernel->localWorkSizeY, 8u); + EXPECT_EQ(*pKernel->localWorkSizeX, 8u); + EXPECT_EQ(*pKernel->localWorkSizeY, 4u); EXPECT_EQ(*pKernel->localWorkSizeZ, 4u); - EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u); - EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u); + 
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u); EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u); } @@ -91,7 +91,7 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) { TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) { size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 32, 32}; - size_t localWorkSize[3] = {16, 8, 4}; + size_t localWorkSize[3] = {8, 4, 4}; auto retVal = pCmdQ->enqueueKernel( pKernel, @@ -105,12 +105,12 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) { EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u); - EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u); + EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u); EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u); - EXPECT_EQ(*pKernel->localWorkSizeX, 16u); - EXPECT_EQ(*pKernel->localWorkSizeY, 8u); + EXPECT_EQ(*pKernel->localWorkSizeX, 8u); + EXPECT_EQ(*pKernel->localWorkSizeY, 4u); EXPECT_EQ(*pKernel->localWorkSizeZ, 4u); } @@ -118,7 +118,7 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) { TEST_F(EnqueueKernelRequiredWorkSize, givenKernelRequiringLocalWorkgroupSizeWhen1DimensionIsPassedThatIsCorrectThenNdRangeIsSuccesful) { size_t globalWorkOffset[1] = {0}; size_t globalWorkSize[1] = {32}; - size_t localWorkSize[1] = {16}; + size_t localWorkSize[1] = {8}; auto retVal = pCmdQ->enqueueKernel( pKernel, diff --git a/unit_tests/command_queue/enqueue_kernel_tests.cpp b/unit_tests/command_queue/enqueue_kernel_tests.cpp index b073c3297f..d22344a366 100644 --- a/unit_tests/command_queue/enqueue_kernel_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_tests.cpp @@ -1547,3 +1547,13 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu clReleaseCommandQueue(pCmdQ2); } + +TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) { + 
auto maxWorkgroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; + size_t globalWorkSize[3] = {maxWorkgroupSize * 2, 1, 1}; + size_t localWorkSize[3] = {maxWorkgroupSize * 2, 1, 1}; + MockKernelWithInternals mockKernel(*pDevice); + + auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status); +} diff --git a/unit_tests/context/driver_diagnostics_enqueue_tests.cpp b/unit_tests/context/driver_diagnostics_enqueue_tests.cpp index 9b8a90c31d..0c7915c006 100644 --- a/unit_tests/context/driver_diagnostics_enqueue_tests.cpp +++ b/unit_tests/context/driver_diagnostics_enqueue_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -764,10 +764,10 @@ TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEn } badSizeDimension = GetParam(); - if (badSizeDimension == 0) { + if (localWorkGroupSize[badSizeDimension] > 1) { localWorkGroupSize[badSizeDimension] /= 2; } else { - localWorkGroupSize[badSizeDimension] *= 2; + localWorkGroupSize[0] /= 2; } retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr); diff --git a/unit_tests/program/program_nonuniform.cpp b/unit_tests/program/program_nonuniform.cpp index ec049970da..5baa2e1969 100644 --- a/unit_tests/program/program_nonuniform.cpp +++ b/unit_tests/program/program_nonuniform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -240,7 +240,7 @@ TEST_F(ProgramNonUniformTest, ExecuteKernelNonUniform21) { 
ASSERT_NE(nullptr, pKernel); size_t globalWorkSize[3] = {12, 12, 12}; - size_t localWorkSize[3] = {11, 12, 12}; + size_t localWorkSize[3] = {11, 12, 1}; retVal = pCmdQ->enqueueKernel( pKernel, diff --git a/unit_tests/test_files/required_work_group.cl b/unit_tests/test_files/required_work_group.cl index 969cec018b..2744912675 100644 --- a/unit_tests/test_files/required_work_group.cl +++ b/unit_tests/test_files/required_work_group.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -__kernel __attribute__((reqd_work_group_size(16, 8, 4))) +__kernel __attribute__((reqd_work_group_size(8, 4, 4))) void CopyBuffer( __global unsigned int *src, __global unsigned int *dst)