Add check for local work group size in clEnqueueNDRangeKernel call.

- Incoming local work group size cannot exceed device capabilities. Change-Id: I89a7503155c71443e3ebc630debb5d5b466c6cb5
2025-09-10 12:53:42 +08:00 · 2018-04-20 07:58:48 +02:00
parent 5ed13d7c88
commit ce8c44cae3
6 changed files with 34 additions and 18 deletions
--- a/runtime/command_queue/enqueue_kernel.h
+++ b/runtime/command_queue/enqueue_kernel.h
@ -71,6 +71,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
    }

    size_t remainder = 0;
+    size_t totalWorkItems = 1u;
    const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr;

    for (auto i = 0u; i < workDim; i++) {
@ -86,6 +87,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
                }
            }
            workGroupSize[i] = localWorkSizeIn[i];
+            totalWorkItems *= localWorkSizeIn[i];
        }

        remainder += region[i] % workGroupSize[i];
@ -126,6 +128,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
            ",", globalWorkSizeIn[2],
            ",SIMD:, ", kernel.getKernelInfo().getMaxSimdSize());

+    if (totalWorkItems > this->getDevice().getDeviceInfo().maxWorkGroupSize) {
+        return CL_INVALID_WORK_GROUP_SIZE;
+    }
+
    enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(
        surfaces,
        false,
--- a/unit_tests/command_queue/enqueue_kernel_local_work_size_tests.cpp
+++ b/unit_tests/command_queue/enqueue_kernel_local_work_size_tests.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@ -78,12 +78,12 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) {

    EXPECT_EQ(CL_SUCCESS, retVal);

-    EXPECT_EQ(*pKernel->localWorkSizeX, 16u);
-    EXPECT_EQ(*pKernel->localWorkSizeY, 8u);
+    EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
+    EXPECT_EQ(*pKernel->localWorkSizeY, 4u);
    EXPECT_EQ(*pKernel->localWorkSizeZ, 4u);

-    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u);
-    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u);
+    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
+    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u);
    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u);
 }

@ -91,7 +91,7 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) {
 TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
    size_t globalWorkOffset[3] = {0, 0, 0};
    size_t globalWorkSize[3] = {32, 32, 32};
-    size_t localWorkSize[3] = {16, 8, 4};
+    size_t localWorkSize[3] = {8, 4, 4};

    auto retVal = pCmdQ->enqueueKernel(
        pKernel,
@ -105,12 +105,12 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {

    EXPECT_EQ(CL_SUCCESS, retVal);

-    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u);
-    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u);
+    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
+    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u);
    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u);

-    EXPECT_EQ(*pKernel->localWorkSizeX, 16u);
-    EXPECT_EQ(*pKernel->localWorkSizeY, 8u);
+    EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
+    EXPECT_EQ(*pKernel->localWorkSizeY, 4u);
    EXPECT_EQ(*pKernel->localWorkSizeZ, 4u);
 }

@ -118,7 +118,7 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
 TEST_F(EnqueueKernelRequiredWorkSize, givenKernelRequiringLocalWorkgroupSizeWhen1DimensionIsPassedThatIsCorrectThenNdRangeIsSuccesful) {
    size_t globalWorkOffset[1] = {0};
    size_t globalWorkSize[1] = {32};
-    size_t localWorkSize[1] = {16};
+    size_t localWorkSize[1] = {8};

    auto retVal = pCmdQ->enqueueKernel(
        pKernel,
--- a/unit_tests/command_queue/enqueue_kernel_tests.cpp
+++ b/unit_tests/command_queue/enqueue_kernel_tests.cpp
@ -1547,3 +1547,13 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu

    clReleaseCommandQueue(pCmdQ2);
 }
+
+TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) {
+    auto maxWorkgroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
+    size_t globalWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
+    size_t localWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
+    MockKernelWithInternals mockKernel(*pDevice);
+
+    auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
+    EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status);
+}
--- a/unit_tests/context/driver_diagnostics_enqueue_tests.cpp
+++ b/unit_tests/context/driver_diagnostics_enqueue_tests.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@ -764,10 +764,10 @@ TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEn
    }

    badSizeDimension = GetParam();
-    if (badSizeDimension == 0) {
+    if (localWorkGroupSize[badSizeDimension] > 1) {
        localWorkGroupSize[badSizeDimension] /= 2;
    } else {
-        localWorkGroupSize[badSizeDimension] *= 2;
+        localWorkGroupSize[0] /= 2;
    }

    retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr);
--- a/unit_tests/program/program_nonuniform.cpp
+++ b/unit_tests/program/program_nonuniform.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@ -240,7 +240,7 @@ TEST_F(ProgramNonUniformTest, ExecuteKernelNonUniform21) {
        ASSERT_NE(nullptr, pKernel);

        size_t globalWorkSize[3] = {12, 12, 12};
-        size_t localWorkSize[3] = {11, 12, 12};
+        size_t localWorkSize[3] = {11, 12, 1};

        retVal = pCmdQ->enqueueKernel(
            pKernel,
--- a/unit_tests/test_files/required_work_group.cl
+++ b/unit_tests/test_files/required_work_group.cl
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@ -20,7 +20,7 @@
 * OTHER DEALINGS IN THE SOFTWARE.
 */

-__kernel __attribute__((reqd_work_group_size(16, 8, 4)))
+__kernel __attribute__((reqd_work_group_size(8, 4, 4)))
 void CopyBuffer(
    __global unsigned int *src,
    __global unsigned int *dst)