diff --git a/runtime/api/api.cpp b/runtime/api/api.cpp
index 298c08509a..91bf1f85b0 100644
--- a/runtime/api/api.cpp
+++ b/runtime/api/api.cpp
@@ -3977,6 +3977,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
     RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemAdviseINTEL);
     RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL);
     RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL);
+    RETURN_FUNC_PTR_IF_EXIST(clGetKernelSuggestedLocalWorkSizeINTEL);
     RETURN_FUNC_PTR_IF_EXIST(clGetExecutionInfoINTEL);
     RETURN_FUNC_PTR_IF_EXIST(clEnqueueNDRangeKernelINTEL);
 
@@ -5199,6 +5200,58 @@ cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, cl_uin
     return retVal;
 }
 
+cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(cl_command_queue commandQueue,
+                                                          cl_kernel kernel,
+                                                          cl_uint workDim,
+                                                          const size_t *globalWorkOffset,
+                                                          const size_t *globalWorkSize,
+                                                          size_t *suggestedLocalWorkSize) {
+    cl_int retVal = CL_SUCCESS;
+    API_ENTER(&retVal);
+    DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel,
+                   "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0),
+                   "globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1),
+                   "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2),
+                   "globalWorkSize", NEO::FileLoggerInstance().getSizes(globalWorkSize, workDim, true),
+                   "suggestedLocalWorkSize", suggestedLocalWorkSize);
+
+    retVal = validateObjects(commandQueue, kernel);
+
+    if (CL_SUCCESS != retVal) {
+        return retVal;
+    }
+
+    if ((workDim == 0) || (workDim > 3)) {
+        retVal = CL_INVALID_WORK_DIMENSION;
+        return retVal;
+    }
+
+    if (globalWorkOffset == nullptr) {
+        retVal = CL_INVALID_GLOBAL_OFFSET;
+        return retVal;
+    }
+
+    if (globalWorkSize == nullptr) {
+        retVal = CL_INVALID_GLOBAL_WORK_SIZE;
+        return retVal;
+    }
+
+    auto pKernel = castToObjectOrAbort<Kernel>(kernel);
+    if (!pKernel->isPatched()) {
+        retVal = CL_INVALID_KERNEL;
+        return retVal;
+    }
+
+    if (suggestedLocalWorkSize == nullptr) {
+        retVal = CL_INVALID_VALUE;
+        return retVal;
+    }
+
+    pKernel->getSuggestedLocalWorkSize(workDim, globalWorkSize, globalWorkOffset, suggestedLocalWorkSize);
+
+    return retVal;
+}
+
 cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
                                            cl_kernel kernel,
                                            cl_uint workDim,
diff --git a/runtime/api/api.h b/runtime/api/api.h
index c90f905ce7..efb65b4b34 100644
--- a/runtime/api/api.h
+++ b/runtime/api/api.h
@@ -1026,6 +1026,14 @@ cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL(
     size_t *globalVariableSizeRet,
     void **globalVariablePointerRet);
 
+cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(
+    cl_command_queue commandQueue,
+    cl_kernel kernel,
+    cl_uint workDim,
+    const size_t *globalWorkOffset,
+    const size_t *globalWorkSize,
+    size_t *suggestedLocalWorkSize);
+
 cl_int CL_API_CALL clGetExecutionInfoINTEL(
     cl_command_queue commandQueue,
     cl_kernel kernel,
diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp
index 892d5f89e2..8674c822f3 100644
--- a/runtime/kernel/kernel.cpp
+++ b/runtime/kernel/kernel.cpp
@@ -21,11 +21,13 @@
 #include "runtime/built_ins/built_ins.h"
 #include "runtime/built_ins/builtins_dispatch_builder.h"
 #include "runtime/command_queue/command_queue.h"
+#include "runtime/command_queue/gpgpu_walker.h"
 #include "runtime/command_stream/command_stream_receiver.h"
 #include "runtime/context/context.h"
 #include "runtime/device_queue/device_queue.h"
 #include "runtime/execution_model/device_enqueue.h"
 #include "runtime/gtpin/gtpin_notify.h"
+#include "runtime/helpers/dispatch_info.h"
 #include "runtime/helpers/get_info.h"
 #include "runtime/helpers/per_thread_data.h"
 #include "runtime/helpers/sampler_helpers.h"
@@ -999,6 +1001,31 @@ cl_int Kernel::setKernelExecutionType(cl_execution_info_kernel_type_intel execut
     return CL_SUCCESS;
 }
 
+void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *globalWorkSize, const size_t *globalWorkOffset,
+                                       size_t *localWorkSize) {
+    UNRECOVERABLE_IF((workDim == 0) || (workDim > 3));
+    UNRECOVERABLE_IF(globalWorkOffset == nullptr);
+    UNRECOVERABLE_IF(globalWorkSize == nullptr);
+    Vec3<size_t> elws{0, 0, 0};
+    Vec3<size_t> gws{
+        globalWorkSize[0],
+        (workDim > 1) ? globalWorkSize[1] : 0,
+        (workDim > 2) ? globalWorkSize[2] : 0};
+    Vec3<size_t> offset{
+        globalWorkOffset[0],
+        (workDim > 1) ? globalWorkOffset[1] : 0,
+        (workDim > 2) ? globalWorkOffset[2] : 0};
+
+    const DispatchInfo dispatchInfo{this, workDim, gws, elws, offset};
+    auto suggestedLws = computeWorkgroupSize(dispatchInfo);
+
+    localWorkSize[0] = suggestedLws.x;
+    if (workDim > 1)
+        localWorkSize[1] = suggestedLws.y;
+    if (workDim > 2)
+        localWorkSize[2] = suggestedLws.z;
+}
+
 uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const {
     auto &hardwareInfo = getDevice().getHardwareInfo();
     auto executionEnvironment = kernelInfo.patchInfo.executionEnvironment;
diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h
index ca45be8b6c..b838e791d4 100644
--- a/runtime/kernel/kernel.h
+++ b/runtime/kernel/kernel.h
@@ -401,6 +401,8 @@ class Kernel : public BaseObject<_cl_kernel> {
     void setThreadArbitrationPolicy(uint32_t policy) {
         this->threadArbitrationPolicy = policy;
     }
+    void getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *globalWorkSize, const size_t *globalWorkOffset,
+                                   size_t *localWorkSize);
     uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const;
 
   protected:
diff --git a/runtime/program/kernel_info.cpp b/runtime/program/kernel_info.cpp
index a34328bc75..6445032a43 100644
--- a/runtime/program/kernel_info.cpp
+++ b/runtime/program/kernel_info.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2019 Intel Corporation
+ * Copyright (C) 2017-2020 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -156,7 +156,8 @@ WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t
 }
 WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
     this->maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize;
-    this->hasBarriers = !!dispatchInfo.getKernel()->getKernelInfo().patchInfo.executionEnvironment->HasBarriers;
+    auto pExecutionEnvironment = dispatchInfo.getKernel()->getKernelInfo().patchInfo.executionEnvironment;
+    this->hasBarriers = (pExecutionEnvironment != nullptr) && (pExecutionEnvironment->HasBarriers);
     this->simdSize = (uint32_t)dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize();
     this->slmTotalSize = (uint32_t)dispatchInfo.getKernel()->slmTotalSize;
     this->coreFamily = dispatchInfo.getKernel()->getDevice().getHardwareInfo().platform.eRenderCoreFamily;
diff --git a/unit_tests/api/CMakeLists.txt b/unit_tests/api/CMakeLists.txt
index 84be664b38..947874734a 100644
--- a/unit_tests/api/CMakeLists.txt
+++ b/unit_tests/api/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2017-2019 Intel Corporation
+# Copyright (C) 2017-2020 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -83,6 +83,7 @@ set(IGDRCL_SRCS_tests_api
   ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_info_tests.inl
   ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_sub_group_info_khr_tests.inl
   ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_sub_group_info_tests.inl
+  ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_suggested_local_work_size_intel_tests.inl
   ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_work_group_info_tests.inl
   ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_mem_object_info_tests.inl
   ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_pipe_info_tests.inl
diff --git a/unit_tests/api/api_tests_wrapper3.cpp b/unit_tests/api/api_tests_wrapper3.cpp
index 4245f74574..70fcbc35e0 100644
--- a/unit_tests/api/api_tests_wrapper3.cpp
+++ b/unit_tests/api/api_tests_wrapper3.cpp
@@ -1,11 +1,12 @@
 /*
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
  */
 
 #include "unit_tests/api/cl_get_execution_info_intel_tests.inl"
+#include "unit_tests/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl"
 #include "unit_tests/api/cl_get_kernel_work_group_info_tests.inl"
 #include "unit_tests/api/cl_get_mem_object_info_tests.inl"
 #include "unit_tests/api/cl_get_pipe_info_tests.inl"
diff --git a/unit_tests/api/cl_get_extension_function_address_for_platform_tests.inl b/unit_tests/api/cl_get_extension_function_address_for_platform_tests.inl
index 5ba0f228ab..edbf21c47b 100644
--- a/unit_tests/api/cl_get_extension_function_address_for_platform_tests.inl
+++ b/unit_tests/api/cl_get_extension_function_address_for_platform_tests.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2019 Intel Corporation
+ * Copyright (C) 2017-2020 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -78,6 +78,11 @@ TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetTracingStateINTE
     EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetTracingStateINTEL));
 }
 
+TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetKernelSuggestedLocalWorkSizeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
+    auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetKernelSuggestedLocalWorkSizeINTEL");
+    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelSuggestedLocalWorkSizeINTEL));
+}
+
 TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetExecutionInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
     auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetExecutionInfoINTEL");
     EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetExecutionInfoINTEL));
diff --git a/unit_tests/api/cl_get_extension_function_address_tests.inl b/unit_tests/api/cl_get_extension_function_address_tests.inl
index 3326af0139..40de145602 100644
--- a/unit_tests/api/cl_get_extension_function_address_tests.inl
+++ b/unit_tests/api/cl_get_extension_function_address_tests.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2019 Intel Corporation
+ * Copyright (C) 2017-2020 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -163,6 +163,11 @@ TEST_F(clGetExtensionFunctionAddressTests, GivenClGetDeviceFunctionPointerINTELW
     EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetDeviceFunctionPointerINTEL));
 }
 
+TEST_F(clGetExtensionFunctionAddressTests, GivenClGetKernelSuggestedLocalWorkSizeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
+    auto retVal = clGetExtensionFunctionAddress("clGetKernelSuggestedLocalWorkSizeINTEL");
+    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelSuggestedLocalWorkSizeINTEL));
+}
+
 TEST_F(clGetExtensionFunctionAddressTests, GivenClGetExecutionInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
     auto retVal = clGetExtensionFunctionAddress("clGetExecutionInfoINTEL");
     EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetExecutionInfoINTEL));
diff --git a/unit_tests/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl b/unit_tests/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl
new file mode 100644
index 0000000000..a0647fb8dc
--- /dev/null
+++ b/unit_tests/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2020 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include "runtime/command_queue/command_queue.h"
+#include "runtime/command_queue/gpgpu_walker.h"
+#include "unit_tests/mocks/mock_kernel.h"
+
+#include "cl_api_tests.h"
+
+using namespace NEO;
+
+using clGetKernelSuggestedLocalWorkSizeTests = api_tests;
+
+namespace ULT {
+
+TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKernelSuggestedLocalWorkSizeThenErrorIsReturned) {
+    size_t globalWorkOffset[3];
+    size_t globalWorkSize[3];
+    size_t suggestedLocalWorkSize[3];
+    cl_uint workDim = 1;
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(nullptr, pKernel, workDim,
+                                                    globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, nullptr, workDim,
+                                                    globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_INVALID_KERNEL, retVal);
+
+    pKernel->isPatchedOverride = false;
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, workDim,
+                                                    globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_INVALID_KERNEL, retVal);
+    pKernel->isPatchedOverride = true;
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, workDim,
+                                                    globalWorkOffset, globalWorkSize, nullptr);
+    EXPECT_EQ(CL_INVALID_VALUE, retVal);
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 0,
+                                                    globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal);
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 4,
+                                                    globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal);
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, workDim,
+                                                    nullptr, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_INVALID_GLOBAL_OFFSET, retVal);
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, workDim,
+                                                    globalWorkOffset, nullptr, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal);
+}
+
+TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
+    size_t globalWorkOffset[] = {0, 0, 0};
+    size_t globalWorkSize[] = {128, 128, 128};
+    size_t suggestedLocalWorkSize[] = {0, 0, 0};
+
+    Vec3<size_t> elws{0, 0, 0};
+    Vec3<size_t> gws{128, 128, 128};
+    Vec3<size_t> offset{0, 0, 0};
+    DispatchInfo dispatchInfo{pKernel, 1, gws, elws, offset};
+    auto expectedLws = computeWorkgroupSize(dispatchInfo);
+    EXPECT_GT(expectedLws.x, 1u);
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 1, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]);
+    EXPECT_EQ(0u, suggestedLocalWorkSize[1]);
+    EXPECT_EQ(0u, suggestedLocalWorkSize[2]);
+
+    dispatchInfo.setDim(2);
+    expectedLws = computeWorkgroupSize(dispatchInfo);
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 2, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]);
+    EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]);
+    EXPECT_EQ(0u, suggestedLocalWorkSize[2]);
+
+    dispatchInfo.setDim(3);
+    expectedLws = computeWorkgroupSize(dispatchInfo);
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 3, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]);
+    EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]);
+    EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]);
+}
+
+TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
+    std::unique_ptr<MockKernel> kernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram));
+
+    size_t globalWorkOffset[] = {0, 0, 0};
+    size_t globalWorkSize[] = {128, 128, 128};
+    size_t suggestedLocalWorkSize[] = {0, 0, 0};
+    cl_uint workDim = 3;
+
+    Vec3<size_t> elws{0, 0, 0};
+    Vec3<size_t> gws{128, 128, 128};
+    Vec3<size_t> offset{0, 0, 0};
+    const DispatchInfo dispatchInfo{kernelWithExecutionEnvironmentPatch.get(), workDim, gws, elws, offset};
+    auto expectedLws = computeWorkgroupSize(dispatchInfo);
+    EXPECT_GT(expectedLws.x * expectedLws.y * expectedLws.z, 1u);
+
+    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, kernelWithExecutionEnvironmentPatch.get(), workDim, globalWorkOffset,
+                                                    globalWorkSize, suggestedLocalWorkSize);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]);
+    EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]);
+    EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]);
+}
+
+} // namespace ULT
diff --git a/unit_tests/command_queue/local_work_size_tests.cpp b/unit_tests/command_queue/local_work_size_tests.cpp
index 2d4fab8454..2fd61d72b4 100644
--- a/unit_tests/command_queue/local_work_size_tests.cpp
+++ b/unit_tests/command_queue/local_work_size_tests.cpp
@@ -640,6 +640,24 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorre
     WorkSizeInfo workSizeInfo(dispatchInfo);
     EXPECT_EQ(workSizeInfo.numThreadsPerSubSlice, threadsPerEu * euPerSubSlice);
 }
+
+TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarriersIsCorrectlySet) {
+    MockClDevice device{new MockDevice};
+    MockKernelWithInternals kernel(device);
+    DispatchInfo dispatchInfo;
+    dispatchInfo.setKernel(kernel.mockKernel);
+
+    kernel.kernelInfo.patchInfo.executionEnvironment = nullptr;
+    EXPECT_FALSE(WorkSizeInfo{dispatchInfo}.hasBarriers);
+
+    kernel.executionEnvironment.HasBarriers = 0;
+    kernel.kernelInfo.patchInfo.executionEnvironment = &kernel.executionEnvironment;
+    EXPECT_FALSE(WorkSizeInfo{dispatchInfo}.hasBarriers);
+
+    kernel.executionEnvironment.HasBarriers = 1;
+    EXPECT_TRUE(WorkSizeInfo{dispatchInfo}.hasBarriers);
+}
+
 TEST(localWorkSizeTest, givenMaxWorkgroupSizeEqualToSimdSizeWhenLwsIsCalculatedThenItIsDownsizedToMaxWorkgroupSize) {
     WorkSizeInfo wsInfo(32, 0u, 32, 0u, platformDevices[0]->platform.eRenderCoreFamily, 32u, 0u, false, false);
     uint32_t workDim = 2;