diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp
index f4e7f82f8f..cda485bac8 100644
--- a/level_zero/core/source/kernel/kernel_imp.cpp
+++ b/level_zero/core/source/kernel/kernel_imp.cpp
@@ -387,13 +387,13 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
     auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();
     auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
     this->numThreadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(
-        simdSize, static_cast<uint32_t>(itemsInGroup), grfCount, !kernelRequiresGenerationOfLocalIdsByRuntime, rootDeviceEnvironment);
+        simdSize, static_cast<uint32_t>(itemsInGroup), grfCount, rootDeviceEnvironment);
 
     if (kernelRequiresGenerationOfLocalIdsByRuntime) {
         auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize;
         uint32_t perThreadDataSizeForWholeThreadGroupNeeded =
             static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(
-                simdSize, grfSize, grfCount, numChannels, itemsInGroup, !kernelRequiresGenerationOfLocalIdsByRuntime, rootDeviceEnvironment));
+                simdSize, grfSize, grfCount, numChannels, itemsInGroup, rootDeviceEnvironment));
         if (perThreadDataSizeForWholeThreadGroupNeeded >
             perThreadDataSizeForWholeThreadGroupAllocated) {
             alignedFree(perThreadDataForWholeThreadGroup);
@@ -940,7 +940,7 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
 
     uint32_t maxKernelWorkGroupSize = static_cast<uint32_t>(this->module->getMaxGroupSize(kernelDescriptor));
     const auto &rootDeviceEnvironment = this->module->getDevice()->getNEODevice()->getRootDeviceEnvironment();
-    maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, !kernelRequiresGenerationOfLocalIdsByRuntime, maxKernelWorkGroupSize, rootDeviceEnvironment);
+    maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, maxKernelWorkGroupSize, rootDeviceEnvironment);
     pKernelProperties->maxNumSubgroups = maxKernelWorkGroupSize / kernelDescriptor.kernelAttributes.simdSize;
 
     void *pNext = pKernelProperties->pNext;
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp
index d6f2e2f1e1..892924bb62 100644
--- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp
@@ -929,7 +929,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
     generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, numGrf, rootDeviceEnvironment);
 
     auto localIdsProgrammingSize = implicitArgsProgrammingSize - ImplicitArgsV0::getAlignedSize();
-    size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, !kernelRequiresGenerationOfLocalIdsByRuntime, rootDeviceEnvironment);
+    size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, rootDeviceEnvironment);
 
     EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds));
     alignedFree(expectedLocalIds);
@@ -976,7 +976,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
     generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, numGrf, rootDeviceEnvironment);
 
     auto localIdsProgrammingSize = implicitArgsProgrammingSize - ImplicitArgsV0::getAlignedSize();
-    size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, !kernelRequiresGenerationOfLocalIdsByRuntime, rootDeviceEnvironment);
+    size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, rootDeviceEnvironment);
 
     EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds));
     alignedFree(expectedLocalIds);
diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
index d4fade693e..d2b5a5857a 100644
--- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
+++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
@@ -405,7 +405,6 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeEnabledWhenSett
         mockKernel.descriptor.kernelAttributes.simdSize,
         groupSize[0] * groupSize[1] * groupSize[2],
         numGrf,
-        mockKernel.kernelRequiresGenerationOfLocalIdsByRuntime,
         rootDeviceEnvironment);
     auto perThreadDataSizeForWholeTGNeeded =
         static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(
@@ -414,7 +413,6 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeEnabledWhenSett
             numGrf,
             mockKernel.descriptor.kernelAttributes.numLocalIdChannels,
             groupSize[0] * groupSize[1] * groupSize[2],
-            !mockKernel.kernelRequiresGenerationOfLocalIdsByRuntime,
             rootDeviceEnvironment));
 
     EXPECT_EQ(numThreadsPerTG, mockKernel.getNumThreadsPerThreadGroup());
@@ -1968,7 +1966,7 @@ TEST_F(KernelPropertiesTests, whenPassingKernelMaxGroupSizePropertiesStructToGet
     EXPECT_EQ(ZE_RESULT_SUCCESS, res);
     auto &device = *module->getDevice();
     auto &gfxCoreHelper = device.getGfxCoreHelper();
-    uint32_t maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, false, static_cast<uint32_t>(this->module->getMaxGroupSize(kernelDescriptor)), device.getNEODevice()->getRootDeviceEnvironment());
+    uint32_t maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, static_cast<uint32_t>(this->module->getMaxGroupSize(kernelDescriptor)), device.getNEODevice()->getRootDeviceEnvironment());
     EXPECT_EQ(maxKernelWorkGroupSize, maxGroupSizeProperties.maxGroupSize);
 }
 
diff --git a/level_zero/core/test/unit_tests/xe2_hpg_core/test_module_xe2_hpg_core.cpp b/level_zero/core/test/unit_tests/xe2_hpg_core/test_module_xe2_hpg_core.cpp
index fde48b7068..666edcd8d0 100644
--- a/level_zero/core/test/unit_tests/xe2_hpg_core/test_module_xe2_hpg_core.cpp
+++ b/level_zero/core/test/unit_tests/xe2_hpg_core/test_module_xe2_hpg_core.cpp
@@ -69,17 +69,13 @@ XE2_HPG_CORETEST_F(Xe2KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThre
         module.getMaxGroupSizeResult = UINT32_MAX;
         kernel.module = &module;
 
-        std::array<std::array<uint32_t, 3>, 4> values = {{
-            {16u, 1u, 64u}, // SIMT Size, HW local-id generation, Max Num of threads
-            {32u, 1u, 32u},
-            {16u, 0u, 64u},
-            {32u, 0u, 64u},
-
+        std::array<std::array<uint32_t, 2>, 2> values = {{
+            {16u, 64u}, // SIMT Size, Max Num of threads
+            {32u, 32u},
         }};
 
-        for (auto &[simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
+        for (auto &[simtSize, expectedNumThreadsPerThreadGroup] : values) {
             kernel.descriptor.kernelAttributes.simdSize = simtSize;
-            kernel.forceGenerateLocalIdByHw = isHwLocalIdGeneration;
             kernel.setGroupSize(1024u, 1024u, 1024u);
             EXPECT_EQ(expectedNumThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup);
             kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
@@ -96,16 +92,13 @@ XE2_HPG_CORETEST_F(Xe2KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThre
         module.getMaxGroupSizeResult = UINT32_MAX;
         kernel.module = &module;
 
-        std::array<std::array<uint32_t, 3>, 4> values = {{
-            {16u, 0u, 32u}, // SIMT Size, HW local-id generation, Max Num of threads
-            {16u, 1u, 32u},
-            {32u, 0u, 32u},
-            {32u, 1u, 32u},
+        std::array<std::array<uint32_t, 2>, 2> values = {{
+            {16u, 32u}, // SIMT Size, Max Num of threads
+            {32u, 32u},
         }};
 
-        for (auto &[simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
+        for (auto &[simtSize, expectedNumThreadsPerThreadGroup] : values) {
             kernel.descriptor.kernelAttributes.simdSize = simtSize;
-            kernel.forceGenerateLocalIdByHw = isHwLocalIdGeneration;
             kernel.setGroupSize(1024u, 1024u, 1024u);
             EXPECT_EQ(expectedNumThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup);
             kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
diff --git a/level_zero/core/test/unit_tests/xe3_core/test_module_xe3_core.cpp b/level_zero/core/test/unit_tests/xe3_core/test_module_xe3_core.cpp
index 9597220a04..9634849cd6 100644
--- a/level_zero/core/test/unit_tests/xe3_core/test_module_xe3_core.cpp
+++ b/level_zero/core/test/unit_tests/xe3_core/test_module_xe3_core.cpp
@@ -70,17 +70,14 @@ XE3_CORETEST_F(Xe3KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThreadsP
         module.getMaxGroupSizeResult = UINT32_MAX;
         kernel.module = &module;
 
-        std::array<std::array<uint32_t, 3>, 4> values = {{
-            {16u, 0u, 64u}, // SIMT Size, HW local-id generation, Max Num of threads
-            {16u, 1u, 64u},
-            {32u, 1u, 32u},
-            {32u, 0u, 64u},
+        std::array<std::array<uint32_t, 2>, 2> values = {{
+            {16u, 64u}, // SIMT Size, Max Num of threads
+            {32u, 32u},
 
         }};
 
-        for (auto &[simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
+        for (auto &[simtSize, expectedNumThreadsPerThreadGroup] : values) {
             kernel.descriptor.kernelAttributes.simdSize = simtSize;
-            kernel.forceGenerateLocalIdByHw = isHwLocalIdGeneration;
             kernel.setGroupSize(1024u, 1024u, 1024u);
             EXPECT_EQ(expectedNumThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup);
             kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
@@ -97,16 +94,13 @@ XE3_CORETEST_F(Xe3KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThreadsP
         module.getMaxGroupSizeResult = UINT32_MAX;
         kernel.module = &module;
 
-        std::array<std::array<uint32_t, 3>, 4> values = {{
-            {16u, 0u, 48u}, // SIMT Size, HW local-id generation, Max Num of threads
-            {16u, 1u, 48u},
-            {32u, 1u, 32u},
-            {32u, 0u, 48u},
+        std::array<std::array<uint32_t, 2>, 2> values = {{
+            {16u, 48u}, // SIMT Size, Max Num of threads
+            {32u, 32u},
         }};
 
-        for (auto &[simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
+        for (auto &[simtSize, expectedNumThreadsPerThreadGroup] : values) {
             kernel.descriptor.kernelAttributes.simdSize = simtSize;
-            kernel.forceGenerateLocalIdByHw = isHwLocalIdGeneration;
             kernel.setGroupSize(1024u, 1024u, 1024u);
             EXPECT_EQ(expectedNumThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup);
             kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
@@ -123,16 +117,13 @@ XE3_CORETEST_F(Xe3KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThreadsP
         module.getMaxGroupSizeResult = UINT32_MAX;
         kernel.module = &module;
 
-        std::array<std::array<uint32_t, 3>, 4> values = {{
-            {16u, 0u, 40u}, // SIMT Size, HW local-id generation, Max Num of threads
-            {16u, 1u, 40u},
-            {32u, 1u, 32u},
-            {32u, 0u, 40u},
+        std::array<std::array<uint32_t, 2>, 2> values = {{
+            {16u, 40u}, // SIMT Size, Max Num of threads
+            {32u, 32u},
         }};
 
-        for (auto &[simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
+        for (auto &[simtSize, expectedNumThreadsPerThreadGroup] : values) {
             kernel.descriptor.kernelAttributes.simdSize = simtSize;
-            kernel.forceGenerateLocalIdByHw = isHwLocalIdGeneration;
             kernel.setGroupSize(1024u, 1024u, 1024u);
             EXPECT_EQ(expectedNumThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup);
             kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
@@ -149,16 +140,13 @@ XE3_CORETEST_F(Xe3KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThreadsP
         module.getMaxGroupSizeResult = UINT32_MAX;
         kernel.module = &module;
 
-        std::array<std::array<uint32_t, 3>, 4> values = {{
-            {16u, 0u, 32u}, // SIMT Size, HW local-id generation, Max Num of threads
-            {16u, 1u, 32u},
-            {32u, 1u, 32u},
-            {32u, 0u, 32u},
+        std::array<std::array<uint32_t, 2>, 2> values = {{
+            {16u, 32u}, // SIMT Size, Max Num of threads
+            {32u, 32u},
         }};
 
-        for (auto &[simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
+        for (auto &[simtSize, expectedNumThreadsPerThreadGroup] : values) {
             kernel.descriptor.kernelAttributes.simdSize = simtSize;
-            kernel.forceGenerateLocalIdByHw = isHwLocalIdGeneration;
             kernel.setGroupSize(1024u, 1024u, 1024u);
             EXPECT_EQ(expectedNumThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup);
             kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
@@ -175,16 +163,13 @@ XE3_CORETEST_F(Xe3KernelSetupTests, givenParamsWhenSetupGroupSizeThenNumThreadsP
         module.getMaxGroupSizeResult = UINT32_MAX;
         kernel.module = &module;
 
-        std::array<std::array<uint32_t, 3>, 4> values = {{
-            {16u, 0u, 16u}, // SIMT Size, HW local-id generation, Max Num of threads
-            {16u, 1u, 16u},
-            {32u, 1u, 16u},
-            {32u, 0u, 16u},
+        std::array<std::array<uint32_t, 2>, 2> values = {{
+            {16u, 16u}, // SIMT Size, Max Num of threads
+            {32u, 16u},
         }};
 
-        for (auto &[simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
+        for (auto &[simtSize, expectedNumThreadsPerThreadGroup] : values) {
             kernel.descriptor.kernelAttributes.simdSize = simtSize;
-            kernel.forceGenerateLocalIdByHw = isHwLocalIdGeneration;
             kernel.setGroupSize(1024u, 1024u, 1024u);
             EXPECT_EQ(expectedNumThreadsPerThreadGroup, kernel.numThreadsPerThreadGroup);
             kernel.groupSize[0] = kernel.groupSize[1] = kernel.groupSize[2] = 0;
diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl
index 79286c84fd..bf6c7d1a05 100644
--- a/opencl/source/helpers/hardware_commands_helper_base.inl
+++ b/opencl/source/helpers/hardware_commands_helper_base.inl
@@ -69,7 +69,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(const Kernel &kerne
         requiredWalkOrder,
         simdSize);
     auto size = kernel.getCrossThreadDataSize() +
-                HardwareCommandsHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, numChannels, localWorkSize, isHwLocalIdGeneration, rootDeviceEnvironment);
+                HardwareCommandsHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, numChannels, localWorkSize, rootDeviceEnvironment);
 
     auto pImplicitArgs = kernel.getImplicitArgs();
     if (pImplicitArgs) {
@@ -297,7 +297,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
     auto &gfxCoreHelper = device.getGfxCoreHelper();
     auto grfCount = kernel.getDescriptor().kernelAttributes.numGrfRequired;
     auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2];
-    auto threadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkItems), grfCount, !localIdsGenerationByRuntime, device.getRootDeviceEnvironment());
+    auto threadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkItems), grfCount, device.getRootDeviceEnvironment());
 
     uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize();
 
diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp
index 116616686e..020bbb4ba3 100644
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -2235,10 +2235,9 @@ void Kernel::reconfigureKernel() {
     const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
     const auto &gfxCoreHelper = this->getGfxCoreHelper();
     auto maxWorkGroupSize = gfxCoreHelper.calculateMaxWorkGroupSize(kernelDescriptor, this->maxKernelWorkGroupSize);
-    bool isLocalIdsGeneratedByHw = false; // if local ids generated by runtime then more work groups available
     maxWorkGroupSize = static_cast<uint32_t>(kernelInfo.getMaxRequiredWorkGroupSize(maxWorkGroupSize));
 
-    this->maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, isLocalIdsGeneratedByHw, maxWorkGroupSize, getDevice().getRootDeviceEnvironment());
+    this->maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, maxWorkGroupSize, getDevice().getRootDeviceEnvironment());
 
     this->containsStatelessWrites = kernelDescriptor.kernelAttributes.flags.usesStatelessWrites;
     this->systolicPipelineSelectMode = kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode;
diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp
index ff833eb283..2a9db40284 100644
--- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp
+++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp
@@ -1380,7 +1380,7 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
         auto numGrf = GrfConfig::defaultGrfNumber;
 
         auto size = kernelWithImplicitArgs.getCrossThreadDataSize() +
-                    HardwareCommandsHelper<FamilyType>::getPerThreadDataSizeTotal(simdSize, grfSize, numGrf, numChannels, Math::computeTotalElementsCount(workGroupSize), false, rootDeviceEnvironment) +
+                    HardwareCommandsHelper<FamilyType>::getPerThreadDataSizeTotal(simdSize, grfSize, numGrf, numChannels, Math::computeTotalElementsCount(workGroupSize), rootDeviceEnvironment) +
                     ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernelWithImplicitArgs.getImplicitArgs(), kernelWithImplicitArgs.getDescriptor(), false, rootDeviceEnvironment);
 
         size = alignUp(size, NEO::EncodeDispatchKernel<FamilyType>::getDefaultIOHAlignment());
diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp
index 0a75ce1bc3..b56325de7a 100644
--- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp
+++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp
@@ -569,7 +569,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, HardwareCommandsTest, whenSendingIndirectStateThe
     auto numChannels = modifiedKernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels;
     auto numGrf = GrfConfig::defaultGrfNumber;
     const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
-    size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(modifiedKernelInfo.getMaxSimdSize(), grfSize, numGrf, numChannels, localWorkSize, !kernelUsesLocalIds, rootDeviceEnvironment);
+    size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(modifiedKernelInfo.getMaxSimdSize(), grfSize, numGrf, numChannels, localWorkSize, rootDeviceEnvironment);
     ASSERT_LE(expectedIohSize, ioh.getUsed());
 
     auto expectedLocalIds = alignedMalloc(expectedIohSize, 64);
@@ -1295,7 +1295,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithI
     generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, numGrf, rootDeviceEnvironment);
 
     auto localIdsProgrammingSize = implicitArgsProgrammingSize - ImplicitArgsV0::getAlignedSize();
-    size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, false, rootDeviceEnvironment);
+    size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, rootDeviceEnvironment);
 
     EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds));
     alignedFree(expectedLocalIds);
@@ -1330,7 +1330,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithI
     generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, numGrf, rootDeviceEnvironment);
 
     auto localIdsProgrammingSize = implicitArgsProgrammingSize - ImplicitArgsV0::getAlignedSize();
-    size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, false, rootDeviceEnvironment);
+    size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, rootDeviceEnvironment);
 
     EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds));
     alignedFree(expectedLocalIds);
diff --git a/shared/source/helpers/aarch64/local_id_gen.cpp b/shared/source/helpers/aarch64/local_id_gen.cpp
index eee848a437..62ccfa8f06 100644
--- a/shared/source/helpers/aarch64/local_id_gen.cpp
+++ b/shared/source/helpers/aarch64/local_id_gen.cpp
@@ -44,9 +44,8 @@ LocalIDHelper::LocalIDHelper() {
 LocalIDHelper LocalIDHelper::initializer;
 
 void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) {
-    bool localIdsGeneratedByHw = false;
     auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
-    auto threadsPerWorkGroup = static_cast<uint16_t>(gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]), grfCount, localIdsGeneratedByHw, rootDeviceEnvironment));
+    auto threadsPerWorkGroup = static_cast<uint16_t>(gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]), grfCount, rootDeviceEnvironment));
     bool useLayoutForImages = isImageOnlyKernel && isCompatibleWithLayoutForImages(localWorkgroupSize, dimensionsOrder, simd);
     if (useLayoutForImages) {
         generateLocalIDsWithLayoutForImages(buffer, localWorkgroupSize, simd);
diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h
index 09cc32d5b8..193ae9bce2 100644
--- a/shared/source/helpers/gfx_core_helper.h
+++ b/shared/source/helpers/gfx_core_helper.h
@@ -120,7 +120,7 @@ class GfxCoreHelper {
     virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
     virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
                                              const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
-    virtual uint32_t adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, bool isHwLocalGeneration, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
+    virtual uint32_t adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
     virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
     virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0;
     virtual aub_stream::MMIOList getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const = 0;
@@ -165,7 +165,7 @@ class GfxCoreHelper {
     virtual bool isChipsetUniqueUUIDSupported() const = 0;
     virtual bool isTimestampShiftRequired() const = 0;
     virtual bool isRelaxedOrderingSupported() const = 0;
-    virtual uint32_t calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
+    virtual uint32_t calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
     virtual uint32_t overrideMaxWorkGroupSize(uint32_t maxWG) const = 0;
     virtual DeviceHierarchyMode getDefaultDeviceHierarchy() const = 0;
     static bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo, const ProductHelper &productHelper);
@@ -362,7 +362,7 @@ class GfxCoreHelperHw : public GfxCoreHelper {
     uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
                                      const RootDeviceEnvironment &rootDeviceEnvironment) const override;
 
-    uint32_t adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, bool isHwLocalGeneration, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const override;
+    uint32_t adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const override;
     size_t getMaxFillPaternSizeForCopyEngine() const override;
 
     bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override;
@@ -414,7 +414,7 @@ class GfxCoreHelperHw : public GfxCoreHelper {
     bool isChipsetUniqueUUIDSupported() const override;
     bool isTimestampShiftRequired() const override;
     bool isRelaxedOrderingSupported() const override;
-    uint32_t calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) const override;
+    uint32_t calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) const override;
     uint32_t overrideMaxWorkGroupSize(uint32_t maxWG) const override;
     DeviceHierarchyMode getDefaultDeviceHierarchy() const override;
 
diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl
index 2fafe1c6be..5fb83ec222 100644
--- a/shared/source/helpers/gfx_core_helper_base.inl
+++ b/shared/source/helpers/gfx_core_helper_base.inl
@@ -684,7 +684,7 @@ uint32_t GfxCoreHelperHw<GfxFamily>::overrideMaxWorkGroupSize(uint32_t maxWG) co
 }
 
 template <typename GfxFamily>
-uint32_t GfxCoreHelperHw<GfxFamily>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, bool isHwLocalGeneration, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
+uint32_t GfxCoreHelperHw<GfxFamily>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
     return defaultMaxGroupSize;
 }
 
@@ -694,7 +694,7 @@ uint32_t GfxCoreHelperHw<GfxFamily>::getMinimalGrfSize() const {
 }
 
 template <typename GfxFamily>
-uint32_t GfxCoreHelperHw<GfxFamily>::calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) const {
+uint32_t GfxCoreHelperHw<GfxFamily>::calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) const {
     return getThreadsPerWG(simd, totalWorkItems);
 }
 
diff --git a/shared/source/helpers/gfx_core_helper_xe3_and_later.inl b/shared/source/helpers/gfx_core_helper_xe3_and_later.inl
index d6f65ae0cf..348c8ce21a 100644
--- a/shared/source/helpers/gfx_core_helper_xe3_and_later.inl
+++ b/shared/source/helpers/gfx_core_helper_xe3_and_later.inl
@@ -25,4 +25,38 @@ uint32_t GfxCoreHelperHw<Family>::calculateAvailableThreadCount(const HardwareIn
     }
     return std::min(hwInfo.gtSystemInfo.ThreadCount, maxThreadsPerEuCount * hwInfo.gtSystemInfo.EUCount);
 }
+
+template <> // Specialization for Family: clamps the threads-per-thread-group count to a cap chosen from grfCount and simd.
+uint32_t GfxCoreHelperHw<Family>::calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) const {
+    uint32_t numThreadsPerThreadGroup = getThreadsPerWG(simd, totalWorkItems); // uncapped value; starting point for the clamp below
+    if (debugManager.flags.RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup.get() == 1) {
+        return numThreadsPerThreadGroup; // debug flag set to 1: skip the cap entirely
+    }
+
+    const auto &compilerProductHelper = rootDeviceEnvironment.getHelper<CompilerProductHelper>();
+    const auto &productHelper = rootDeviceEnvironment.getProductHelper();
+    const auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
+    auto isHeaplessMode = compilerProductHelper.isHeaplessModeEnabled(hwInfo);
+
+    uint32_t maxThreadsPerThreadGroup = 32u; // fallback when no branch below matches (e.g. an unlisted grfCount above 128 with simd != 32)
+    if (grfCount == 512) {
+        maxThreadsPerThreadGroup = 16u; // checked first, so it applies even when simd == 32
+    } else if ((grfCount == 256) || (simd == 32u)) {
+        // the driver limits maxWorkgroupSize to 1024 (NEO-11881), so for SIMT 32 the max threads per thread group is 32
+        maxThreadsPerThreadGroup = 32u;
+    } else if (grfCount == 192) {
+        maxThreadsPerThreadGroup = 40u;
+    } else if (grfCount == 160) {
+        maxThreadsPerThreadGroup = 48u;
+    } else if (grfCount <= 128) {
+        maxThreadsPerThreadGroup = 64u;
+    }
+
+    maxThreadsPerThreadGroup = productHelper.adjustMaxThreadsPerThreadGroup(maxThreadsPerThreadGroup, simd, grfCount, isHeaplessMode); // product-specific hook; its result replaces the table value
+
+    numThreadsPerThreadGroup = std::min(numThreadsPerThreadGroup, maxThreadsPerThreadGroup);
+    DEBUG_BREAK_IF(numThreadsPerThreadGroup * simd > CommonConstants::maxWorkgroupSize); // presumably a debug-only check that the clamped result fits the work-group size limit - TODO confirm
+    return numThreadsPerThreadGroup;
+}
+
 } // namespace NEO
diff --git a/shared/source/helpers/per_thread_data.h b/shared/source/helpers/per_thread_data.h
index 90f1bb2d34..284524885d 100644
--- a/shared/source/helpers/per_thread_data.h
+++ b/shared/source/helpers/per_thread_data.h
@@ -24,14 +24,13 @@ struct PerThreadDataHelper {
         uint32_t grfCount,
         uint32_t numChannels,
         size_t localWorkSize,
-        bool isHwLocalIdGeneration,
         const RootDeviceEnvironment &rootDeviceEnvironment) {
         auto perThreadSizeLocalIDs = static_cast<size_t>(getPerThreadSizeLocalIDs(simd, grfSize, numChannels));
         if (isSimd1(simd)) {
             return perThreadSizeLocalIDs * localWorkSize;
         }
         auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
-        return perThreadSizeLocalIDs * gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkSize), grfCount, isHwLocalIdGeneration, rootDeviceEnvironment);
+        return perThreadSizeLocalIDs * gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkSize), grfCount, rootDeviceEnvironment);
     }
 }; // namespace PerThreadDataHelper
 } // namespace NEO
diff --git a/shared/source/helpers/x86_64/local_id_gen.cpp b/shared/source/helpers/x86_64/local_id_gen.cpp
index ab69181a41..8fe776440d 100644
--- a/shared/source/helpers/x86_64/local_id_gen.cpp
+++ b/shared/source/helpers/x86_64/local_id_gen.cpp
@@ -47,9 +47,8 @@ LocalIDHelper LocalIDHelper::initializer;
 
 // traditional function to generate local IDs
 void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) {
-    bool localIdsGeneratedByHw = false;
     auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
-    auto threadsPerWorkGroup = static_cast<uint16_t>(gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]), grfCount, localIdsGeneratedByHw, rootDeviceEnvironment));
+    auto threadsPerWorkGroup = static_cast<uint16_t>(gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2]), grfCount, rootDeviceEnvironment));
     bool useLayoutForImages = isImageOnlyKernel && isCompatibleWithLayoutForImages(localWorkgroupSize, dimensionsOrder, simd);
     if (useLayoutForImages) {
         generateLocalIDsWithLayoutForImages(buffer, localWorkgroupSize, simd);
diff --git a/shared/source/kernel/implicit_args_helper.cpp b/shared/source/kernel/implicit_args_helper.cpp
index a45be26cd9..b99245703b 100644
--- a/shared/source/kernel/implicit_args_helper.cpp
+++ b/shared/source/kernel/implicit_args_helper.cpp
@@ -73,7 +73,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const
         }
 
         auto itemsInGroup = Math::computeTotalElementsCount(localWorkSize);
-        localIdsSize = static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, 3u, itemsInGroup, isHwLocalIdGeneration, rootDeviceEnvironment));
+        localIdsSize = static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(simdSize, grfSize, grfCount, 3u, itemsInGroup, rootDeviceEnvironment));
         localIdsSize = alignUp(localIdsSize, MemoryConstants::cacheLineSize);
     }
     return implicitArgsStructSize + localIdsSize;
diff --git a/shared/source/kernel/local_ids_cache.cpp b/shared/source/kernel/local_ids_cache.cpp
index 729e07d9d8..16c13a83f4 100644
--- a/shared/source/kernel/local_ids_cache.cpp
+++ b/shared/source/kernel/local_ids_cache.cpp
@@ -42,7 +42,7 @@ size_t LocalIdsCache::getLocalIdsSizeForGroup(const Vec3<uint16_t> &group, const
         return static_cast<size_t>(numElementsInGroup * localIdsSizePerThread);
     }
     auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();
-    const auto numberOfThreads = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simdSize, numElementsInGroup, grfCount, false, rootDeviceEnvironment);
+    const auto numberOfThreads = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simdSize, numElementsInGroup, grfCount, rootDeviceEnvironment);
     return static_cast<size_t>(numberOfThreads * localIdsSizePerThread);
 }
 
diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h
index 7b7a71a7d7..2dd644243a 100644
--- a/shared/source/os_interface/product_helper.h
+++ b/shared/source/os_interface/product_helper.h
@@ -257,7 +257,7 @@ class ProductHelper {
     virtual bool supports2DBlockStore() const = 0;
     virtual bool supports2DBlockLoad() const = 0;
     virtual uint32_t getNumCacheRegions() const = 0;
-    virtual uint32_t adjustMaxThreadsPerThreadGroup(uint32_t maxThreadsPerThreadGroup, uint32_t simt, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, bool isHeaplessModeEnabled) const = 0;
+    virtual uint32_t adjustMaxThreadsPerThreadGroup(uint32_t maxThreadsPerThreadGroup, uint32_t simt, uint32_t grfCount, bool isHeaplessModeEnabled) const = 0;
     virtual uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const = 0;
     virtual uint32_t getGmmResourceUsageOverride(uint32_t usageType) const = 0;
     virtual bool isSharingWith3dOrMediaAllowed() const = 0;
diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl
index 41715fb3a6..8470fe101e 100644
--- a/shared/source/os_interface/product_helper.inl
+++ b/shared/source/os_interface/product_helper.inl
@@ -972,7 +972,7 @@ bool ProductHelperHw<gfxProduct>::isL3FlushAfterPostSyncRequired(bool heaplessEn
 }
 
 template <PRODUCT_FAMILY gfxProduct>
-uint32_t ProductHelperHw<gfxProduct>::adjustMaxThreadsPerThreadGroup(uint32_t maxThreadsPerThreadGroup, uint32_t simt, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, bool isHeaplessModeEnabled) const {
+uint32_t ProductHelperHw<gfxProduct>::adjustMaxThreadsPerThreadGroup(uint32_t maxThreadsPerThreadGroup, uint32_t simt, uint32_t grfCount, bool isHeaplessModeEnabled) const {
     return maxThreadsPerThreadGroup;
 }
 
diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h
index cc9e5ffe4e..e115f354c6 100644
--- a/shared/source/os_interface/product_helper_hw.h
+++ b/shared/source/os_interface/product_helper_hw.h
@@ -194,7 +194,7 @@ class ProductHelperHw : public ProductHelper {
     bool supports2DBlockStore() const override;
     bool supports2DBlockLoad() const override;
     uint32_t getNumCacheRegions() const override;
-    uint32_t adjustMaxThreadsPerThreadGroup(uint32_t maxThreadsPerThreadGroup, uint32_t simt, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, bool isHeaplessModeEnabled) const override;
+    uint32_t adjustMaxThreadsPerThreadGroup(uint32_t maxThreadsPerThreadGroup, uint32_t simt, uint32_t grfCount, bool isHeaplessModeEnabled) const override;
     uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const override;
     uint32_t getGmmResourceUsageOverride(uint32_t usageType) const override;
     bool isSharingWith3dOrMediaAllowed() const override;
diff --git a/shared/source/xe2_hpg_core/gfx_core_helper_xe2_hpg_core.cpp b/shared/source/xe2_hpg_core/gfx_core_helper_xe2_hpg_core.cpp
index 31cf227b9f..9fbb820976 100644
--- a/shared/source/xe2_hpg_core/gfx_core_helper_xe2_hpg_core.cpp
+++ b/shared/source/xe2_hpg_core/gfx_core_helper_xe2_hpg_core.cpp
@@ -256,22 +256,25 @@ uint32_t GfxCoreHelperHw<Family>::overrideMaxWorkGroupSize(uint32_t maxWG) const
 }
 
 template <>
-uint32_t GfxCoreHelperHw<Family>::calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) const {
+uint32_t GfxCoreHelperHw<Family>::calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) const {
     uint32_t numThreadsPerThreadGroup = getThreadsPerWG(simd, totalWorkItems);
     if (debugManager.flags.RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup.get() == 1) {
         return numThreadsPerThreadGroup;
     }
-    auto simt = isSimd1(simd) ? 32u : simd;
+
     uint32_t maxThreadsPerThreadGroup = 32u;
-    if (grfCount != GrfConfig::largeGrfNumber && ((simt == 16u) || (simt == 32u && !isHwLocalIdGeneration))) {
+    // the driver limits maxWorkgroupSize to 1024 (NEO-11881), so for SIMT 32 the max threads per thread group is 32
+    if ((grfCount != GrfConfig::largeGrfNumber && (simd == 16u)) || isSimd1(simd)) {
         maxThreadsPerThreadGroup = 64u;
     }
-    return std::min(numThreadsPerThreadGroup, maxThreadsPerThreadGroup);
+    numThreadsPerThreadGroup = std::min(numThreadsPerThreadGroup, maxThreadsPerThreadGroup);
+    DEBUG_BREAK_IF(numThreadsPerThreadGroup * simd > CommonConstants::maxWorkgroupSize);
+    return numThreadsPerThreadGroup;
 }
 
 template <>
-uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, bool isHwLocalGeneration, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
-    const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, isHwLocalGeneration, rootDeviceEnvironment);
+uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
+    const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, rootDeviceEnvironment);
     return (threadsPerThreadGroup * simd);
 }
 
diff --git a/shared/source/xe3_core/gfx_core_helper_xe3_core.cpp b/shared/source/xe3_core/gfx_core_helper_xe3_core.cpp
index 57e7725e7f..0093ae6072 100644
--- a/shared/source/xe3_core/gfx_core_helper_xe3_core.cpp
+++ b/shared/source/xe3_core/gfx_core_helper_xe3_core.cpp
@@ -255,28 +255,8 @@ uint32_t GfxCoreHelperHw<Family>::overrideMaxWorkGroupSize(uint32_t maxWG) const
 }
 
 template <>
-uint32_t GfxCoreHelperHw<Family>::calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfCount, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) const {
-    uint32_t numThreadsPerThreadGroup = getThreadsPerWG(simd, totalWorkItems);
-    if (debugManager.flags.RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup.get() == 1) {
-        return numThreadsPerThreadGroup;
-    }
-    auto simt = isSimd1(simd) ? 32u : simd;
-    uint32_t maxThreadsPerThreadGroup = 32u;
-    if (grfCount == 512) {
-        maxThreadsPerThreadGroup = 16u;
-    } else if (grfCount == 192 && ((simt == 16u) || (simt == 32u && !isHwLocalIdGeneration))) {
-        maxThreadsPerThreadGroup = 40u;
-    } else if (grfCount == 160 && ((simt == 16u) || (simt == 32u && !isHwLocalIdGeneration))) {
-        maxThreadsPerThreadGroup = 48u;
-    } else if (grfCount <= 128 && ((simt == 16u) || (simt == 32u && !isHwLocalIdGeneration))) {
-        maxThreadsPerThreadGroup = 64u;
-    }
-    return std::min(numThreadsPerThreadGroup, maxThreadsPerThreadGroup);
-}
-
-template <>
-uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, bool isHwLocalGeneration, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
-    const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, isHwLocalGeneration, rootDeviceEnvironment);
+uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupSize(const uint32_t grfCount, const uint32_t simd, const uint32_t defaultMaxGroupSize, const RootDeviceEnvironment &rootDeviceEnvironment) const {
+    const uint32_t threadsPerThreadGroup = calculateNumThreadsPerThreadGroup(simd, defaultMaxGroupSize, grfCount, rootDeviceEnvironment);
     return (threadsPerThreadGroup * simd);
 }
 } // namespace NEO
diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp
index b7edf62560..a28d2ea2bc 100644
--- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp
+++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp
@@ -1650,18 +1650,16 @@ HWTEST_F(GfxCoreHelperTest, givenNumGrfAndSimdSizeWhenAdjustingMaxWorkGroupSizeT
     constexpr auto defaultMaxGroupSize = 1024u;
 
     uint32_t simdSize = 16u;
-    uint32_t isHwLocalIdGeneration = true;
     uint32_t numGrfRequired = GrfConfig::largeGrfNumber;
-    EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
+    EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
 
     simdSize = 32u;
     numGrfRequired = GrfConfig::largeGrfNumber;
-    EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
+    EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
 
     simdSize = 16u;
-    isHwLocalIdGeneration = false;
     numGrfRequired = GrfConfig::defaultGrfNumber;
-    EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, isHwLocalIdGeneration, defaultMaxGroupSize, rootDeviceEnvironment));
+    EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize, rootDeviceEnvironment));
 }
 
 HWTEST2_F(GfxCoreHelperTest, givenParamsWhenCalculateNumThreadsPerThreadGroupThenMethodReturnProperValue, IsAtMostXeHpcCore) {
@@ -1679,7 +1677,7 @@ HWTEST2_F(GfxCoreHelperTest, givenParamsWhenCalculateNumThreadsPerThreadGroupThe
     }};
 
     for (auto &[simtSize, totalWgSize, expectedNumThreadsPerThreadGroup] : values) {
-        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, 32u, true, rootDeviceEnvironment));
+        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, 32u, rootDeviceEnvironment));
     }
 }
 
@@ -1689,19 +1687,19 @@ HWTEST_F(GfxCoreHelperTest, givenFlagRemoveRestrictionsOnNumberOfThreadsInGpgpuT
     const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
     const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
 
-    std::array<std::array<uint32_t, 5>, 8> values = {{
-        {32u, 32u, 128u, 1, 1u}, // SIMT Size, totalWorkItems, Max Num of threads, Grf size, Hw local id generation
-        {32u, 64u, 32u, 1, 2u},
-        {32u, 128u, 256u, 1, 4u},
-        {32u, 1024u, 128u, 1, 32u},
-        {16u, 32u, 32u, 0, 2u},
-        {16u, 64u, 256u, 0, 4u},
-        {16u, 128u, 128u, 0, 8u},
-        {16u, 1024u, 256u, 0, 64u},
+    std::array<std::array<uint32_t, 4>, 8> values = {{
+        {32u, 32u, 128u, 1u}, // SIMT Size, totalWorkItems, Grf size, Expected num of threads per thread group
+        {32u, 64u, 32u, 2u},
+        {32u, 128u, 256u, 4u},
+        {32u, 1024u, 128u, 32u},
+        {16u, 32u, 32u, 2u},
+        {16u, 64u, 256u, 4u},
+        {16u, 128u, 128u, 8u},
+        {16u, 1024u, 256u, 64u},
     }};
 
-    for (auto &[simtSize, totalWgSize, grfsize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
-        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfsize, isHwLocalIdGeneration, rootDeviceEnvironment));
+    for (auto &[simtSize, totalWgSize, grfsize, expectedNumThreadsPerThreadGroup] : values) {
+        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfsize, rootDeviceEnvironment));
     }
 }
 
diff --git a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp
index b343b08e70..a64f0aee59 100644
--- a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp
+++ b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp
@@ -81,7 +81,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP
 
     NEO::MockExecutionEnvironment mockExecutionEnvironment{};
     auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
-    auto localIdsSize = alignUp(PerThreadDataHelper::getPerThreadDataSizeTotal(implicitArgs.v0.simdWidth, 32u /* grfSize */, GrfConfig::defaultGrfNumber /* numGrf */, 3u /* num channels */, totalWorkgroupSize, false, rootDeviceEnvironment), MemoryConstants::cacheLineSize);
+    auto localIdsSize = alignUp(PerThreadDataHelper::getPerThreadDataSizeTotal(implicitArgs.v0.simdWidth, 32u /* grfSize */, GrfConfig::defaultGrfNumber /* numGrf */, 3u /* num channels */, totalWorkgroupSize, rootDeviceEnvironment), MemoryConstants::cacheLineSize);
     EXPECT_EQ(localIdsSize + ImplicitArgsV0::getAlignedSize(), ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, false, rootDeviceEnvironment));
 }
 
diff --git a/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp b/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp
index 2d1196e8c8..c4a83fba22 100644
--- a/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp
+++ b/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp
@@ -807,23 +807,17 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenNumGrfAndSimdSizeWhenAdjus
     auto defaultMaxWorkGroupSize = 2048u;
     const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
     const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
-    std::array<std::array<uint32_t, 4>, 12> values = {{
-        {GrfConfig::defaultGrfNumber, 16u, 0u, 1024u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
-        {GrfConfig::defaultGrfNumber, 16u, 1u, 1024u},
-        {GrfConfig::defaultGrfNumber, 32u, 1u, 1024u},
-        {GrfConfig::defaultGrfNumber, 32u, 0u, 2048u},
-        {GrfConfig::largeGrfNumber, 16u, 0u, 512u},
-        {GrfConfig::largeGrfNumber, 16u, 1u, 512u},
-        {GrfConfig::largeGrfNumber, 32u, 0u, 1024u},
-        {GrfConfig::largeGrfNumber, 32u, 1u, 1024u},
-        {GrfConfig::defaultGrfNumber, 1u, 1u, 32u},
-        {GrfConfig::defaultGrfNumber, 1u, 0u, 64u},
-        {GrfConfig::largeGrfNumber, 1u, 0u, 32u},
-        {GrfConfig::largeGrfNumber, 1u, 1u, 32u},
+    std::array<std::array<uint32_t, 3>, 6> values = {{
+        {GrfConfig::defaultGrfNumber, 16u, 1024u}, // Grf Size, SIMT Size, Expected max work group size
+        {GrfConfig::defaultGrfNumber, 32u, 1024u},
+        {GrfConfig::largeGrfNumber, 16u, 512u},
+        {GrfConfig::largeGrfNumber, 32u, 1024u},
+        {GrfConfig::defaultGrfNumber, 1u, 64u},
+        {GrfConfig::largeGrfNumber, 1u, 64u},
     }};
 
-    for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
-        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, isHwLocalIdGeneration, defaultMaxWorkGroupSize, rootDeviceEnvironment));
+    for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
+        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, defaultMaxWorkGroupSize, rootDeviceEnvironment));
     }
 }
 
@@ -831,23 +825,17 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenParamsWhenCalculateNumThre
     auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
     const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
     auto totalWgSize = 2048u;
-    std::array<std::array<uint32_t, 4>, 12> values = {{
-        {GrfConfig::defaultGrfNumber, 16u, 0u, 64u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
-        {GrfConfig::defaultGrfNumber, 16u, 1u, 64u},
-        {GrfConfig::defaultGrfNumber, 32u, 1u, 32u},
-        {GrfConfig::defaultGrfNumber, 32u, 0u, 64u},
-        {GrfConfig::defaultGrfNumber, 1u, 1u, 32u},
-        {GrfConfig::defaultGrfNumber, 1u, 0u, 64u},
-        {GrfConfig::largeGrfNumber, 16u, 0u, 32u},
-        {GrfConfig::largeGrfNumber, 16u, 1u, 32u},
-        {GrfConfig::largeGrfNumber, 32u, 0u, 32u},
-        {GrfConfig::largeGrfNumber, 32u, 1u, 32u},
-        {GrfConfig::largeGrfNumber, 1u, 0u, 32u},
-        {GrfConfig::largeGrfNumber, 1u, 1u, 32u},
+    std::array<std::array<uint32_t, 3>, 6> values = {{
+        {GrfConfig::defaultGrfNumber, 16u, 64u}, // Grf Size, SIMT Size, Max Num of threads
+        {GrfConfig::defaultGrfNumber, 32u, 32u},
+        {GrfConfig::defaultGrfNumber, 1u, 64u},
+        {GrfConfig::largeGrfNumber, 16u, 32u},
+        {GrfConfig::largeGrfNumber, 32u, 32u},
+        {GrfConfig::largeGrfNumber, 1u, 64u},
     }};
 
-    for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThdreadsPerThreadGroup] : values) {
-        EXPECT_EQ(expectedNumThdreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, isHwLocalIdGeneration, rootDeviceEnvironment));
+    for (auto &[grfSize, simtSize, expectedNumThdreadsPerThreadGroup] : values) {
+        EXPECT_EQ(expectedNumThdreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, rootDeviceEnvironment));
     }
 }
 
diff --git a/shared/test/unit_test/xe3_core/gfx_core_helper_xe3_core_tests.cpp b/shared/test/unit_test/xe3_core/gfx_core_helper_xe3_core_tests.cpp
index 610e7e239a..3f158368e2 100644
--- a/shared/test/unit_test/xe3_core/gfx_core_helper_xe3_core_tests.cpp
+++ b/shared/test/unit_test/xe3_core/gfx_core_helper_xe3_core_tests.cpp
@@ -754,41 +754,26 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenNumGrfAndSimdSizeWhenAdjustingMax
     auto defaultMaxWorkGroupSize = 2048u;
     const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
     const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
-    std::array<std::array<uint32_t, 4>, 30> values = {{
-        {128u, 16u, 0u, 1024u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
-        {128u, 16u, 1u, 1024u},
-        {128u, 32u, 1u, 1024u},
-        {128u, 32u, 0u, 2048u},
-        {160u, 16u, 0u, 768u},
-        {160u, 16u, 1u, 768u},
-        {160u, 32u, 1u, 1024u},
-        {160u, 32u, 0u, 1536u},
-        {192u, 16u, 0u, 640u},
-        {192u, 16u, 1u, 640u},
-        {192u, 32u, 1u, 1024u},
-        {192u, 32u, 0u, 1280u},
-        {256u, 16u, 0u, 512u},
-        {256u, 16u, 1u, 512u},
-        {256u, 32u, 1u, 1024u},
-        {256u, 32u, 0u, 1024u},
-        {512u, 16u, 0u, 256u},
-        {512u, 16u, 1u, 256u},
-        {512u, 32u, 1u, 512u},
-        {512u, 32u, 0u, 512u},
-        {128u, 1u, 1u, 32u},
-        {128u, 1u, 0u, 64u},
-        {160u, 1u, 1u, 32u},
-        {160u, 1u, 0u, 48u},
-        {192u, 1u, 1u, 32u},
-        {192u, 1u, 0u, 40u},
-        {256u, 1u, 1u, 32u},
-        {256u, 1u, 0u, 32u},
-        {512u, 1u, 1u, 16u},
-        {512u, 1u, 0u, 16u},
+    std::array<std::array<uint32_t, 3>, 15> values = {{
+        {128u, 16u, 1024u}, // Grf Size, SIMT Size, Expected max work group size
+        {128u, 32u, 1024u},
+        {160u, 16u, 768u},
+        {160u, 32u, 1024u},
+        {192u, 16u, 640u},
+        {192u, 32u, 1024u},
+        {256u, 16u, 512u},
+        {256u, 32u, 1024u},
+        {512u, 16u, 256u},
+        {512u, 32u, 512u},
+        {128u, 1u, 64u},
+        {160u, 1u, 48u},
+        {192u, 1u, 40u},
+        {256u, 1u, 32u},
+        {512u, 1u, 16u},
     }};
 
-    for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
-        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, isHwLocalIdGeneration, defaultMaxWorkGroupSize, rootDeviceEnvironment));
+    for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
+        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.adjustMaxWorkGroupSize(grfSize, simtSize, defaultMaxWorkGroupSize, rootDeviceEnvironment));
     }
 }
 
@@ -801,41 +786,26 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenParamsWhenCalculateNumThreadsPerT
     auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
     const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
     auto totalWgSize = 2048u;
-    std::array<std::array<uint32_t, 4>, 30> values = {{
-        {128u, 16u, 0u, 64u}, // Grf Size, SIMT Size, HW local-id generation, Max Num of threads
-        {128u, 16u, 1u, 64u},
-        {128u, 32u, 1u, 32u},
-        {128u, 32u, 0u, 64u},
-        {128u, 1u, 1u, 32u},
-        {128u, 1u, 0u, 64u},
-        {160u, 16u, 0u, 48u},
-        {160u, 16u, 1u, 48u},
-        {160u, 32u, 1u, 32u},
-        {160u, 32u, 0u, 48u},
-        {160u, 1u, 1u, 32u},
-        {160u, 1u, 0u, 48u},
-        {192u, 16u, 0u, 40u},
-        {192u, 16u, 1u, 40u},
-        {192u, 32u, 1u, 32u},
-        {192u, 32u, 0u, 40u},
-        {192u, 1u, 1u, 32u},
-        {192u, 1u, 0u, 40u},
-        {256u, 16u, 0u, 32u},
-        {256u, 16u, 1u, 32u},
-        {256u, 32u, 1u, 32u},
-        {256u, 32u, 0u, 32u},
-        {256u, 1u, 1u, 32u},
-        {256u, 1u, 0u, 32u},
-        {512u, 16u, 0u, 16u},
-        {512u, 16u, 1u, 16u},
-        {512u, 32u, 1u, 16u},
-        {512u, 32u, 0u, 16u},
-        {512u, 1u, 1u, 16u},
-        {512u, 1u, 0u, 16u},
+    std::array<std::array<uint32_t, 3>, 15> values = {{
+        {128u, 16u, 64u}, // Grf Size, SIMT Size, Max Num of threads
+        {128u, 32u, 32u},
+        {128u, 1u, 64u},
+        {160u, 16u, 48u},
+        {160u, 32u, 32u},
+        {160u, 1u, 48u},
+        {192u, 16u, 40u},
+        {192u, 32u, 32u},
+        {192u, 1u, 40u},
+        {256u, 16u, 32u},
+        {256u, 32u, 32u},
+        {256u, 1u, 32u},
+        {512u, 16u, 16u},
+        {512u, 32u, 16u},
+        {512u, 1u, 16u},
     }};
 
-    for (auto &[grfSize, simtSize, isHwLocalIdGeneration, expectedNumThreadsPerThreadGroup] : values) {
-        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, isHwLocalIdGeneration, rootDeviceEnvironment));
+    for (auto &[grfSize, simtSize, expectedNumThreadsPerThreadGroup] : values) {
+        EXPECT_EQ(expectedNumThreadsPerThreadGroup, gfxCoreHelper.calculateNumThreadsPerThreadGroup(simtSize, totalWgSize, grfSize, rootDeviceEnvironment));
     }
 }