diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index ea528e59ae..b0059ff63b 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -36,7 +36,7 @@ void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCR if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) { interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1); - bool adjustTGDispatchSize = true; + bool adjustTGDispatchSize = hwInfo.gtSystemInfo.MaxDualSubSlicesSupported == hwInfo.gtSystemInfo.DualSubSliceCount; if (DebugManager.flags.AdjustThreadGroupDispatchSize.get() != -1) { adjustTGDispatchSize = !!DebugManager.flags.AdjustThreadGroupDispatchSize.get(); } @@ -49,7 +49,6 @@ void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCR uint32_t availableThreadCount = hwHelper.calculateAvailableThreadCount(hwInfo, numGrf); uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup(); uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount; - UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u); if (dispatchedTotalThreadCount <= availableThreadCount) { diff --git a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp index 5c252b877d..4a2fae956c 100644 --- a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp @@ -510,6 +510,28 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndU (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED))); } +XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenInterfaceDescriptorDataAndNonSymmetricalSkuWhenAdjustInterfaceDescriptorDataIsCalledThenThreadGroupDispatchSizeIsCorrectlySet) { + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + + INTERFACE_DESCRIPTOR_DATA iddArg = FamilyType::cmdInitInterfaceDescriptorData; + const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); + auto hwInfo = pDevice->getHardwareInfo(); + hwInfo.gtSystemInfo.DualSubSliceCount = 32u; + ASSERT_NE(hwInfo.gtSystemInfo.DualSubSliceCount, hwInfo.gtSystemInfo.MaxDualSubSlicesSupported); + + for (const auto &revision : {REVISION_A0, REVISION_B}) { + hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(revision, hwInfo); + + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, hwInfo, 0, 0); + + if (hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) { + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); + } else { + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); + } + } +} + XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDispatchSizeSmallerOrEqualToAvailableThreadCountWhenAdjustInterfaceDescriptorDataIsCalledThenThreadGroupDispatchSizeIsCorrectlySet) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; @@ -548,7 +570,11 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNumberOfThreadsInThreadGroupWh EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, hwInfo, threadGroupCount, numGrf); - EXPECT_EQ(expectedThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize()); + if (hwInfo.gtSystemInfo.MaxDualSubSlicesSupported == hwInfo.gtSystemInfo.DualSubSliceCount) { + EXPECT_EQ(expectedThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize()); + } else { + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); + } } }