From ed897c302d929c6ead9c5d442884399be54db961 Mon Sep 17 00:00:00 2001 From: Michal Mrozek Date: Fri, 3 Nov 2023 11:56:01 +0000 Subject: [PATCH] performance: Implement V2 version of tg dispatch size algorithm. Signed-off-by: Michal Mrozek Related-To: NEO-6989 -Prevent imbalance in multi dimensional dispatches -Make sure to utilize as many EUs as possible -Prefer the highest possible tg dispatch count -Make sure that xe_core doesn't have uneven workgroups --- .../debug_settings/debug_variables_base.inl | 1 + .../command_encoder_xe_hpc_core.cpp | 90 ++++++++--- shared/test/common/test_files/igdrcl.config | 1 + .../xe_hpc_core/test_encode_xe_hpc_core.cpp | 144 ++++++++++++++++++ 4 files changed, 215 insertions(+), 21 deletions(-) diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index b6a815c794..680db53585 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -234,6 +234,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceStatelessMocsEncryptionBit, -1, "-1: defaul DECLARE_DEBUG_VARIABLE(int32_t, CopyHostPtrOnCpu, -1, "-1: default, 0: disable, 1:enable, In clCreateBuffer with CL_MEM_COPY_HOST_PTR, copy memory using locked ptr on cpu") DECLARE_DEBUG_VARIABLE(int32_t, ForceZeDeviceCanAccessPerReturnValue, -1, "-1: default, 0: zeDeviceCanAccessPeer always return false 1: zeDeviceCanAccessPeer always return true") DECLARE_DEBUG_VARIABLE(int32_t, AdjustThreadGroupDispatchSize, -1, "-1: default, 0: do not adjust thread group dispatch size 1: adjust thread group dispatch size (PVC)") +DECLARE_DEBUG_VARIABLE(int32_t, ForceThreadGroupDispatchSizeAlgorithm, -1, "-1: default, 0: algorithm v1.0 1: algorithm v2.0") DECLARE_DEBUG_VARIABLE(int32_t, ForceNonblockingExecbufferCalls, -1, "-1: default, 0: make execbuffer call blocking, 1: make execbuffer call nonblocking. 
Supported only in prelim i915 kernels.") DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlush, -1, "-1: default, 0: disable 1: Enable all flushing bits in ComputeWalker->PostSync") DECLARE_DEBUG_VARIABLE(int32_t, NumberOfRegularContextsPerEngine, -1, "-1: default, >0: Create more than 1 Regular contexts for the same engine") diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index cc83220462..ec42ac44e9 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -41,31 +41,19 @@ void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCR if (DebugManager.flags.AdjustThreadGroupDispatchSize.get() != -1) { adjustTGDispatchSize = !!DebugManager.flags.AdjustThreadGroupDispatchSize.get(); } - if (adjustTGDispatchSize) { - UNRECOVERABLE_IF(numGrf == 0u); + auto algorithmVersion = 1u; + if (DebugManager.flags.ForceThreadGroupDispatchSizeAlgorithm.get() != -1) { + algorithmVersion = DebugManager.flags.ForceThreadGroupDispatchSizeAlgorithm.get(); + } - constexpr uint32_t maxThreadsInTGForTGDispatchSize8 = 16u; - constexpr uint32_t maxThreadsInTGForTGDispatchSize4 = 32u; - auto &gfxCoreHelper = device.getGfxCoreHelper(); - uint32_t availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, numGrf); - if (ImplicitScalingHelper::isImplicitScalingEnabled(device.getDeviceBitfield(), true)) { - const uint32_t tilesCount = device.getNumSubDevices(); - availableThreadCount *= tilesCount; + if (algorithmVersion == 2) { + auto threadsPerXeCore = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.MaxSubSlicesSupported; + if (numGrf == 256) { + threadsPerXeCore /= 2; } + auto tgDispatchSizeSelected = 8; uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup(); - uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount; 
- UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u); - auto tgDispatchSizeSelected = 1u; - if (dispatchedTotalThreadCount <= availableThreadCount) { - tgDispatchSizeSelected = 1; - } else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize8) { - tgDispatchSizeSelected = 8; - } else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize4) { - tgDispatchSizeSelected = 4; - } else { - tgDispatchSizeSelected = 2; - } if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) { while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) { tgDispatchSizeSelected /= 2; @@ -75,6 +63,22 @@ void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCR tgDispatchSizeSelected /= 2; } } + + auto workgroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); + auto tileCount = ImplicitScalingHelper::isImplicitScalingEnabled(device.getDeviceBitfield(), true) ? 
device.getNumSubDevices() : 1u; + + // make sure we fit all xe core + while (workgroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) { + tgDispatchSizeSelected /= 2; + } + + auto threadCountPerGrouping = tgDispatchSizeSelected * numberOfThreadsInThreadGroup; + // make sure we do not use more threads then present on each xe core + while (threadCountPerGrouping > threadsPerXeCore && tgDispatchSizeSelected > 1) { + tgDispatchSizeSelected /= 2; + threadCountPerGrouping /= 2; + } + if (tgDispatchSizeSelected == 8) { interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8); } else if (tgDispatchSizeSelected == 1) { @@ -84,6 +88,50 @@ void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCR } else { interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4); } + } else { + if (adjustTGDispatchSize) { + UNRECOVERABLE_IF(numGrf == 0u); + constexpr uint32_t maxThreadsInTGForTGDispatchSize8 = 16u; + constexpr uint32_t maxThreadsInTGForTGDispatchSize4 = 32u; + auto &gfxCoreHelper = device.getGfxCoreHelper(); + uint32_t availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, numGrf); + if (ImplicitScalingHelper::isImplicitScalingEnabled(device.getDeviceBitfield(), true)) { + const uint32_t tilesCount = device.getNumSubDevices(); + availableThreadCount *= tilesCount; + } + uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup(); + uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount; + UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u); + auto tgDispatchSizeSelected = 1u; + + if (dispatchedTotalThreadCount <= availableThreadCount) { + tgDispatchSizeSelected = 1; + } else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize8) { + tgDispatchSizeSelected = 8; + } else if 
(numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize4) { + tgDispatchSizeSelected = 4; + } else { + tgDispatchSizeSelected = 2; + } + if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) { + while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) { + tgDispatchSizeSelected /= 2; + } + } else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) { + while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) { + tgDispatchSizeSelected /= 2; + } + } + if (tgDispatchSizeSelected == 8) { + interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8); + } else if (tgDispatchSizeSelected == 1) { + interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1); + } else if (tgDispatchSizeSelected == 2) { + interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2); + } else { + interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4); + } + } } } diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index fb2b00a5a8..0b3d19b73f 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -558,4 +558,5 @@ PrintGmmCompressionParams = 0 SkipInOrderNonWalkerSignalingAllowed = 0 PrintKernelDispatchParameters = 0 SetAmountOfReusableAllocationsPerCmdQueue = -1 +ForceThreadGroupDispatchSizeAlgorithm = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp index 2100d3297e..d73015a6d2 100644 --- a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp +++ 
b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp @@ -535,6 +535,8 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenMultipleTilesAndImplicitScalin } XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNumberOfThreadsInThreadGroupWhenCallingAdjustInterfaceDescriptorDataThenThreadGroupDispatchSizeIsCorrectlySet) { + DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadGroupDispatchSizeAlgorithm.set(1u); using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; WALKER_TYPE walkerCmd{}; @@ -559,6 +561,8 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNumberOfThreadsInThreadGroupWh } XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNumberOfThreadsInThreadGroupAndDimensionsWhenCallingAdjustInterfaceDescriptorDataThenThreadGroupDispatchSizeIsCorrectlySet) { + DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadGroupDispatchSizeAlgorithm.set(1u); using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; WALKER_TYPE walkerCmd{}; @@ -675,6 +679,146 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDifferentNumGrfWhenCallingAdju } } +XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenVariousDispatchParamtersWhenAlogrithmV2IsUsedThenProperThreadGroupDispatchSizeIsChoosen) { + DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadGroupDispatchSizeAlgorithm.set(2u); + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + WALKER_TYPE walkerCmd{}; + const auto &productHelper = pDevice->getProductHelper(); + auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); + mutableHwInfo->gtSystemInfo.MaxSubSlicesSupported = 64u; + mutableHwInfo->gtSystemInfo.ThreadCount = 4096u; + auto hwInfo = pDevice->getHardwareInfo(); + + hwInfo.platform.usRevId = 
productHelper.getHwRevIdFromStepping(REVISION_B, hwInfo); + auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); + uint32_t numGrf = GrfConfig::DefaultGrfNumber; + const uint32_t threadGroupCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, numGrf); + + INTERFACE_DESCRIPTOR_DATA iddArg = FamilyType::cmdInitInterfaceDescriptorData; + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(1u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(1); + walkerCmd.setThreadGroupIdYDimension(1); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(1u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(256); + walkerCmd.setThreadGroupIdYDimension(1); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(64u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(64); + walkerCmd.setThreadGroupIdYDimension(1); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(1u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(512); + walkerCmd.setThreadGroupIdYDimension(1); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, 
*pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(32u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(512); + walkerCmd.setThreadGroupIdYDimension(1); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(8u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(512); + walkerCmd.setThreadGroupIdYDimension(1); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(32u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(510); + walkerCmd.setThreadGroupIdYDimension(512); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(32u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(509); + walkerCmd.setThreadGroupIdYDimension(512); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, 
iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(32u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(508); + walkerCmd.setThreadGroupIdYDimension(512); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(16u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(508); + walkerCmd.setThreadGroupIdYDimension(512); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(16u); + numGrf = GrfConfig::LargeGrfNumber; + walkerCmd.setThreadGroupIdXDimension(508); + walkerCmd.setThreadGroupIdYDimension(512); + walkerCmd.setThreadGroupIdZDimension(1); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(32u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(1); + walkerCmd.setThreadGroupIdYDimension(510); + walkerCmd.setThreadGroupIdZDimension(512); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(32u); + numGrf = GrfConfig::DefaultGrfNumber; + 
walkerCmd.setThreadGroupIdXDimension(1); + walkerCmd.setThreadGroupIdYDimension(509); + walkerCmd.setThreadGroupIdZDimension(512); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(16u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(1); + walkerCmd.setThreadGroupIdYDimension(508); + walkerCmd.setThreadGroupIdZDimension(512); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize()); + + iddArg.setNumberOfThreadsInGpgpuThreadGroup(32u); + numGrf = GrfConfig::DefaultGrfNumber; + walkerCmd.setThreadGroupIdXDimension(1); + walkerCmd.setThreadGroupIdYDimension(508); + walkerCmd.setThreadGroupIdZDimension(512); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize()); +} + XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNumberOfThreadsInThreadGroupAndDebugFlagDisabledWhenCallingAdjustInterfaceDescriptorDataThenThreadGroupDispatchSizeIsDefault) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;