diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 073068d219..bb5344455f 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -1143,26 +1143,23 @@ void EncodeDispatchKernel::encodeThreadGroupDispatch(InterfaceDescrip threadsPerXeCore /= 2; } auto tgDispatchSizeSelected = 8; - uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup(); - if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) { - while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) { + if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) { + while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) { tgDispatchSizeSelected /= 2; } - } else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) { - while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) { + } else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) { + while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) { tgDispatchSizeSelected /= 2; } } - auto workgroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension(); - // make sure we fit all xe core - while (workgroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) { + while (threadGroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) { tgDispatchSizeSelected /= 2; } - auto threadCountPerGrouping = tgDispatchSizeSelected * numberOfThreadsInThreadGroup; + auto threadCountPerGrouping = tgDispatchSizeSelected * threadsPerThreadGroup; // make sure we do not use more threads then present on each xe core while (threadCountPerGrouping > threadsPerXeCore && tgDispatchSizeSelected > 1) { tgDispatchSizeSelected /= 2; @@ -1187,26 +1184,25 @@ void EncodeDispatchKernel::encodeThreadGroupDispatch(InterfaceDescrip uint32_t availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount); availableThreadCount *= tileCount; - uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup(); - uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount; - UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u); + uint32_t dispatchedTotalThreadCount = threadsPerThreadGroup * threadGroupCount; + UNRECOVERABLE_IF(threadsPerThreadGroup == 0u); auto tgDispatchSizeSelected = 1u; if (dispatchedTotalThreadCount <= availableThreadCount) { tgDispatchSizeSelected = 1; - } else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize8) { + } else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize8) { tgDispatchSizeSelected = 8; - } else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize4) { + } else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize4) { tgDispatchSizeSelected = 4; } else { tgDispatchSizeSelected = 2; } - if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) { - while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) { + if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) { + while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) { tgDispatchSizeSelected /= 2; } - } else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) { - while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) { + } else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) { + while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) { tgDispatchSizeSelected /= 2; } } diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index fc9ec22531..6881eec74f 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -33,7 +33,7 @@ void EncodeDispatchKernel::encodeThreadGroupDispatch(InterfaceDescriptor const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) { const auto &productHelper = device.getProductHelper(); if (productHelper.isDisableOverdispatchAvailable(hwInfo)) { - if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) { + if (threadsPerThreadGroup == 1) { interfaceDescriptor.setThreadGroupDispatchSize(static_cast(2u)); } else { interfaceDescriptor.setThreadGroupDispatchSize(static_cast(3u));