Revert "refactor: change encoder for thread group over dispatch 2/n"

This reverts commit 046631767b.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation 2024-10-29 01:25:40 +01:00 committed by Compute-Runtime-Automation
parent 7f811798c0
commit 022f9e642d
2 changed files with 19 additions and 15 deletions

View File

@ -1143,23 +1143,26 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
threadsPerXeCore /= 2;
}
auto tgDispatchSizeSelected = 8;
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
}
auto workgroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension();
// make sure we fit all xe core
while (threadGroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
while (workgroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
tgDispatchSizeSelected /= 2;
}
auto threadCountPerGrouping = tgDispatchSizeSelected * threadsPerThreadGroup;
auto threadCountPerGrouping = tgDispatchSizeSelected * numberOfThreadsInThreadGroup;
// make sure we do not use more threads than present on each xe core
while (threadCountPerGrouping > threadsPerXeCore && tgDispatchSizeSelected > 1) {
tgDispatchSizeSelected /= 2;
@ -1184,25 +1187,26 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
uint32_t availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount);
availableThreadCount *= tileCount;
uint32_t dispatchedTotalThreadCount = threadsPerThreadGroup * threadGroupCount;
UNRECOVERABLE_IF(threadsPerThreadGroup == 0u);
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount;
UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u);
auto tgDispatchSizeSelected = 1u;
if (dispatchedTotalThreadCount <= availableThreadCount) {
tgDispatchSizeSelected = 1;
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
tgDispatchSizeSelected = 8;
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
tgDispatchSizeSelected = 4;
} else {
tgDispatchSizeSelected = 2;
}
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
}

View File

@ -33,7 +33,7 @@ void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptor
const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) {
const auto &productHelper = device.getProductHelper();
if (productHelper.isDisableOverdispatchAvailable(hwInfo)) {
if (threadsPerThreadGroup == 1) {
if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) {
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(2u));
} else {
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(3u));