refactor: change encoder for thread group over dispatch 2/n

- bind algorithms to input arguments

Related-To: NEO-12639

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2024-10-25 16:31:41 +00:00 committed by Compute-Runtime-Automation
parent 7103af52a0
commit 046631767b
2 changed files with 15 additions and 19 deletions

View File

@ -1143,26 +1143,23 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
threadsPerXeCore /= 2;
}
auto tgDispatchSizeSelected = 8;
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
}
auto workgroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension();
// make sure we fit all xe core
while (workgroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
while (threadGroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
tgDispatchSizeSelected /= 2;
}
auto threadCountPerGrouping = tgDispatchSizeSelected * numberOfThreadsInThreadGroup;
auto threadCountPerGrouping = tgDispatchSizeSelected * threadsPerThreadGroup;
// make sure we do not use more threads then present on each xe core
while (threadCountPerGrouping > threadsPerXeCore && tgDispatchSizeSelected > 1) {
tgDispatchSizeSelected /= 2;
@ -1187,26 +1184,25 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
uint32_t availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount);
availableThreadCount *= tileCount;
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount;
UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u);
uint32_t dispatchedTotalThreadCount = threadsPerThreadGroup * threadGroupCount;
UNRECOVERABLE_IF(threadsPerThreadGroup == 0u);
auto tgDispatchSizeSelected = 1u;
if (dispatchedTotalThreadCount <= availableThreadCount) {
tgDispatchSizeSelected = 1;
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
tgDispatchSizeSelected = 8;
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
tgDispatchSizeSelected = 4;
} else {
tgDispatchSizeSelected = 2;
}
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
tgDispatchSizeSelected /= 2;
}
}

View File

@ -33,7 +33,7 @@ void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptor
const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) {
const auto &productHelper = device.getProductHelper();
if (productHelper.isDisableOverdispatchAvailable(hwInfo)) {
if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) {
if (threadsPerThreadGroup == 1) {
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(2u));
} else {
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(3u));