refactor: change encoder for thread group over dispatch 2/n
- bind algorithms to input arguments

Related-To: NEO-12639
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
7103af52a0
commit
046631767b
|
@ -1143,26 +1143,23 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
|
|||
threadsPerXeCore /= 2;
|
||||
}
|
||||
auto tgDispatchSizeSelected = 8;
|
||||
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
|
||||
|
||||
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
|
||||
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
|
||||
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
|
||||
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
|
||||
tgDispatchSizeSelected /= 2;
|
||||
}
|
||||
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
|
||||
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
|
||||
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
|
||||
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
|
||||
tgDispatchSizeSelected /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
auto workgroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension();
|
||||
|
||||
// make sure we fit all Xe cores
|
||||
while (workgroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
|
||||
while (threadGroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
|
||||
tgDispatchSizeSelected /= 2;
|
||||
}
|
||||
|
||||
auto threadCountPerGrouping = tgDispatchSizeSelected * numberOfThreadsInThreadGroup;
|
||||
auto threadCountPerGrouping = tgDispatchSizeSelected * threadsPerThreadGroup;
|
||||
// make sure we do not use more threads than are present on each Xe core
|
||||
while (threadCountPerGrouping > threadsPerXeCore && tgDispatchSizeSelected > 1) {
|
||||
tgDispatchSizeSelected /= 2;
|
||||
|
@ -1187,26 +1184,25 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
|
|||
uint32_t availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount);
|
||||
availableThreadCount *= tileCount;
|
||||
|
||||
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
|
||||
uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount;
|
||||
UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u);
|
||||
uint32_t dispatchedTotalThreadCount = threadsPerThreadGroup * threadGroupCount;
|
||||
UNRECOVERABLE_IF(threadsPerThreadGroup == 0u);
|
||||
auto tgDispatchSizeSelected = 1u;
|
||||
|
||||
if (dispatchedTotalThreadCount <= availableThreadCount) {
|
||||
tgDispatchSizeSelected = 1;
|
||||
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
|
||||
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
|
||||
tgDispatchSizeSelected = 8;
|
||||
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
|
||||
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
|
||||
tgDispatchSizeSelected = 4;
|
||||
} else {
|
||||
tgDispatchSizeSelected = 2;
|
||||
}
|
||||
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
|
||||
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
|
||||
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
|
||||
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
|
||||
tgDispatchSizeSelected /= 2;
|
||||
}
|
||||
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
|
||||
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
|
||||
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
|
||||
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
|
||||
tgDispatchSizeSelected /= 2;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptor
|
|||
const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) {
|
||||
const auto &productHelper = device.getProductHelper();
|
||||
if (productHelper.isDisableOverdispatchAvailable(hwInfo)) {
|
||||
if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) {
|
||||
if (threadsPerThreadGroup == 1) {
|
||||
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(2u));
|
||||
} else {
|
||||
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(3u));
|
||||
|
|
Loading…
Reference in New Issue