fix: Use proper value for hw local id generation

- remove useless flag ForceNumberOfThreadsInGpgpuThreadGroup
- add new flag "RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup"
to restore the old path without restrictions on the number of threads in
a thread group
- fix forwarding of information about hw local id generation so that
numOfThreadsInThreadGroup is calculated correctly

Related-To: NEO-7952, NEO-7982
Signed-off-by: Cencelewska, Katarzyna <katarzyna.cencelewska@intel.com>
This commit is contained in:
Cencelewska, Katarzyna
2023-06-23 14:26:00 +00:00
committed by Compute-Runtime-Automation
parent aea5f435db
commit 68d81c82a7
9 changed files with 31 additions and 19 deletions

View File

@@ -243,7 +243,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
auto &gfxCoreHelper = device.getGfxCoreHelper();
auto grfSize = kernel.getDescriptor().kernelAttributes.numGrfRequired;
auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2];
-auto threadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkItems), grfSize, localIdsGenerationByRuntime);
+auto threadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkItems), grfSize, !localIdsGenerationByRuntime);
uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize();