mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
fix: Use the proper value for hw local id generation
- remove the unused flag ForceNumberOfThreadsInGpgpuThreadGroup
- add a new flag "RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup" to restore the old path without restrictions on the number of threads in a thread group
- fix forwarding of the hw local id generation information so that numOfThreadsInThreadGroup is calculated correctly

Related-To: NEO-7952, NEO-7982
Signed-off-by: Cencelewska, Katarzyna <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
aea5f435db
commit
68d81c82a7
@@ -243,7 +243,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||
auto grfSize = kernel.getDescriptor().kernelAttributes.numGrfRequired;
|
||||
auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2];
|
||||
auto threadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkItems), grfSize, localIdsGenerationByRuntime);
|
||||
auto threadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkItems), grfSize, !localIdsGenerationByRuntime);
|
||||
|
||||
uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user