fix: Correct logic for SIMD1

- For calculating the number of threads per workgroup, treat SIMD 1 as if it
  were SIMD 32
- Correct the logic for calculating the space for per-thread data for SIMD 1
- Minor: unit tests refactor
- Corrected naming
Related-To: NEO-8261
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
This commit is contained in:
Kacper Nowak
2023-09-01 19:20:39 +02:00
committed by Compute-Runtime-Automation
parent b053e9348e
commit fc099ead2e
4 changed files with 68 additions and 17 deletions

View File

@@ -7,17 +7,21 @@
#pragma once
#include "shared/source/helpers/simd_helper.h"
#include <array>
#include <cstddef>
#include <cstdint>
namespace NEO {
class GfxCoreHelper;
inline uint32_t getGRFsPerThread(uint32_t simd, uint32_t grfSize) {
inline uint32_t getNumGrfsPerLocalIdCoordinate(uint32_t simd, uint32_t grfSize) {
return (simd == 32 && grfSize == 32) ? 2 : 1;
}
inline uint32_t getThreadsPerWG(uint32_t simd, uint32_t lws) {
if (isSimd1(simd))
simd = 32;
auto result = lws + simd - 1;
// Original logic:
@@ -27,17 +31,17 @@ inline uint32_t getThreadsPerWG(uint32_t simd, uint32_t lws) {
? 5
: simd == 16
? 4
: simd == 8
? 3
: 0;
: 3; // for SIMD 8
return result;
}
inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) {
auto numGRFSPerThread = getGRFsPerThread(simd, grfSize);
uint32_t returnSize = numGRFSPerThread * grfSize * (simd == 1 ? 1u : numChannels);
returnSize = std::max(returnSize, grfSize);
if (isSimd1(simd)) {
return grfSize;
}
auto numGRFSPerLocalIdCoord = getNumGrfsPerLocalIdCoordinate(simd, grfSize);
uint32_t returnSize = numGRFSPerLocalIdCoord * grfSize * numChannels;
return returnSize;
}

View File

@@ -8,6 +8,7 @@
#pragma once
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/local_id_gen.h"
#include "shared/source/helpers/simd_helper.h"
#include <cstddef>
#include <cstdint>
@@ -23,7 +24,11 @@ struct PerThreadDataHelper {
size_t localWorkSize,
bool isHwLocalIdGeneration,
const GfxCoreHelper &gfxCoreHelper) {
return gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkSize), grfSize, isHwLocalIdGeneration) * getPerThreadSizeLocalIDs(simd, grfSize, numChannels);
auto perThreadSizeLocalIDs = static_cast<size_t>(getPerThreadSizeLocalIDs(simd, grfSize, numChannels));
if (isSimd1(simd)) {
return perThreadSizeLocalIDs * localWorkSize;
}
return perThreadSizeLocalIDs * gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkSize), grfSize, isHwLocalIdGeneration);
}
}; // struct PerThreadDataHelper
} // namespace NEO