mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-23 11:03:02 +08:00
fix: Correct logic for SIMD1
- When calculating the number of threads per workgroup, treat SIMD 1 as if it were SIMD 32 - Correct the logic for calculating the space for per-thread data for SIMD 1 - Minor: unit tests refactor - Corrected naming Related-To: NEO-8261 Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b053e9348e
commit
fc099ead2e
@@ -7,17 +7,21 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/helpers/simd_helper.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
class GfxCoreHelper;
|
||||
/// Number of GRF registers used by one local-ID coordinate (as the name suggests).
///
/// @param simd    SIMD width of the kernel.
/// @param grfSize Size of a single GRF register (presumably in bytes; confirm against callers).
/// @return 2 only for the SIMD32 / grfSize == 32 combination, 1 for every other input.
inline uint32_t getNumGrfsPerLocalIdCoordinate(uint32_t simd, uint32_t grfSize) {
    return (simd == 32 && grfSize == 32) ? 2 : 1;
}
|
||||
|
||||
inline uint32_t getThreadsPerWG(uint32_t simd, uint32_t lws) {
|
||||
if (isSimd1(simd))
|
||||
simd = 32;
|
||||
auto result = lws + simd - 1;
|
||||
|
||||
// Original logic:
|
||||
@@ -27,17 +31,17 @@ inline uint32_t getThreadsPerWG(uint32_t simd, uint32_t lws) {
|
||||
? 5
|
||||
: simd == 16
|
||||
? 4
|
||||
: simd == 8
|
||||
? 3
|
||||
: 0;
|
||||
: 3; // for SIMD 8
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) {
|
||||
auto numGRFSPerThread = getGRFsPerThread(simd, grfSize);
|
||||
uint32_t returnSize = numGRFSPerThread * grfSize * (simd == 1 ? 1u : numChannels);
|
||||
returnSize = std::max(returnSize, grfSize);
|
||||
if (isSimd1(simd)) {
|
||||
return grfSize;
|
||||
}
|
||||
auto numGRFSPerLocalIdCoord = getNumGrfsPerLocalIdCoordinate(simd, grfSize);
|
||||
uint32_t returnSize = numGRFSPerLocalIdCoord * grfSize * numChannels;
|
||||
return returnSize;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#pragma once
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/local_id_gen.h"
|
||||
#include "shared/source/helpers/simd_helper.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
@@ -23,7 +24,11 @@ struct PerThreadDataHelper {
|
||||
size_t localWorkSize,
|
||||
bool isHwLocalIdGeneration,
|
||||
const GfxCoreHelper &gfxCoreHelper) {
|
||||
return gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkSize), grfSize, isHwLocalIdGeneration) * getPerThreadSizeLocalIDs(simd, grfSize, numChannels);
|
||||
auto perThreadSizeLocalIDs = static_cast<size_t>(getPerThreadSizeLocalIDs(simd, grfSize, numChannels));
|
||||
if (isSimd1(simd)) {
|
||||
return perThreadSizeLocalIDs * localWorkSize;
|
||||
}
|
||||
return perThreadSizeLocalIDs * gfxCoreHelper.calculateNumThreadsPerThreadGroup(simd, static_cast<uint32_t>(localWorkSize), grfSize, isHwLocalIdGeneration);
|
||||
}
|
||||
}; // namespace PerThreadDataHelper
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user