2017-12-21 00:45:38 +01:00
|
|
|
/*
|
2024-03-21 18:54:41 +00:00
|
|
|
* Copyright (C) 2018-2024 Intel Corporation
|
2017-12-21 00:45:38 +01:00
|
|
|
*
|
2018-09-18 09:11:08 +02:00
|
|
|
* SPDX-License-Identifier: MIT
|
2017-12-21 00:45:38 +01:00
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
2023-09-26 14:07:13 +00:00
|
|
|
#include "shared/source/helpers/simd_helper.h"
|
|
|
|
|
|
2018-08-06 11:35:59 +02:00
|
|
|
#include <array>
|
2023-01-13 15:18:40 +00:00
|
|
|
#include <cstddef>
|
2018-08-06 11:35:59 +02:00
|
|
|
#include <cstdint>
|
|
|
|
|
|
2019-03-26 11:59:46 +01:00
|
|
|
namespace NEO {
|
2024-03-21 18:54:41 +00:00
|
|
|
// Forward declaration: this header only refers to RootDeviceEnvironment by const reference.
struct RootDeviceEnvironment;
|
2023-09-26 14:07:13 +00:00
|
|
|
// Returns how many GRFs a single local ID coordinate takes:
// 2 when both simd and grfSize equal 32, otherwise 1.
inline uint32_t getNumGrfsPerLocalIdCoordinate(uint32_t simd, uint32_t grfSize) {
    if (simd == 32 && grfSize == 32) {
        return 2u;
    }
    return 1u;
}
|
|
|
|
|
|
2023-06-12 11:41:13 +00:00
|
|
|
inline uint32_t getThreadsPerWG(uint32_t simd, uint32_t lws) {
|
2023-09-26 14:07:13 +00:00
|
|
|
if (isSimd1(simd)) {
|
|
|
|
|
return lws;
|
|
|
|
|
}
|
2017-12-21 00:45:38 +01:00
|
|
|
auto result = lws + simd - 1;
|
|
|
|
|
|
|
|
|
|
// Original logic:
|
|
|
|
|
// result = (lws + simd - 1) / simd;
|
|
|
|
|
// This sequence is meant to avoid an CPU DIV instruction.
|
|
|
|
|
result >>= simd == 32
|
|
|
|
|
? 5
|
2021-03-15 11:39:58 +00:00
|
|
|
: simd == 16
|
|
|
|
|
? 4
|
2023-09-26 14:07:13 +00:00
|
|
|
: 3; // for SIMD 8
|
2017-12-21 00:45:38 +01:00
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-17 08:55:09 +01:00
|
|
|
inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) {
|
2023-09-26 14:07:13 +00:00
|
|
|
if (isSimd1(simd)) {
|
|
|
|
|
return grfSize;
|
|
|
|
|
}
|
|
|
|
|
auto numGRFSPerLocalIdCoord = getNumGrfsPerLocalIdCoordinate(simd, grfSize);
|
|
|
|
|
uint32_t returnSize = numGRFSPerLocalIdCoord * grfSize * numChannels;
|
2018-04-03 17:06:44 +02:00
|
|
|
return returnSize;
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Holds the function pointers used to generate local IDs for SIMD 8, 16 and 32.
// The pointers and `initializer` are only declared here; their definitions live elsewhere.
// NOTE(review): presumably the private constructor assigns the pointers when
// `initializer` is constructed - confirm against the implementation file.
struct LocalIDHelper {
    // Signature shared by all generator entry points below.
    using GenerateFn = void (*)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);

    static GenerateFn generateSimd8;
    static GenerateFn generateSimd16;
    static GenerateFn generateSimd32;

    // Single static instance; the constructor is private, so no other instance can be created from outside.
    static LocalIDHelper initializer;

  private:
    LocalIDHelper();
};
|
|
|
|
|
|
|
|
|
|
// Array of 16-bit constants defined in another translation unit; its length is not visible from this header.
// NOTE(review): presumably the starting lane IDs consumed by the SIMD generators - confirm at the definition.
extern const uint16_t initialLocalID[];
|
|
|
|
|
|
|
|
|
|
// Declaration of the generator template, parameterized on a vector type `Vec`
// and a compile-time SIMD width `simd`. The definition is not in this header.
// Its parameter list matches the function pointers stored in LocalIDHelper.
// NOTE(review): presumably writes the generated local IDs into `b` - confirm at the definition.
template <typename Vec, int simd>
void generateLocalIDsSimd(void *b,
                          const std::array<uint16_t, 3> &localWorkgroupSize,
                          uint16_t threadsPerWorkGroup,
                          const std::array<uint8_t, 3> &dimensionsOrder,
                          bool chooseMaxRowSize);
|
2018-08-06 11:35:59 +02:00
|
|
|
|
|
|
|
|
void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize,
|
2024-03-22 09:39:15 +00:00
|
|
|
const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment);
|
2018-08-20 11:23:20 +02:00
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably writes local IDs into `b` using a layout tuned for image access - confirm at the definition.
void generateLocalIDsWithLayoutForImages(void *b, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t simd);
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2018-08-20 11:23:20 +02:00
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably reports whether the given work-group size, dimension order and SIMD width
// can use generateLocalIDsWithLayoutForImages - confirm at the definition.
bool isCompatibleWithLayoutForImages(const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, uint16_t simd);
|
2019-10-23 09:36:37 +02:00
|
|
|
|
|
|
|
|
// Declaration of the local ID generator for the SIMD 1 case; the definition is not in this header.
// Unlike the SIMD 8/16/32 generators it takes the GRF size and no threads-per-work-group count.
// NOTE(review): presumably writes the local IDs into `b` - confirm at the definition.
void generateLocalIDsForSimdOne(void *b,
                                const std::array<uint16_t, 3> &localWorkgroupSize,
                                const std::array<uint8_t, 3> &dimensionsOrder,
                                uint32_t grfSize);
|
2020-02-04 17:58:41 +01:00
|
|
|
} // namespace NEO
|