2017-12-21 07:45:38 +08:00
|
|
|
/*
|
2024-03-22 02:54:41 +08:00
|
|
|
* Copyright (C) 2018-2024 Intel Corporation
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
2018-09-18 15:11:08 +08:00
|
|
|
* SPDX-License-Identifier: MIT
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2023-09-26 22:07:13 +08:00
|
|
|
#include "shared/source/helpers/simd_helper.h"
|
|
|
|
|
2018-08-06 17:35:59 +08:00
|
|
|
#include <array>
|
2023-01-13 23:18:40 +08:00
|
|
|
#include <cstddef>
|
2018-08-06 17:35:59 +08:00
|
|
|
#include <cstdint>
|
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
namespace NEO {
|
2024-03-22 02:54:41 +08:00
|
|
|
struct RootDeviceEnvironment;
|
2023-09-26 22:07:13 +08:00
|
|
|
// Returns how many GRFs a single local-ID coordinate occupies for one thread:
// 2 when both simd and grfSize equal 32, otherwise 1.
// NOTE(review): presumably grfSize is in bytes and local IDs are 16-bit, so
// 32 lanes do not fit into one 32-byte register - confirm against the callers.
inline uint32_t getNumGrfsPerLocalIdCoordinate(uint32_t simd, uint32_t grfSize) {
    const bool needsTwoGrfs = (simd == 32) && (grfSize == 32);
    if (needsTwoGrfs) {
        return 2;
    }
    return 1;
}
|
|
|
|
|
2023-06-12 19:41:13 +08:00
|
|
|
inline uint32_t getThreadsPerWG(uint32_t simd, uint32_t lws) {
|
2023-09-26 22:07:13 +08:00
|
|
|
if (isSimd1(simd)) {
|
|
|
|
return lws;
|
|
|
|
}
|
2017-12-21 07:45:38 +08:00
|
|
|
auto result = lws + simd - 1;
|
|
|
|
|
|
|
|
// Original logic:
|
|
|
|
// result = (lws + simd - 1) / simd;
|
|
|
|
// This sequence is meant to avoid an CPU DIV instruction.
|
|
|
|
result >>= simd == 32
|
|
|
|
? 5
|
2021-03-15 19:39:58 +08:00
|
|
|
: simd == 16
|
|
|
|
? 4
|
2023-09-26 22:07:13 +08:00
|
|
|
: 3; // for SIMD 8
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2019-12-17 15:55:09 +08:00
|
|
|
inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) {
|
2023-09-26 22:07:13 +08:00
|
|
|
if (isSimd1(simd)) {
|
|
|
|
return grfSize;
|
|
|
|
}
|
|
|
|
auto numGRFSPerLocalIdCoord = getNumGrfsPerLocalIdCoordinate(simd, grfSize);
|
|
|
|
uint32_t returnSize = numGRFSPerLocalIdCoord * grfSize * numChannels;
|
2018-04-03 23:06:44 +08:00
|
|
|
return returnSize;
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Holder for the per-SIMD-width local-ID generator entry points.
//
// Each generateSimdN member is a static function pointer with the same
// signature; the pointers and the `initializer` instance are only declared
// here and are defined elsewhere.
// NOTE(review): presumably the private constructor, run for `initializer`,
// selects the implementations the pointers refer to - confirm in the source file.
struct LocalIDHelper {
    // Generator entry point for SIMD 8.
    static void (*generateSimd8)(
        void *buffer,
        const std::array<uint16_t, 3> &localWorkgroupSize,
        uint16_t threadsPerWorkGroup,
        const std::array<uint8_t, 3> &dimensionsOrder,
        bool chooseMaxRowSize);

    // Generator entry point for SIMD 16.
    static void (*generateSimd16)(
        void *buffer,
        const std::array<uint16_t, 3> &localWorkgroupSize,
        uint16_t threadsPerWorkGroup,
        const std::array<uint8_t, 3> &dimensionsOrder,
        bool chooseMaxRowSize);

    // Generator entry point for SIMD 32.
    static void (*generateSimd32)(
        void *buffer,
        const std::array<uint16_t, 3> &localWorkgroupSize,
        uint16_t threadsPerWorkGroup,
        const std::array<uint8_t, 3> &dimensionsOrder,
        bool chooseMaxRowSize);

    // The single statically stored instance of this helper.
    static LocalIDHelper initializer;

  private:
    // Not constructible from outside the struct.
    LocalIDHelper();
};
|
|
|
|
|
|
|
|
// Constant array of 16-bit values, declared here with unspecified bound and
// defined in another translation unit.
// NOTE(review): presumably the starting lane IDs consumed by the SIMD
// local-ID generators - confirm against the definition.
extern const uint16_t initialLocalID[];
|
|
|
|
|
|
|
|
// Local-ID generator parameterised on a vector type and a SIMD width.
// Declaration only; the definition is provided elsewhere.
// The parameter list matches the function pointers stored in LocalIDHelper.
// NOTE(review): presumably writes the generated IDs into `b` for
// `threadsPerWorkGroup` threads - confirm against the definition.
template <typename Vec, int simd>
void generateLocalIDsSimd(
    void *b,
    const std::array<uint16_t, 3> &localWorkgroupSize,
    uint16_t threadsPerWorkGroup,
    const std::array<uint8_t, 3> &dimensionsOrder,
    bool chooseMaxRowSize);
|
2018-08-06 17:35:59 +08:00
|
|
|
|
|
|
|
void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize,
|
2024-03-22 17:39:15 +08:00
|
|
|
const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment);
|
2018-08-20 17:23:20 +08:00
|
|
|
// Local-ID generator variant using the layout intended for images.
// Declaration only; the definition is provided elsewhere.
// NOTE(review): presumably valid only when isCompatibleWithLayoutForImages()
// accepts the same work-group size and SIMD width - confirm against the callers.
void generateLocalIDsWithLayoutForImages(
    void *b,
    const std::array<uint16_t, 3> &localWorkgroupSize,
    uint16_t simd);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2018-08-20 17:23:20 +08:00
|
|
|
// Predicate over a work-group size, dimension order and SIMD width.
// Declaration only; the definition is provided elsewhere.
// NOTE(review): presumably returns true when the image-specific local-ID
// layout can be used for this configuration - confirm against the definition.
bool isCompatibleWithLayoutForImages(
    const std::array<uint16_t, 3> &localWorkgroupSize,
    const std::array<uint8_t, 3> &dimensionsOrder,
    uint16_t simd);
|
2019-10-23 15:36:37 +08:00
|
|
|
|
|
|
|
// Local-ID generator variant for the SIMD-1 case.
// Declaration only; the definition is provided elsewhere.
// NOTE(review): presumably lays out one entry of `grfSize` per work-item in
// `b` - confirm against the definition.
void generateLocalIDsForSimdOne(
    void *b,
    const std::array<uint16_t, 3> &localWorkgroupSize,
    const std::array<uint8_t, 3> &dimensionsOrder,
    uint32_t grfSize);
|
2020-02-05 00:58:41 +08:00
|
|
|
} // namespace NEO
|