/* * Copyright (C) 2017-2018 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "runtime/helpers/ptr_math.h" #include #include #include namespace OCLRT { union GRF { float fRegs[8]; uint32_t dwRegs[8]; uint16_t wRegs[16]; }; inline size_t getGRFsPerThread(uint32_t simd) { return simd == 32 ? 2 : 1; } inline size_t getThreadsPerWG(uint32_t simd, size_t lws) { auto result = lws + simd - 1; // Original logic: // result = (lws + simd - 1) / simd; // This sequence is meant to avoid an CPU DIV instruction. result >>= simd == 32 ? 5 : simd == 16 ? 4 : 3; return result; } inline size_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t numChannels = 3) { auto numGRFSPerThread = getGRFsPerThread(simd); auto returnSize = numChannels * numGRFSPerThread * sizeof(GRF); returnSize = std::max(returnSize, sizeof(GRF)); return returnSize; } struct LocalIDHelper { static void (*generateSimd8)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); static void (*generateSimd16)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); static void (*generateSimd32)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); static LocalIDHelper initializer; private: LocalIDHelper(); }; extern const uint16_t initialLocalID[]; template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, bool isImageOnlyKernel); void generateLocalIDsWithLayoutForImages(void *b, const std::array &localWorkgroupSize, uint16_t simd); bool isCompatibleWithLayoutForImages(const std::array &localWorkgroupSize, const std::array &dimensionsOrder, uint16_t simd); } // namespace OCLRT