Workgroup walk order

Change-Id: Id02db6a383e21dc17be64655e7f51a84103b2e0b
This commit is contained in:
Chodor, Jaroslaw
2018-08-06 11:35:59 +02:00
committed by sys_ocldev
parent dfd331c568
commit c10d0d79f5
13 changed files with 235 additions and 37 deletions

View File

@@ -22,10 +22,12 @@
#pragma once
#include <cstdint>
#include <algorithm>
#include "runtime/helpers/ptr_math.h"
#include <algorithm>
#include <array>
#include <cstdint>
namespace OCLRT {
union GRF {
float fRegs[8];
@@ -60,9 +62,9 @@ inline size_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t numChannels = 3)
}
struct LocalIDHelper {
static void (*generateSimd8)(void *buffer, size_t lwsX, size_t lwsY, size_t threadsPerWorkGroup);
static void (*generateSimd16)(void *buffer, size_t lwsX, size_t lwsY, size_t threadsPerWorkGroup);
static void (*generateSimd32)(void *buffer, size_t lwsX, size_t lwsY, size_t threadsPerWorkGroup);
static void (*generateSimd8)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder);
static void (*generateSimd16)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder);
static void (*generateSimd32)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder);
static LocalIDHelper initializer;
@@ -73,7 +75,10 @@ struct LocalIDHelper {
extern const uint16_t initialLocalID[];
// Function template parameterized on a type `Vec` and an integer `simd`.
// Declaration only; the definition is not visible in this view.
// NOTE(review): two signatures appear back to back below. This looks like a diff
// rendering that shows the earlier form (separate lwsX/lwsY sizes) followed by the
// revised form (three-element localWorkgroupSize plus dimensionsOrder) — confirm
// which one is current before relying on either.
template <typename Vec, int simd>
void generateLocalIDsSimd(void *b, size_t lwsX, size_t lwsY, size_t threadsPerWorkGroup);
// Revised form: sizes are passed as a three-element uint16_t array and an additional
// three-element uint8_t array `dimensionsOrder` is taken.
// NOTE(review): presumably `dimensionsOrder` encodes the workgroup walk order named in
// the commit title — verify against the definition.
void generateLocalIDsSimd(void *b, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup,
const std::array<uint8_t, 3> &dimensionsOrder);
// Non-template declaration taking an output `buffer`, a `simd` value, a three-element
// local workgroup size and a three-element `dimensionsOrder`.
// Declaration only; the definition is not visible in this view.
// NOTE(review): judging by the name, this generates local IDs into `buffer`, and
// `dimensionsOrder` presumably selects the order in which dimensions are walked —
// confirm against the definition.
void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize,
const std::array<uint8_t, 3> &dimensionsOrder);
// Variant taking the three sizes as separate size_t values and `simd` as uint32_t.
// NOTE(review): this looks like the pre-change signature shown by the diff rather than
// a live overload — confirm whether it still exists in the header.
void generateLocalIDs(void *buffer, uint32_t simd, size_t lwsX, size_t lwsY, size_t lwsZ);
} // namespace OCLRT