compute-runtime/unit_tests/test_files/simple_kernels.cl

125 lines
3.1 KiB
Common Lisp

/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
__kernel void simple_kernel_0(
const uint arg0,
const float arg1,
__global uint *dst) {
uint idx = get_global_id(0);
uint data = arg0 + (uint)arg1;
dst[idx] = data;
}
__kernel void simple_kernel_1(
__global uint *src,
const uint arg1,
__global uint *dst) {
uint idx = get_global_id(0);
dst[idx] = src[idx] + arg1;
}
__kernel void simple_kernel_2(
const uint arg0,
__global uint *dst) {
uint idx = get_global_id(0);
dst[idx] = arg0;
}
__kernel void simple_kernel_3(
__global uint *dst) {
dst[get_global_id(0)] = 0;
}
__kernel void simple_kernel_4() {
}
__kernel void simple_kernel_5(__global uint *dst) {
//first uint holds the total work item count
atomic_inc(dst);
uint groupIdX = get_group_id(0);
uint groupIdY = get_group_id(1);
uint groupIdZ = get_group_id(2);
uint groupCountX = get_num_groups(0);
uint groupCountY = get_num_groups(1);
uint groupCountZ = get_num_groups(2);
__global uint* groupCounters = dst+1;
//store current group position in 3D array
uint destination = groupIdZ * groupCountY * groupCountX + groupIdY * groupCountX + groupIdX;
atomic_inc(&groupCounters[destination]);
}
#define SIMPLE_KERNEL_6_ARRAY_SIZE 256
__kernel void simple_kernel_6(__global uint *dst, __global uint2 *src, uint scalar, uint maxIterations, uint maxIterations2) {
__private uint2 array[SIMPLE_KERNEL_6_ARRAY_SIZE];
__private uint2 sum;
__private size_t gid = get_global_id(0);
__private size_t lid = get_local_id(0);
__private uint multi = 1;
if(lid == 1024) {
multi = 4;
}
sum = (uint2)(0, 0);
for(int i = 0; i < maxIterations; ++i) {
array[i] = src[i] + (uint2)(i*multi, i*multi+scalar);
}
for(int i = 0; i < maxIterations2; ++i) {
sum.x = array[i].x + sum.x;
sum.y = array[i].y + sum.y;
}
vstore2(sum, gid, dst);
}
typedef long16 TYPE;
__attribute__((reqd_work_group_size(32, 1, 1))) // force LWS to 32
__attribute__((intel_reqd_sub_group_size(8))) // force SIMD to 8
__kernel void simple_kernel_7(__global int *resIdx, global TYPE *src, global TYPE *dst){
size_t lid = get_local_id(0);
size_t gid = get_global_id(0);
TYPE res1 = src[gid*3];
TYPE res2 = src[gid*3+1];
TYPE res3 = src[gid*3+2];
__local TYPE locMem[32];
locMem[lid] = res1;
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
TYPE res = (locMem[resIdx[gid]]*res3)*res2 + res1;
dst[gid] = res;
}
__kernel void simple_kernel_8(__global uint *dst, uint incrementationsCount) {
uint groupIdX = get_group_id(0);
uint groupIdY = get_group_id(1);
uint groupIdZ = get_group_id(2);
uint groupCountX = get_num_groups(0);
uint groupCountY = get_num_groups(1);
uint groupCountZ = get_num_groups(2);
uint destination = groupIdZ * groupCountY * groupCountX + groupIdY * groupCountX + groupIdX;
for(uint i = 0; i < incrementationsCount; i++){
dst[destination]++;
}
}