mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Move test kernel compilation to shared tests
Change-Id: I623a94cf9f6baa29fe23b94541f578c8a9680f92 Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
This commit is contained in:
@@ -109,7 +109,7 @@ if(NOT SKIP_NEO_UNIT_TESTS AND NOT SKIP_UNIT_TESTS)
|
||||
endif()
|
||||
|
||||
add_subdirectories()
|
||||
|
||||
add_dependencies(${TARGET_NAME} prepare_test_kernel_for_shared)
|
||||
create_project_source_tree(${TARGET_NAME})
|
||||
|
||||
endif()
|
||||
|
||||
596
shared/test/unit_test/test_files/7020674763881029420.cl
Normal file
596
shared/test/unit_test/test_files/7020674763881029420.cl
Normal file
@@ -0,0 +1,596 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
// Each work-item copies one 4-element uint vector from src to dst,
// addressed by its global id in dimension 0.
__kernel void fullCopy(__global const uint* src, __global uint* dst) {
    const unsigned int vecIndex = get_global_id(0);
    vstore4(vload4(vecIndex, src), vecIndex, dst);
}
|
||||
|
||||
// Copies a single byte per work-item from pSrc to pDst, applying the
// respective byte offsets. bytesToRead is not used by the kernel body.
__kernel void CopyBufferToBufferBytes(
    const __global uchar* pSrc,
    __global uchar* pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes,
    uint bytesToRead )
{
    const __global uchar* from = pSrc + ( srcOffsetInBytes + get_global_id(0) );
    __global uchar* to = pDst + ( dstOffsetInBytes + get_global_id(0) );
    *to = *from;
}
|
||||
|
||||
// Copies one byte per work-item; source and destination indices are the
// global id plus the respective byte offset (32-bit arithmetic).
__kernel void CopyBufferToBufferLeftLeftover(
    const __global uchar* pSrc,
    __global uchar* pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes)
{
    const unsigned int workItem = get_global_id(0);
    const unsigned int readIndex = workItem + srcOffsetInBytes;
    const unsigned int writeIndex = workItem + dstOffsetInBytes;
    pDst[ writeIndex ] = pSrc[ readIndex ];
}
|
||||
|
||||
// Copies one uint4 per work-item. The byte offsets are converted to uint
// element offsets (divided by 4) before the vector load/store.
__kernel void CopyBufferToBufferMiddle(
    const __global uint* pSrc,
    __global uint* pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes)
{
    const unsigned int vecIndex = get_global_id(0);
    __global uint* dstBase = pDst + ( dstOffsetInBytes / 4u );
    const __global uint* srcBase = pSrc + ( srcOffsetInBytes / 4u );
    vstore4(vload4(vecIndex, srcBase), vecIndex, dstBase);
}
|
||||
|
||||
// Copies one byte per work-item; source and destination indices are the
// global id plus the respective byte offset (32-bit arithmetic).
__kernel void CopyBufferToBufferRightLeftover(
    const __global uchar* pSrc,
    __global uchar* pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes)
{
    const unsigned int workItem = get_global_id(0);
    const unsigned int readIndex = workItem + srcOffsetInBytes;
    const unsigned int writeIndex = workItem + dstOffsetInBytes;
    pDst[ writeIndex ] = pSrc[ readIndex ];
}
|
||||
|
||||
// Copies the byte at the work-item's global id from src to dst.
__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
    const unsigned int index = get_global_id(0);
    const uchar value = src[index];
    dst[index] = value;
}
|
||||
// Byte-wise copy of the first `len` bytes; work-items with a global id at
// or beyond `len` do nothing.
__kernel void CopyBufferToBufferSideRegion(
    __global uchar* pDst,
    const __global uchar* pSrc,
    unsigned int len,
    uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
    )
{
    const unsigned int index = get_global_id(0);
    if (index >= len) {
        return;
    }

    __global uchar* dstBytes = pDst + dstSshOffset;
    const __global uchar* srcBytes = pSrc + srcSshOffset;
    dstBytes[ index ] = srcBytes[ index ];
}
|
||||
|
||||
// Copies one uint4 per work-item for global ids below `elems`. The SSH
// offsets are applied in bytes before the pointers are used as uint*.
__kernel void CopyBufferToBufferMiddleRegion(
    __global uint* pDst,
    const __global uint* pSrc,
    unsigned int elems,
    uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
    )
{
    const unsigned int vecIndex = get_global_id(0);
    if (vecIndex >= elems) {
        return;
    }

    __global uint* dstWords = (__global uint*)((__global uchar*)pDst + dstSshOffset);
    const __global uint* srcWords = (const __global uint*)((const __global uchar*)pSrc + srcSshOffset);
    vstore4(vload4(vecIndex, srcWords), vecIndex, dstWords);
}
|
||||
|
||||
// assumption is local work size = pattern size
|
||||
// Writes one pattern byte per work-item: the pattern is indexed by the
// local id, the destination by the global id plus dstOffsetInBytes.
__kernel void FillBufferBytes(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern )
{
    const uint patternPos = get_local_id(0);
    const uint writePos = get_global_id(0) + dstOffsetInBytes;
    pDst[writePos] = pPattern[patternPos];
}
|
||||
|
||||
// Writes one pattern byte per work-item. The pattern index is the global
// id masked with (patternSizeInEls - 1).
__kernel void FillBufferLeftLeftover(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern,
    const uint patternSizeInEls )
{
    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    const uchar fill = pPattern[ workItem & patternMask ];
    pDst[ workItem + dstOffsetInBytes ] = fill;
}
|
||||
|
||||
// Writes one uint pattern element per work-item. The destination is offset
// in bytes and then indexed as uint; the pattern index is the global id
// masked with (patternSizeInEls - 1).
__kernel void FillBufferMiddle(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uint* pPattern,
    const uint patternSizeInEls )
{
    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    __global uint* dstWords = (__global uint*)(pDst + dstOffsetInBytes);
    dstWords[workItem] = pPattern[ workItem & patternMask ];
}
|
||||
|
||||
// Writes one pattern byte per work-item. The pattern index is the global
// id masked with (patternSizeInEls - 1).
__kernel void FillBufferRightLeftover(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern,
    const uint patternSizeInEls )
{
    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    const uchar fill = pPattern[ workItem & patternMask ];
    pDst[ workItem + dstOffsetInBytes ] = fill;
}
|
||||
|
||||
// Stores `value`, converted to uchar, at the work-item's global id
// relative to ptr + dstSshOffset.
__kernel void FillBufferImmediate(
    __global uchar* ptr,
    uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    const uint value)
{
    const uint writePos = get_global_id(0);
    __global uchar* dstBytes = ptr + dstSshOffset;
    dstBytes[writePos] = (uchar)value;
}
|
||||
|
||||
// Writes one pattern byte per work-item: the pattern (after its SSH offset)
// is indexed by the local id, the destination (after its SSH offset) by the
// global id.
__kernel void FillBufferSSHOffset(
    __global uchar* ptr,
    uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    const __global uchar* pPattern,
    uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
    )
{
    const uint writePos = get_global_id(0);
    const uint patternPos = get_local_id(0);
    __global uchar* dstBytes = ptr + dstSshOffset;
    const __global uchar* patternBytes = pPattern + patternSshOffset;
    dstBytes[writePos] = patternBytes[patternPos];
}
|
||||
|
||||
// Writes `color` to the 1D image texel at global id + dstOffset.x.
__kernel void FillImage1d(
    __write_only image1d_t output,
    uint4 color,
    int4 dstOffset) {
    const int col = get_global_id(0);
    const int target = col + dstOffset.x;

    write_imageui(output, target, color);
}
|
||||
|
||||
// Writes `color` to the 2D image texel at the work-item's (x, y) global id
// shifted by dstOffset.xy.
__kernel void FillImage2d(
    __write_only image2d_t output,
    uint4 color,
    int4 dstOffset) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    const int2 target = (int2)(col, row) + dstOffset.xy;
    write_imageui(output, target, color);
}
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
// Writes `color` to the 3D image texel at the work-item's (x, y, z) global
// id shifted by dstOffset.
__kernel void FillImage3d(
    __write_only image3d_t output,
    uint4 color,
    int4 dstOffset) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    const int slice = get_global_id(2);

    const int4 workItem = (int4)(col, row, slice, 0);
    const int4 target = workItem + dstOffset;
    write_imageui(output, target, color);
}
|
||||
|
||||
// Copies one texel per work-item between two 1D images; the global id is
// shifted by srcOffset.x for the read and dstOffset.x for the write.
__kernel void CopyImageToImage1d(
    __read_only image1d_t input,
    __write_only image1d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    const int col = get_global_id(0);

    const int readPos = col + srcOffset.x;
    const int writePos = col + dstOffset.x;
    const uint4 texel = read_imageui(input, readPos);
    write_imageui(output, writePos, texel);
}
|
||||
|
||||
// Copies one texel per work-item between two 2D images; the (x, y) global
// id is shifted by srcOffset.xy for the read and dstOffset.xy for the write.
__kernel void CopyImageToImage2d(
    __read_only image2d_t input,
    __write_only image2d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    const int2 workItem = (int2)(col, row);
    const int2 readPos = workItem + srcOffset.xy;
    const int2 writePos = workItem + dstOffset.xy;
    const uint4 texel = read_imageui(input, readPos);
    write_imageui(output, writePos, texel);
}
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
// Copies one texel per work-item between two 3D images; the (x, y, z)
// global id is shifted by srcOffset for the read and dstOffset for the write.
__kernel void CopyImageToImage3d(
    __read_only image3d_t input,
    __write_only image3d_t output,
    int4 srcOffset,
    int4 dstOffset) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    const int slice = get_global_id(2);

    const int4 workItem = (int4)(col, row, slice, 0);
    const int4 readPos = workItem + srcOffset;
    const int4 writePos = workItem + dstOffset;
    const uint4 texel = read_imageui(input, readPos);
    write_imageui(output, writePos, texel);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Copies one byte per work-item of a 2D rectangle. Offsets are computed as
// x + origin.x + (y + origin.y) * pitch.x for source and destination.
__kernel void CopyBufferRectBytes2d(
    __global const char* src,
    __global char* dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch )

{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    const uint srcRowStart = ( row + SrcOrigin.y ) * SrcPitch.x;
    const uint dstRowStart = ( row + DstOrigin.y ) * DstPitch.x;

    const uint readOffset = col + SrcOrigin.x + srcRowStart;
    const uint writeOffset = col + DstOrigin.x + dstRowStart;

    dst[ writeOffset ] = src[ readOffset ];
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Copies one byte per work-item of a 3D box. Offsets are computed as
// x + origin.x + (y + origin.y) * pitch.x + (z + origin.z) * pitch.y
// for source and destination.
__kernel void CopyBufferRectBytes3d(
    __global const char* src,
    __global char* dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch )

{
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    const int slice = get_global_id(2);

    const uint srcRowStart = ( row + SrcOrigin.y ) * SrcPitch.x;
    const uint srcSliceStart = ( slice + SrcOrigin.z ) * SrcPitch.y;
    const uint dstRowStart = ( row + DstOrigin.y ) * DstPitch.x;
    const uint dstSliceStart = ( slice + DstOrigin.z ) * DstPitch.y;

    const uint readOffset = col + SrcOrigin.x + srcRowStart + srcSliceStart;
    const uint writeOffset = col + DstOrigin.x + dstRowStart + dstSliceStart;

    dst[ writeOffset ] = src[ readOffset ];
}
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
// Reads one byte per work-item from the buffer and writes it as the x
// channel of a texel (y = z = 0, w = 1). Pitch.x scales the y id and
// Pitch.y scales the z id when locating the byte.
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
                                       __write_only image3d_t output,
                                       int srcOffset,
                                       int4 dstOffset,
                                       uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const uint lineOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
    const int4 target = (int4)(x, y, z, 0) + dstOffset;

    const uint value = *(src + lineOffset + x);
    write_imageui(output, target, (uint4)(value, 0, 0, 1));
}
|
||||
|
||||
// Reads one 2-byte element per work-item from the buffer and writes it as
// the x channel of a texel (y = z = 0, w = 1).
//
// The direct ushort load is only used when the address of the element itself
// is 2-byte aligned. The check is made on the element address rather than on
// src + srcOffset, because a Pitch that is not a multiple of 2 can misalign
// individual rows/slices even when the base address is aligned.
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    // Address of the element this work-item reads.
    __global uchar *element = src + LOffset + x * 2;

    uint4 c = (uint4)(0, 0, 0, 1);

    if(( ulong )element & 0x00000001){
        // Unaligned: assemble the value from single bytes, low byte first.
        ushort upper = element[1];
        ushort lower = element[0];
        ushort combined = (upper << 8) | lower;
        c.x = (uint)combined;
    }
    else{
        c.x = (uint)(*(__global ushort*)element);
    }
    write_imageui(output, dstCoord, c);
}
|
||||
|
||||
// Reads one 4-byte element per work-item from the buffer and writes it as
// the x channel of a texel (y = z = 0, w = 1).
//
// The direct uint load is only used when the address of the element itself
// is 4-byte aligned. The check is made on the element address rather than on
// src + srcOffset, because a Pitch that is not a multiple of 4 can misalign
// individual rows/slices even when the base address is aligned.
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    // Address of the element this work-item reads.
    __global uchar *element = src + LOffset + x * 4;

    uint4 c = (uint4)(0, 0, 0, 1);

    if(( ulong )element & 0x00000003){
        // Unaligned: assemble the value from single bytes, low byte first.
        uint upper2 = element[3];
        uint upper = element[2];
        uint lower2 = element[1];
        uint lower = element[0];
        c.x = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
    }
    else{
        c.x = (*(__global uint*)element);
    }
    write_imageui(output, dstCoord, c);
}
|
||||
|
||||
// Reads one 8-byte element per work-item from the buffer and writes its two
// 32-bit halves as the x and y channels of a texel (z = 0, w = 1).
//
// The direct uint2 load is only used when the address of the element itself
// is 8-byte aligned. The check is made on the element address rather than on
// src + srcOffset, because a Pitch that is not a multiple of 8 can misalign
// individual rows/slices even when the base address is aligned.
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    // Address of the element this work-item reads.
    __global uchar *element = src + LOffset + x * 8;

    uint2 c = (uint2)(0, 0);

    if(( ulong )element & 0x00000007){
        // Unaligned: assemble each 32-bit half from single bytes, low byte first.
        c.x = ((uint)element[3] << 24) | ((uint)element[2] << 16) | ((uint)element[1] << 8) | (uint)element[0];
        c.y = ((uint)element[7] << 24) | ((uint)element[6] << 16) | ((uint)element[5] << 8) | (uint)element[4];
    }
    else{
        c = *((__global uint2*)element);
    }

    write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
}
|
||||
|
||||
// Reads one 16-byte element per work-item from the buffer and writes its
// four 32-bit words as the x, y, z and w channels of a texel.
//
// The direct uint4 load is only used when the address of the element itself
// is 16-byte aligned. The check is made on the element address rather than on
// src + srcOffset, because a Pitch that is not a multiple of 16 can misalign
// individual rows/slices even when the base address is aligned.
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
                                         __write_only image3d_t output,
                                         int srcOffset,
                                         int4 dstOffset,
                                         uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    // Address of the element this work-item reads.
    __global uchar *element = src + LOffset + x * 16;

    uint4 c = (uint4)(0, 0, 0, 0);

    if(( ulong )element & 0x0000000f){
        // Unaligned: assemble each 32-bit word from single bytes, low byte first.
        c.x = ((uint)element[3] << 24) | ((uint)element[2] << 16) | ((uint)element[1] << 8) | (uint)element[0];
        c.y = ((uint)element[7] << 24) | ((uint)element[6] << 16) | ((uint)element[5] << 8) | (uint)element[4];
        c.z = ((uint)element[11] << 24) | ((uint)element[10] << 16) | ((uint)element[9] << 8) | (uint)element[8];
        c.w = ((uint)element[15] << 24) | ((uint)element[14] << 16) | ((uint)element[13] << 8) | (uint)element[12];
    }
    else{
        c = *((__global uint4 *)element);
    }

    write_imageui(output, dstCoord, c);
}
|
||||
|
||||
// Reads one texel per work-item and stores its x channel, saturated to
// uchar, into the buffer. Pitch.x scales the y id and Pitch.y scales the
// z id when locating the destination byte.
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
                                       __global uchar *dst,
                                       int4 srcOffset,
                                       int dstOffset,
                                       uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const uint lineOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
    const int4 readPos = (int4)(x, y, z, 0) + srcOffset;

    const uint4 texel = read_imageui(input, readPos);
    __global uchar *target = dst + lineOffset + x;
    *target = convert_uchar_sat(texel.x);
}
|
||||
|
||||
// Reads one texel per work-item and stores its x channel as a 2-byte
// element in the buffer.
//
// The direct ushort store is only used when the address of the element
// itself is 2-byte aligned. The check is made on the element address rather
// than on dst + dstOffset, because a Pitch that is not a multiple of 2 can
// misalign individual rows/slices even when the base address is aligned.
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    uint4 c = read_imageui(input, srcCoord);

    // Address of the element this work-item writes.
    __global uchar *element = dst + DstOffset + x * 2;

    if(( ulong )element & 0x00000001){
        // Unaligned: store byte by byte, low byte first.
        element[1] = convert_uchar_sat((c.x >> 8 ) & 0xff);
        element[0] = convert_uchar_sat(c.x & 0xff);
    }
    else{
        *((__global ushort*)element) = convert_ushort_sat(c.x);
    }
}
|
||||
|
||||
// Reads one texel per work-item and stores its x channel as a 4-byte
// element in the buffer.
//
// The direct uint store is only used when the address of the element itself
// is 4-byte aligned. The check is made on the element address rather than on
// dst + dstOffset, because a Pitch that is not a multiple of 4 can misalign
// individual rows/slices even when the base address is aligned.
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    uint4 c = read_imageui(input, srcCoord);

    // Address of the element this work-item writes.
    __global uchar *element = dst + DstOffset + x * 4;

    if(( ulong )element & 0x00000003){
        // Unaligned: store byte by byte, low byte first.
        element[3] = convert_uchar_sat((c.x >> 24 ) & 0xff);
        element[2] = convert_uchar_sat((c.x >> 16 ) & 0xff);
        element[1] = convert_uchar_sat((c.x >> 8 ) & 0xff);
        element[0] = convert_uchar_sat(c.x & 0xff);
    }
    else{
        *((__global uint*)element) = c.x;
    }
}
|
||||
|
||||
// Reads one texel per work-item and stores its x and y channels as an
// 8-byte element in the buffer.
//
// The direct uint2 store is only used when the address of the element itself
// is 8-byte aligned. The check is made on the element address rather than on
// dst + dstOffset, because a Pitch that is not a multiple of 8 can misalign
// individual rows/slices even when the base address is aligned.
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    uint4 c = read_imageui(input, srcCoord);

    // Address of the element this work-item writes.
    __global uchar *element = dst + DstOffset + x * 8;

    if(( ulong )element & 0x00000007){
        // Unaligned: store byte by byte, low byte of each channel first.
        element[3] = convert_uchar_sat((c.x >> 24 ) & 0xff);
        element[2] = convert_uchar_sat((c.x >> 16 ) & 0xff);
        element[1] = convert_uchar_sat((c.x >> 8 ) & 0xff);
        element[0] = convert_uchar_sat(c.x & 0xff);
        element[7] = convert_uchar_sat((c.y >> 24 ) & 0xff);
        element[6] = convert_uchar_sat((c.y >> 16 ) & 0xff);
        element[5] = convert_uchar_sat((c.y >> 8 ) & 0xff);
        element[4] = convert_uchar_sat(c.y & 0xff);
    }
    else{
        uint2 d = (uint2)(c.x,c.y);
        *((__global uint2*)element) = d;
    }
}
|
||||
|
||||
// Reads one texel per work-item and stores all four channels as a 16-byte
// element in the buffer.
//
// The direct uint4 store is only used when the address of the element itself
// is 16-byte aligned. The check is made on the element address rather than on
// dst + dstOffset, because a Pitch that is not a multiple of 16 can misalign
// individual rows/slices even when the base address is aligned.
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
                                         __global uchar *dst,
                                         int4 srcOffset,
                                         int dstOffset,
                                         uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    const uint4 c = read_imageui(input, srcCoord);

    // Address of the element this work-item writes.
    __global uchar *element = dst + DstOffset + x * 16;

    if(( ulong )element & 0x0000000f){
        // Unaligned: store byte by byte, low byte of each channel first.
        element[3] = convert_uchar_sat((c.x >> 24 ) & 0xff);
        element[2] = convert_uchar_sat((c.x >> 16 ) & 0xff);
        element[1] = convert_uchar_sat((c.x >> 8 ) & 0xff);
        element[0] = convert_uchar_sat(c.x & 0xff);
        element[7] = convert_uchar_sat((c.y >> 24 ) & 0xff);
        element[6] = convert_uchar_sat((c.y >> 16 ) & 0xff);
        element[5] = convert_uchar_sat((c.y >> 8 ) & 0xff);
        element[4] = convert_uchar_sat(c.y & 0xff);
        element[11] = convert_uchar_sat((c.z >> 24 ) & 0xff);
        element[10] = convert_uchar_sat((c.z >> 16 ) & 0xff);
        element[9] = convert_uchar_sat((c.z >> 8 ) & 0xff);
        element[8] = convert_uchar_sat(c.z & 0xff);
        element[15] = convert_uchar_sat((c.w >> 24 ) & 0xff);
        element[14] = convert_uchar_sat((c.w >> 16 ) & 0xff);
        element[13] = convert_uchar_sat((c.w >> 8 ) & 0xff);
        element[12] = convert_uchar_sat(c.w & 0xff);
    }
    else{
        *(__global uint4*)element = c;
    }
}
|
||||
|
||||
// For each work-item: treats srcEvents[gid] as an address of uint data and
// writes four 64-bit results to dst[gid * 4 .. gid * 4 + 3], taken from
// source words 1, 3, 0 and 2 in that order (each masked to 32 bits).
__kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* dst) {
    const uint gid = get_global_id(0);
    const ulong tsMask = 0xFFFFFFFFul;
    const uint firstSlot = gid * 4;

    // Clear the four result slots before reading the source data.
    for (uint slot = 0; slot < 4; ++slot) {
        dst[firstSlot + slot] = 0;
    }

    const ulong eventAddress = srcEvents[gid];
    __global uint *words = (__global uint *) eventAddress;
    dst[firstSlot] = words[1] & tsMask;
    dst[firstSlot + 1] = words[3] & tsMask;
    dst[firstSlot + 2] = words[0] & tsMask;
    dst[firstSlot + 3] = words[2] & tsMask;
}
|
||||
|
||||
// For each work-item: treats srcEvents[gid] as an address of uint data and
// writes four 64-bit results to dst starting at element offsets[gid] / 8,
// taken from source words 1, 3, 0 and 2 in that order (each masked to 32 bits).
//
// The destination index is kept as ulong: offsets[] holds 64-bit values and
// storing the quotient in a uint would silently drop the upper bits.
__kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __global ulong* dst, __global ulong *offsets) {
    uint gid = get_global_id(0);
    const ulong tsMask = (1ull << 32) - 1;
    // offsets[gid] is divided by 8 to index dst in ulong elements
    // (presumably a byte offset - confirm against the caller).
    const ulong currentOffset = offsets[gid] / 8;
    dst[currentOffset] = 0;
    dst[currentOffset + 1] = 0;
    dst[currentOffset + 2] = 0;
    dst[currentOffset + 3] = 0;

    ulong srcPtr = srcEvents[gid];
    __global uint *src = (__global uint *) srcPtr;
    dst[currentOffset] = src[1] & tsMask;
    dst[currentOffset + 1] = src[3] & tsMask;
    dst[currentOffset + 2] = src[0] & tsMask;
    dst[currentOffset + 3] = src[2] & tsMask;
}
|
||||
@@ -0,0 +1,8 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
-cl-fast-relaxed-math
|
||||
95
shared/test/unit_test/test_files/CMakeLists.txt
Normal file
95
shared/test/unit_test/test_files/CMakeLists.txt
Normal file
@@ -0,0 +1,95 @@
|
||||
#
|
||||
# Copyright (C) 2020 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
# Default command prefix used to invoke ocloc, unless the caller already
# defined cloc_cmd_prefix. On non-Windows hosts LD_LIBRARY_PATH is set to the
# ocloc_lib directory, preceded by NEO__IGC_LIBRARY_PATH when that is defined.
if(NOT DEFINED cloc_cmd_prefix)
  if(WIN32)
    set(cloc_cmd_prefix ocloc)
  elseif(DEFINED NEO__IGC_LIBRARY_PATH)
    set(cloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib> $<TARGET_FILE:ocloc>)
  else()
    set(cloc_cmd_prefix LD_LIBRARY_PATH=$<TARGET_FILE_DIR:ocloc_lib> $<TARGET_FILE:ocloc>)
  endif()
endif()
|
||||
|
||||
# Registers one ocloc build rule per kernel source passed in ARGN.
#   platform_name_with_type - suffix of the result list variable
#   platform_name           - value passed to ocloc as -device
#   suffix                  - output sub-directory and file-name suffix
# The .spv/.bin/.gen outputs of every rule are appended to
# compiled_kernels_<platform_name_with_type> in the caller's scope.
function(compile_kernels_gen platform_name_with_type platform_name suffix)
  set(kernel_out_dir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}/")
  set(result_var compiled_kernels_${platform_name_with_type})

  foreach(kernel_src ${ARGN})
    get_filename_component(kernel_file ${kernel_src} NAME)
    get_filename_component(kernel_stem ${kernel_src} NAME_WE)
    get_filename_component(kernel_dir ${kernel_src} DIRECTORY)

    set(artifact_prefix "${kernel_out_dir}${kernel_stem}_${suffix}")
    set(artifacts
        ${artifact_prefix}.spv
        ${artifact_prefix}.bin
        ${artifact_prefix}.gen
    )

    # ocloc is run from the kernel's own directory, so -file gets the bare name.
    add_custom_command(
      OUTPUT ${artifacts}
      COMMAND ${cloc_cmd_prefix} -file ${kernel_file} -device ${platform_name} -${NEO_BITS} -out_dir ${kernel_out_dir}
      WORKING_DIRECTORY ${kernel_dir}
      DEPENDS ${kernel_src} ocloc copy_compiler_files
    )

    list(APPEND ${result_var} ${artifacts})
  endforeach()

  set(${result_var} ${${result_var}} PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Creates the custom target `target`, which copies the shared unit-test files
# directory into the per-product test_files directory under ${TargetDir}.
#   target  - name of the custom target to create
#   product - product name; lower-cased to form the destination directory
function(neo_shared_copy_test_files target product)
  # Quoted so that an empty product value is still a valid argument.
  string(TOLOWER "${product}" product)
  set(dest_dir "${TargetDir}/${product}/test_files")
  set(source_dir "${NEO_SOURCE_DIR}/shared/test/unit_test/test_files")
  add_custom_target(${target})
  # The TARGET form of add_custom_command does not accept DEPENDS, so none is
  # passed; the copy runs whenever the custom target is built.
  add_custom_command(
    TARGET ${target}
    POST_BUILD
    COMMAND echo copying test files from ${source_dir} to ${dest_dir}
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${source_dir} ${dest_dir}
    WORKING_DIRECTORY ${TargetDir}
  )
  add_dependencies(${target} copy_compiler_files)
  set_target_properties(${target} PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}")
endfunction()
|
||||
|
||||
# Collect every .cl file below this directory and declare the aggregate
# target that the per-platform and per-family targets are attached to.
file(GLOB_RECURSE TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/*.cl")
add_custom_target(prepare_test_kernel_for_shared)
|
||||
|
||||
# Per-platform hook: creates copy_test_kernel_<PLATFORM_IT> and makes the
# aggregate prepare_test_kernel_for_shared target depend on it.
macro(macro_for_each_platform)
  neo_shared_copy_test_files(
    copy_test_kernel_${PLATFORM_IT}
    ${PLATFORM_IT}
  )
  add_dependencies(
    prepare_test_kernel_for_shared
    copy_test_kernel_${PLATFORM_IT}
  )
endmacro()
|
||||
|
||||
# Per-gen hook: runs the per-platform hook for "TESTED" platforms, then, for
# every platform type the gen has, registers kernel compilation rules and a
# prepare_test_kernel_for_shared_<family> target hooked into the aggregate.
macro(macro_for_each_gen)
  apply_macro_for_each_platform("TESTED")

  foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
    if(${GEN_TYPE}_HAS_${PLATFORM_TYPE})
      get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
      set(PLATFORM_LOWER ${DEFAULT_SUPPORTED_${GEN_TYPE}_${PLATFORM_TYPE}_PLATFORM})

      # Fills compiled_kernels_<family_name_with_type> with the ocloc outputs.
      compile_kernels_gen(
        ${family_name_with_type}
        ${PLATFORM_LOWER}
        ${family_name_with_type}
        ${TEST_KERNELS}
      )

      add_custom_target(
        prepare_test_kernel_for_shared_${family_name_with_type}
        DEPENDS ${compiled_kernels_${family_name_with_type}}
      )
      set_target_properties(
        prepare_test_kernel_for_shared_${family_name_with_type}
        PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${family_name_with_type}"
      )
      add_dependencies(
        prepare_test_kernel_for_shared
        prepare_test_kernel_for_shared_${family_name_with_type}
      )
    endif()
  endforeach()
endmacro()
|
||||
|
||||
# Invoke apply_macro_for_each_gen (defined outside this file) with "TESTED";
# presumably it calls macro_for_each_gen once per tested gen - confirm.
apply_macro_for_each_gen("TESTED")
|
||||
Reference in New Issue
Block a user