mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Revert "refactor: unify stateless and not stateless builtins kernels"
This reverts commit 4eb37124cb.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c3e98e346a
commit
575f48f738
@@ -7,5 +7,5 @@
|
||||
|
||||
#include "shared/test/common/helpers/kernel_binary_helper.h"
|
||||
|
||||
const std::string KernelBinaryHelper::BUILT_INS("9138976784737507795");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("15961210520135641457_images");
|
||||
const std::string KernelBinaryHelper::BUILT_INS("6133084427540774618");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("10619261412647190096_images");
|
||||
|
||||
@@ -11,16 +11,14 @@ __kernel void fullCopy(__global const uint* src, __global uint* dst) {
|
||||
vstore4(loaded, gid, dst);
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
offset_t bytesToRead )
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint bytesToRead )
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
@@ -32,24 +30,24 @@ __kernel void CopyBufferToBufferBytes(
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
@@ -59,17 +57,17 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
const size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4((gid + 1), pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
@@ -82,33 +80,32 @@ __kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
|
||||
ALIGNED4(dst);
|
||||
ALIGNED4(src);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
dst[gid] = (uchar)(src[gid]);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferSideRegion(
|
||||
__global uchar* pDst,
|
||||
const __global uchar* pSrc,
|
||||
idx_t len,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
unsigned int len,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
__global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < len) {
|
||||
@@ -119,14 +116,14 @@ __kernel void CopyBufferToBufferSideRegion(
|
||||
__kernel void CopyBufferToBufferMiddleRegion(
|
||||
__global uint* pDst,
|
||||
const __global uint* pSrc,
|
||||
idx_t elems,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
unsigned int elems,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
__global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < elems) {
|
||||
@@ -135,181 +132,179 @@ __kernel void CopyBufferToBufferMiddleRegion(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t lid = get_local_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[lid];
|
||||
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
uint srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const offset_t patternSizeInEls )
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const offset_t patternSizeInEls )
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
uint gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const offset_t patternSizeInEls )
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uchar* ptr,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
idx_t gid = get_global_id(0);
|
||||
uint gid = get_global_id(0);
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
idx_t gid = get_global_id(0);
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
__global uchar* ptr,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const __global uchar* pPattern,
|
||||
offset_t patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t dstIndex = get_global_id(0);
|
||||
idx_t srcIndex = get_local_id(0);
|
||||
uint dstIndex = get_global_id(0);
|
||||
uint srcIndex = get_local_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle2d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle3d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
}
|
||||
|
||||
|
||||
void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) {
|
||||
dst[currentOffset] = globalStart;
|
||||
dst[currentOffset + 1] = globalEnd;
|
||||
@@ -540,38 +535,36 @@ __kernel void CopyImage1dBufferToImage1dBuffer(
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(((ulong)(src + srcOffset)) & 0x00000001){
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
@@ -585,19 +578,19 @@ __kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
|
||||
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(((ulong)(src + srcOffset)) & 0x00000003){
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
@@ -612,44 +605,45 @@ __kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d3To4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
|
||||
uint upper2 = 0;
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 3 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 3 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 3));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(((ulong)(src + srcOffset)) & 0x00000007){
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
@@ -671,26 +665,27 @@ __kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d6To8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
|
||||
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 6 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 6 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 6 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 6));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
|
||||
|
||||
upper2 = upper = 0;
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 6 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 6 + 4));
|
||||
@@ -702,15 +697,15 @@ __kernel void CopyBufferToImage3d6To8Bytes(__global uchar *src,
|
||||
|
||||
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
@@ -743,35 +738,33 @@ __kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16BytesAligned(__global uint4 *src,
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = src[(LOffset >> 4) + x];
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
@@ -780,18 +773,18 @@ __kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000001){
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
@@ -803,37 +796,39 @@ __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer3Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 2)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 1)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
|
||||
|
||||
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000003){
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
@@ -845,19 +840,19 @@ __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4To3Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
@@ -866,40 +861,40 @@ __kernel void CopyImage3dToBuffer4To3Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer6Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 3)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 2)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 5)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 4)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 4)) = convert_uchar_sat(c.z & 0xff);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000007){
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
@@ -916,16 +911,16 @@ __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8To6Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
@@ -941,14 +936,14 @@ __kernel void CopyImage3dToBuffer8To6Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
@@ -971,16 +966,16 @@ __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16BytesAligned(__read_only image3d_t input,
|
||||
__global uint4 *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
__global uint4 *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -11,16 +11,14 @@ __kernel void fullCopy(__global const uint* src, __global uint* dst) {
|
||||
vstore4(loaded, gid, dst);
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
offset_t bytesToRead )
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint bytesToRead )
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
@@ -32,24 +30,24 @@ __kernel void CopyBufferToBufferBytes(
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
@@ -59,17 +57,17 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
const size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4((gid + 1), pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
@@ -82,33 +80,32 @@ __kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
|
||||
ALIGNED4(dst);
|
||||
ALIGNED4(src);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
dst[gid] = (uchar)(src[gid]);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferSideRegion(
|
||||
__global uchar* pDst,
|
||||
const __global uchar* pSrc,
|
||||
idx_t len,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
unsigned int len,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
__global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < len) {
|
||||
@@ -119,14 +116,14 @@ __kernel void CopyBufferToBufferSideRegion(
|
||||
__kernel void CopyBufferToBufferMiddleRegion(
|
||||
__global uint* pDst,
|
||||
const __global uint* pSrc,
|
||||
idx_t elems,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
unsigned int elems,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
idx_t gid = get_global_id(0);
|
||||
unsigned int gid = get_global_id(0);
|
||||
__global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < elems) {
|
||||
@@ -135,181 +132,179 @@ __kernel void CopyBufferToBufferMiddleRegion(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t lid = get_local_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[lid];
|
||||
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
uint srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const offset_t patternSizeInEls )
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const offset_t patternSizeInEls )
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
uint gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const offset_t patternSizeInEls )
|
||||
const uint patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uchar* ptr,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
idx_t gid = get_global_id(0);
|
||||
uint gid = get_global_id(0);
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
idx_t gid = get_global_id(0);
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
__global uchar* ptr,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const __global uchar* pPattern,
|
||||
offset_t patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
ALIGNED4(pPattern);
|
||||
idx_t dstIndex = get_global_id(0);
|
||||
idx_t srcIndex = get_local_id(0);
|
||||
uint dstIndex = get_global_id(0);
|
||||
uint srcIndex = get_local_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle2d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle3d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
}
|
||||
|
||||
|
||||
void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) {
|
||||
dst[currentOffset] = globalStart;
|
||||
dst[currentOffset + 1] = globalEnd;
|
||||
@@ -5,7 +5,7 @@
|
||||
#
|
||||
|
||||
set(SHARED_TEST_PROJECTS_SUB_FOLDER "prepare test files")
|
||||
function(compile_kernels_gen device revision_id platform_name use_stateless use_heapless)
|
||||
function(compile_kernels_gen device revision_id platform_name use_stateless_suffix use_heapless)
|
||||
|
||||
set(outputdir "${TargetDir}/${platform_name}/${revision_id}/test_files/${NEO_ARCH}/")
|
||||
set(compiled_kernels)
|
||||
@@ -19,8 +19,8 @@ function(compile_kernels_gen device revision_id platform_name use_stateless use_
|
||||
set(outputname_base "${basename}_${platform_name}")
|
||||
if(${use_heapless})
|
||||
set(outputname_base "${outputname_base}-heapless_")
|
||||
elseif(${use_stateless})
|
||||
set(outputname_base "${outputname_base}-cl-intel-greater-than-4GB-buffer-required_-DWIDE_STATELESS=1_")
|
||||
elseif(${use_stateless_suffix})
|
||||
set(outputname_base "${outputname_base}-cl-intel-greater-than-4GB-buffer-required_")
|
||||
endif()
|
||||
|
||||
set(outputpath_base "${outputdir}${outputname_base}")
|
||||
@@ -38,14 +38,9 @@ function(compile_kernels_gen device revision_id platform_name use_stateless use_
|
||||
set(internal_options "${HEAPLESS_INTERNAL_OPTIONS}")
|
||||
endif()
|
||||
|
||||
if(${use_stateless} OR ${use_heapless})
|
||||
list(APPEND __ocloc__options__ "-DWIDE_STATELESS=1")
|
||||
endif()
|
||||
list(APPEND __ocloc__options__ "-I${NEO_SOURCE_DIR}/shared/source/built_ins/kernels")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${output_files}
|
||||
COMMAND ${ocloc_cmd_prefix} -file ${absolute_filepath} -device ${device} -heapless_mode ${heapless_mode} -internal_options ${internal_options} -${NEO_BITS} -out_dir ${outputdir} -output_no_suffix -output ${outputname_base} -revision_id ${revision_id} -options "$<JOIN:${__ocloc__options__}, >"
|
||||
COMMAND ${ocloc_cmd_prefix} -file ${absolute_filepath} -device ${device} -heapless_mode ${heapless_mode} -internal_options ${internal_options} -${NEO_BITS} -out_dir ${outputdir} -output_no_suffix -output ${outputname_base} -revision_id ${revision_id}
|
||||
WORKING_DIRECTORY ${workdir}
|
||||
DEPENDS ${filepath} ocloc copy_compiler_files
|
||||
)
|
||||
@@ -84,7 +79,6 @@ add_dependencies(prepare_test_kernels_for_shared copy_compiler_files)
|
||||
|
||||
macro(macro_for_each_platform)
|
||||
set(KERNELS_TO_COMPILE ${TEST_KERNELS})
|
||||
|
||||
set(IMAGE_SUPPORT FALSE)
|
||||
CORE_CONTAINS_PLATFORM("SUPPORTED_IMAGES" ${CORE_TYPE} ${PLATFORM_IT} IMAGE_SUPPORT)
|
||||
if(NOT ${IMAGE_SUPPORT})
|
||||
|
||||
@@ -84,7 +84,7 @@ HWTEST2_F(BuiltInSharedTest, GivenStatelessBuiltinWhenGettingResourceNameThenAdd
|
||||
|
||||
auto resourceNames = getBuiltinResourceNames(EBuiltInOps::copyBufferToBufferStateless, BuiltinCode::ECodeType::binary, *pDevice);
|
||||
|
||||
std::string expectedResourceNameGeneric = "stateless_copy_buffer_to_buffer.builtin_kernel.bin";
|
||||
std::string expectedResourceNameGeneric = "stateless_copy_buffer_to_buffer_stateless.builtin_kernel.bin";
|
||||
std::string expectedResourceNameForRelease = deviceIpString + "_" + expectedResourceNameGeneric;
|
||||
|
||||
EXPECT_EQ(1u, resourceNames.size());
|
||||
@@ -103,7 +103,7 @@ HWTEST2_F(BuiltInSharedTest, GivenPlatformWithoutStatefulAddresingSupportWhenGet
|
||||
|
||||
{
|
||||
auto resourceNames = getBuiltinResourceNames(EBuiltInOps::copyBufferToBufferStateless, BuiltinCode::ECodeType::binary, *pDevice);
|
||||
std::string expectedResourceName = deviceIpString + "_stateless_copy_buffer_to_buffer.builtin_kernel.bin";
|
||||
std::string expectedResourceName = deviceIpString + "_stateless_copy_buffer_to_buffer_stateless.builtin_kernel.bin";
|
||||
EXPECT_EQ(1u, resourceNames.size());
|
||||
EXPECT_EQ(resourceNames[0], expectedResourceName);
|
||||
}
|
||||
@@ -130,20 +130,20 @@ TEST_F(BuiltInSharedTest, GivenValidBuiltinTypeAndExtensionWhenCreatingBuiltinRe
|
||||
const std::pair<EBuiltInOps::Type, const char *> testCases[] = {
|
||||
{EBuiltInOps::auxTranslation, "aux_translation.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBuffer, "copy_buffer_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBufferStateless, "copy_buffer_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBufferStatelessHeapless, "copy_buffer_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBufferStateless, "copy_buffer_to_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBufferStatelessHeapless, "copy_buffer_to_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRect, "copy_buffer_rect.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRectStateless, "copy_buffer_rect.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRectStatelessHeapless, "copy_buffer_rect.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRectStateless, "copy_buffer_rect_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRectStatelessHeapless, "copy_buffer_rect_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::fillBuffer, "fill_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::fillBufferStateless, "fill_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::fillBufferStatelessHeapless, "fill_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::fillBufferStateless, "fill_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::fillBufferStatelessHeapless, "fill_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3d, "copy_buffer_to_image3d.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3dStateless, "copy_buffer_to_image3d.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3dHeapless, "copy_buffer_to_image3d.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3dStateless, "copy_buffer_to_image3d_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3dHeapless, "copy_buffer_to_image3d_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBuffer, "copy_image3d_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBufferStateless, "copy_image3d_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBufferHeapless, "copy_image3d_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBufferStateless, "copy_image3d_to_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBufferHeapless, "copy_image3d_to_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyImageToImage1d, "copy_image_to_image1d.builtin_kernel"},
|
||||
{EBuiltInOps::copyImageToImage1dHeapless, "copy_image_to_image1d.builtin_kernel"},
|
||||
{EBuiltInOps::copyImageToImage2d, "copy_image_to_image2d.builtin_kernel"},
|
||||
@@ -195,9 +195,9 @@ TEST_F(BuiltInSharedTest, GivenHeaplessModeEnabledWhenGetBuiltinResourceNamesIsC
|
||||
};
|
||||
|
||||
TestParam params[] = {
|
||||
{"copy_buffer_to_buffer", EBuiltInOps::copyBufferToBufferStatelessHeapless},
|
||||
{"copy_buffer_rect", EBuiltInOps::copyBufferRectStatelessHeapless},
|
||||
{"fill_buffer", EBuiltInOps::fillBufferStatelessHeapless}};
|
||||
{"copy_buffer_to_buffer_stateless", EBuiltInOps::copyBufferToBufferStatelessHeapless},
|
||||
{"copy_buffer_rect_stateless", EBuiltInOps::copyBufferRectStatelessHeapless},
|
||||
{"fill_buffer_stateless", EBuiltInOps::fillBufferStatelessHeapless}};
|
||||
|
||||
for (auto &[builtInTypeAsString, builtInType] : params) {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user