compute-runtime/shared/source/built_ins/kernels/fill_buffer_stateless.built...

82 lines
2.4 KiB
Plaintext

/*
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// assumption is local work size = pattern size
__kernel void FillBufferBytesStateless(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern )
{
size_t dstIndex = get_global_id(0) + dstOffsetInBytes;
size_t srcIndex = get_local_id(0);
pDst[dstIndex] = pPattern[srcIndex];
}
__kernel void FillBufferLeftLeftoverStateless(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern,
const ulong patternSizeInEls )
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferMiddleStateless(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uint* pPattern,
const ulong patternSizeInEls )
{
size_t gid = get_global_id(0);
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferRightLeftoverStateless(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern,
const ulong patternSizeInEls )
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferImmediateStateless(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
size_t gid = get_global_id(0);
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
dstPtr[gid] = value;
}
__kernel void FillBufferImmediateLeftOverStateless(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
size_t gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferSSHOffsetStateless(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const __global uchar* pPattern,
ulong patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
)
{
size_t dstIndex = get_global_id(0);
size_t srcIndex = get_local_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
pDst[dstIndex] = pSrc[srcIndex];
}
)==="