2017-12-21 07:45:38 +08:00
|
|
|
/*
|
2023-09-26 01:06:15 +08:00
|
|
|
* Copyright (C) 2020-2023 Intel Corporation
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
2020-02-24 03:33:40 +08:00
|
|
|
* SPDX-License-Identifier: MIT
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
R"===(
|
2023-09-26 01:06:15 +08:00
|
|
|
// Optimizer hint: promises the compiler that the address held in `ptr` is a
// multiple of 4. The argument is parenthesized so that an expression such as
// `base + 1` is converted to an address as a whole before the low bits are tested.
#define ALIGNED4(ptr) __builtin_assume((((size_t)(ptr)) & 0x3) == 0)
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
// Copies one pattern byte per work-item into the destination.
// Assumes the local work size equals the pattern size, so the local id of a
// work-item is also the index of the pattern byte it has to write.
//   pDst              - destination bytes
//   dstOffsetInBytes  - byte offset added to the global id to form the store index
//   pPattern          - pattern bytes, indexed by the local id
__kernel void FillBufferBytes(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern )
{
    // Both base pointers are declared 4-byte aligned to the compiler.
    ALIGNED4(pDst);
    ALIGNED4(pPattern);

    const uint patternByte = get_local_id(0);
    const uint dstByte = get_global_id(0) + dstOffsetInBytes;

    pDst[dstByte] = pPattern[patternByte];
}
|
|
|
|
|
|
|
|
// Byte-granular fill: each work-item stores a single pattern byte at
// pDst[global id + dstOffsetInBytes].
// The pattern index is the global id masked with (patternSizeInEls - 1); the
// mask only acts as a wrap-around when patternSizeInEls is a power of two.
__kernel void FillBufferLeftLeftover(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern,
    const uint patternSizeInEls )
{
    // Both base pointers are declared 4-byte aligned to the compiler.
    ALIGNED4(pDst);
    ALIGNED4(pPattern);

    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    const uint dstByte = workItem + dstOffsetInBytes;

    pDst[dstByte] = pPattern[workItem & patternMask];
}
|
|
|
|
|
|
|
|
// Word-granular fill: each work-item stores one 32-bit pattern element.
// The destination is viewed as an array of uint starting dstOffsetInBytes
// bytes past pDst and is indexed by the global id.
// The pattern index is the global id masked with (patternSizeInEls - 1); the
// mask only acts as a wrap-around when patternSizeInEls is a power of two.
__kernel void FillBufferMiddle(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uint* pPattern,
    const uint patternSizeInEls )
{
    // Both base pointers are declared 4-byte aligned to the compiler.
    ALIGNED4(pDst);
    ALIGNED4(pPattern);

    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    __global uint* pDstWords = (__global uint*)(pDst + dstOffsetInBytes);

    pDstWords[workItem] = pPattern[workItem & patternMask];
}
|
|
|
|
|
|
|
|
// Byte-granular fill: each work-item stores a single pattern byte at
// pDst[global id + dstOffsetInBytes].
// The pattern index is the global id masked with (patternSizeInEls - 1); the
// mask only acts as a wrap-around when patternSizeInEls is a power of two.
__kernel void FillBufferRightLeftover(
    __global uchar* pDst,
    uint dstOffsetInBytes,
    const __global uchar* pPattern,
    const uint patternSizeInEls )
{
    // Both base pointers are declared 4-byte aligned to the compiler.
    ALIGNED4(pDst);
    ALIGNED4(pPattern);

    const uint workItem = get_global_id(0);
    const uint patternMask = patternSizeInEls - 1;
    const uint dstByte = workItem + dstOffsetInBytes;

    pDst[dstByte] = pPattern[workItem & patternMask];
}
|
2020-02-24 20:10:44 +08:00
|
|
|
|
|
|
|
// Fills the destination with an immediate 32-bit value, one uint4 per
// work-item: all four components of the vector receive `value`.
// The destination is viewed as an array of uint4 starting dstSshOffset bytes
// past ptr and is indexed by the global id.
__kernel void FillBufferImmediate(
    __global uchar* ptr,
    ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    const uint value)
{
    // The base pointer is declared 4-byte aligned to the compiler.
    ALIGNED4(ptr);

    const uint workItem = get_global_id(0);
    __global uint4* pDstVec = (__global uint4*)(ptr + dstSshOffset);

    // Broadcast the scalar into every component of the vector.
    pDstVec[workItem] = (uint4)(value);
}
|
|
|
|
|
2022-09-03 00:16:41 +08:00
|
|
|
// Byte-granular counterpart of the immediate fill: each work-item stores one
// byte at (ptr + dstSshOffset)[global id].
// `value` is a uint stored into a uchar element, so only its low 8 bits are
// written.
__kernel void FillBufferImmediateLeftOver(
    __global uchar* ptr,
    ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    const uint value)
{
    // The base pointer is declared 4-byte aligned to the compiler.
    ALIGNED4(ptr);

    const uint workItem = get_global_id(0);
    __global uchar* pDstBytes = ptr + dstSshOffset;

    pDstBytes[workItem] = (uchar)value;
}
|
|
|
|
|
|
|
|
// Copies one pattern byte per work-item, with both buffers addressed through
// an extra byte offset:
//   destination byte = (ptr + dstSshOffset)[global id]
//   source byte      = (pPattern + patternSshOffset)[local id]
// Indexing the pattern by the local id presumes the local work size matches
// the pattern size - NOTE(review): confirm against the host-side dispatch.
__kernel void FillBufferSSHOffset(
    __global uchar* ptr,
    uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    const __global uchar* pPattern,
    uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
)
{
    // Both base pointers are declared 4-byte aligned to the compiler.
    ALIGNED4(ptr);
    ALIGNED4(pPattern);

    const uint dstIndex = get_global_id(0);
    const uint srcIndex = get_local_id(0);

    __global uchar* pDst = ptr + dstSshOffset;
    // The pattern is only read, so the pointer keeps its const qualifier
    // instead of having it cast away.
    const __global uchar* pSrc = pPattern + patternSshOffset;

    pDst[dstIndex] = pSrc[srcIndex];
}
|
2017-12-21 07:45:38 +08:00
|
|
|
)==="
|