mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
performance: Add copy buffer rect middle builtin
Resolves: NEO-12132
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
90354f5e58
commit
bbb44c7a4d
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,5 +7,5 @@
|
||||
|
||||
#include "shared/test/common/helpers/kernel_binary_helper.h"
|
||||
|
||||
const std::string KernelBinaryHelper::BUILT_INS("1535368482363896758");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("9730917519387864121_images");
|
||||
const std::string KernelBinaryHelper::BUILT_INS("15672580764041246108");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("8269212628108939982_images");
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -220,7 +220,7 @@ __kernel void FillBufferSSHOffset(
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
@@ -239,7 +239,29 @@ __kernel void CopyBufferRectBytes2d(
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Copies one uint4 (four consecutive uints) per work-item from src to dst.
// Global id 0 is the uint4 index within the row; global id 1 is the row.
// Each row's byte offset is origin.x + (row + origin.y) * pitch.x and is
// turned into a uint element index with a right shift by 2.
// NOTE(review): the shift drops the low two bits of the byte offset, so this
// presumably relies on 4-byte-aligned origins and pitches - confirm against
// the host-side dispatch.
__kernel void CopyBufferRectBytesMiddle2d(
    const __global uint* src,
    __global uint* dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch )

{
    int col = get_global_id(0);
    int row = get_global_id(1);

    // Byte offsets of the start of the addressed row in each buffer.
    uint srcRowBytes = ( row + SrcOrigin.y ) * SrcPitch.x;
    uint dstRowBytes = ( row + DstOrigin.y ) * DstPitch.x;
    uint srcByteOffset = SrcOrigin.x + srcRowBytes;
    uint dstByteOffset = DstOrigin.x + dstRowBytes;

    // Bytes -> uint elements.
    const __global uint* srcRow = src + ( srcByteOffset >> 2 );
    __global uint* dstRow = dst + ( dstByteOffset >> 2 );

    vstore4( vload4( col, srcRow ), col, dstRow );
}
|
||||
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
@@ -260,6 +282,29 @@ __kernel void CopyBufferRectBytes3d(
|
||||
|
||||
}
|
||||
|
||||
// Copies one uint4 (four consecutive uints) per work-item from src to dst.
// Global id 0 is the uint4 index within the row, global id 1 the row and
// global id 2 the slice. The byte offset is
// origin.x + (row + origin.y) * pitch.x + (slice + origin.z) * pitch.y and is
// turned into a uint element index with a right shift by 2.
// NOTE(review): the shift drops the low two bits of the byte offset, so this
// presumably relies on 4-byte-aligned origins and pitches - confirm against
// the host-side dispatch.
__kernel void CopyBufferRectBytesMiddle3d(
    const __global uint* src,
    __global uint* dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch )

{
    int col = get_global_id(0);
    int row = get_global_id(1);
    int slice = get_global_id(2);

    // Row and slice contributions to the byte offset in each buffer.
    uint srcRowBytes = ( row + SrcOrigin.y ) * SrcPitch.x;
    uint srcSliceBytes = ( slice + SrcOrigin.z ) * SrcPitch.y;
    uint dstRowBytes = ( row + DstOrigin.y ) * DstPitch.x;
    uint dstSliceBytes = ( slice + DstOrigin.z ) * DstPitch.y;

    uint srcByteOffset = SrcOrigin.x + srcRowBytes + srcSliceBytes;
    uint dstByteOffset = DstOrigin.x + dstRowBytes + dstSliceBytes;

    // Bytes -> uint elements.
    const __global uint* srcRow = src + ( srcByteOffset >> 2 );
    __global uint* dstRow = dst + ( dstByteOffset >> 2 );

    vstore4( vload4( col, srcRow ), col, dstRow );
}
|
||||
|
||||
void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) {
|
||||
dst[currentOffset] = globalStart;
|
||||
dst[currentOffset + 1] = globalEnd;
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -220,7 +220,7 @@ __kernel void FillBufferSSHOffset(
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
@@ -239,7 +239,29 @@ __kernel void CopyBufferRectBytes2d(
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Copies one uint4 (four consecutive uints) per work-item from src to dst.
// Global id 0 is the uint4 index within the row; global id 1 is the row.
// Each row's byte offset is origin.x + (row + origin.y) * pitch.x and is
// turned into a uint element index with a right shift by 2.
// NOTE(review): the shift drops the low two bits of the byte offset, so this
// presumably relies on 4-byte-aligned origins and pitches - confirm against
// the host-side dispatch.
__kernel void CopyBufferRectBytesMiddle2d(
    const __global uint* src,
    __global uint* dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch )

{
    int col = get_global_id(0);
    int row = get_global_id(1);

    // Byte offsets of the start of the addressed row in each buffer.
    uint srcRowBytes = ( row + SrcOrigin.y ) * SrcPitch.x;
    uint dstRowBytes = ( row + DstOrigin.y ) * DstPitch.x;
    uint srcByteOffset = SrcOrigin.x + srcRowBytes;
    uint dstByteOffset = DstOrigin.x + dstRowBytes;

    // Bytes -> uint elements.
    const __global uint* srcRow = src + ( srcByteOffset >> 2 );
    __global uint* dstRow = dst + ( dstByteOffset >> 2 );

    vstore4( vload4( col, srcRow ), col, dstRow );
}
|
||||
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
@@ -260,6 +282,29 @@ __kernel void CopyBufferRectBytes3d(
|
||||
|
||||
}
|
||||
|
||||
// Copies one uint4 (four consecutive uints) per work-item from src to dst.
// Global id 0 is the uint4 index within the row, global id 1 the row and
// global id 2 the slice. The byte offset is
// origin.x + (row + origin.y) * pitch.x + (slice + origin.z) * pitch.y and is
// turned into a uint element index with a right shift by 2.
// NOTE(review): the shift drops the low two bits of the byte offset, so this
// presumably relies on 4-byte-aligned origins and pitches - confirm against
// the host-side dispatch.
__kernel void CopyBufferRectBytesMiddle3d(
    const __global uint* src,
    __global uint* dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch )

{
    int col = get_global_id(0);
    int row = get_global_id(1);
    int slice = get_global_id(2);

    // Row and slice contributions to the byte offset in each buffer.
    uint srcRowBytes = ( row + SrcOrigin.y ) * SrcPitch.x;
    uint srcSliceBytes = ( slice + SrcOrigin.z ) * SrcPitch.y;
    uint dstRowBytes = ( row + DstOrigin.y ) * DstPitch.x;
    uint dstSliceBytes = ( slice + DstOrigin.z ) * DstPitch.y;

    uint srcByteOffset = SrcOrigin.x + srcRowBytes + srcSliceBytes;
    uint dstByteOffset = DstOrigin.x + dstRowBytes + dstSliceBytes;

    // Bytes -> uint elements.
    const __global uint* srcRow = src + ( srcByteOffset >> 2 );
    __global uint* dstRow = dst + ( dstByteOffset >> 2 );

    vstore4( vload4( col, srcRow ), col, dstRow );
}
|
||||
|
||||
void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) {
|
||||
dst[currentOffset] = globalStart;
|
||||
dst[currentOffset + 1] = globalEnd;
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -363,3 +363,48 @@ __kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __glob
|
||||
|
||||
SetDstData(dst, currentOffset, contextStart, globalStart, contextEnd, globalEnd, useOnlyGlobalTimestamps);
|
||||
}
|
||||
|
||||
// Variant taking ulong origins/pitches and computing offsets in size_t.
// Copies one uint4 (four consecutive uints) per work-item from src to dst.
// Global id 0 is the uint4 index within the row; global id 1 is the row.
// Each row's byte offset is origin.x + (row + origin.y) * pitch.x and is
// turned into a uint element index with a right shift by 2.
// NOTE(review): the shift drops the low two bits of the byte offset, so this
// presumably relies on 4-byte-aligned origins and pitches - confirm against
// the host-side dispatch.
__kernel void CopyBufferRectBytesMiddle2d(
    const __global uint* src,
    __global uint* dst,
    ulong4 SrcOrigin,
    ulong4 DstOrigin,
    ulong2 SrcPitch,
    ulong2 DstPitch )

{
    size_t col = get_global_id(0);
    size_t row = get_global_id(1);

    // Byte offsets of the start of the addressed row in each buffer.
    size_t srcRowBytes = ( row + SrcOrigin.y ) * SrcPitch.x;
    size_t dstRowBytes = ( row + DstOrigin.y ) * DstPitch.x;
    size_t srcByteOffset = SrcOrigin.x + srcRowBytes;
    size_t dstByteOffset = DstOrigin.x + dstRowBytes;

    // Bytes -> uint elements.
    const __global uint* srcRow = src + ( srcByteOffset >> 2 );
    __global uint* dstRow = dst + ( dstByteOffset >> 2 );

    vstore4( vload4( col, srcRow ), col, dstRow );
}
|
||||
|
||||
// Variant taking ulong origins/pitches and computing offsets in size_t.
// Copies one uint4 (four consecutive uints) per work-item from src to dst.
// Global id 0 is the uint4 index within the row, global id 1 the row and
// global id 2 the slice. The byte offset is
// origin.x + (row + origin.y) * pitch.x + (slice + origin.z) * pitch.y and is
// turned into a uint element index with a right shift by 2.
// NOTE(review): the shift drops the low two bits of the byte offset, so this
// presumably relies on 4-byte-aligned origins and pitches - confirm against
// the host-side dispatch.
__kernel void CopyBufferRectBytesMiddle3d(
    const __global uint* src,
    __global uint* dst,
    ulong4 SrcOrigin,
    ulong4 DstOrigin,
    ulong2 SrcPitch,
    ulong2 DstPitch )

{
    size_t col = get_global_id(0);
    size_t row = get_global_id(1);
    size_t slice = get_global_id(2);

    // Row and slice contributions to the byte offset in each buffer.
    size_t srcRowBytes = ( row + SrcOrigin.y ) * SrcPitch.x;
    size_t srcSliceBytes = ( slice + SrcOrigin.z ) * SrcPitch.y;
    size_t dstRowBytes = ( row + DstOrigin.y ) * DstPitch.x;
    size_t dstSliceBytes = ( slice + DstOrigin.z ) * DstPitch.y;

    size_t srcByteOffset = SrcOrigin.x + srcRowBytes + srcSliceBytes;
    size_t dstByteOffset = DstOrigin.x + dstRowBytes + dstSliceBytes;

    // Bytes -> uint elements.
    const __global uint* srcRow = src + ( srcByteOffset >> 2 );
    __global uint* dstRow = dst + ( dstByteOffset >> 2 );

    vstore4( vload4( col, srcRow ), col, dstRow );
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -323,3 +323,48 @@ __kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __glob
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Variant taking ulong origins/pitches and computing offsets in size_t.
// Copies one uint4 (four consecutive uints) per work-item from src to dst.
// Global id 0 is the uint4 index within the row; global id 1 is the row.
// Each row's byte offset is origin.x + (row + origin.y) * pitch.x and is
// turned into a uint element index with a right shift by 2.
// NOTE(review): the shift drops the low two bits of the byte offset, so this
// presumably relies on 4-byte-aligned origins and pitches - confirm against
// the host-side dispatch.
__kernel void CopyBufferRectBytesMiddle2d(
    const __global uint* src,
    __global uint* dst,
    ulong4 SrcOrigin,
    ulong4 DstOrigin,
    ulong2 SrcPitch,
    ulong2 DstPitch )

{
    size_t col = get_global_id(0);
    size_t row = get_global_id(1);

    // Byte offsets of the start of the addressed row in each buffer.
    size_t srcRowBytes = ( row + SrcOrigin.y ) * SrcPitch.x;
    size_t dstRowBytes = ( row + DstOrigin.y ) * DstPitch.x;
    size_t srcByteOffset = SrcOrigin.x + srcRowBytes;
    size_t dstByteOffset = DstOrigin.x + dstRowBytes;

    // Bytes -> uint elements.
    const __global uint* srcRow = src + ( srcByteOffset >> 2 );
    __global uint* dstRow = dst + ( dstByteOffset >> 2 );

    vstore4( vload4( col, srcRow ), col, dstRow );
}
|
||||
|
||||
// Variant taking ulong origins/pitches and computing offsets in size_t.
// Copies one uint4 (four consecutive uints) per work-item from src to dst.
// Global id 0 is the uint4 index within the row, global id 1 the row and
// global id 2 the slice. The byte offset is
// origin.x + (row + origin.y) * pitch.x + (slice + origin.z) * pitch.y and is
// turned into a uint element index with a right shift by 2.
// NOTE(review): the shift drops the low two bits of the byte offset, so this
// presumably relies on 4-byte-aligned origins and pitches - confirm against
// the host-side dispatch.
__kernel void CopyBufferRectBytesMiddle3d(
    const __global uint* src,
    __global uint* dst,
    ulong4 SrcOrigin,
    ulong4 DstOrigin,
    ulong2 SrcPitch,
    ulong2 DstPitch )

{
    size_t col = get_global_id(0);
    size_t row = get_global_id(1);
    size_t slice = get_global_id(2);

    // Row and slice contributions to the byte offset in each buffer.
    size_t srcRowBytes = ( row + SrcOrigin.y ) * SrcPitch.x;
    size_t srcSliceBytes = ( slice + SrcOrigin.z ) * SrcPitch.y;
    size_t dstRowBytes = ( row + DstOrigin.y ) * DstPitch.x;
    size_t dstSliceBytes = ( slice + DstOrigin.z ) * DstPitch.y;

    size_t srcByteOffset = SrcOrigin.x + srcRowBytes + srcSliceBytes;
    size_t dstByteOffset = DstOrigin.x + dstRowBytes + dstSliceBytes;

    // Bytes -> uint elements.
    const __global uint* srcRow = src + ( srcByteOffset >> 2 );
    __global uint* dstRow = dst + ( dstByteOffset >> 2 );

    vstore4( vload4( col, srcRow ), col, dstRow );
}
|
||||
Reference in New Issue
Block a user