performance: Add copy buffer rect middle builtin

Resolves: NEO-12132

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2024-08-21 13:53:57 +00:00
committed by Compute-Runtime-Automation
parent 90354f5e58
commit bbb44c7a4d
12 changed files with 415 additions and 63 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,5 +7,5 @@
#include "shared/test/common/helpers/kernel_binary_helper.h"
const std::string KernelBinaryHelper::BUILT_INS("1535368482363896758");
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("9730917519387864121_images");
const std::string KernelBinaryHelper::BUILT_INS("15672580764041246108");
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("8269212628108939982_images");

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -220,7 +220,7 @@ __kernel void FillBufferSSHOffset(
pDst[dstIndex] = pSrc[srcIndex];
}
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes2d(
__global const char* src,
__global char* dst,
@@ -239,7 +239,29 @@ __kernel void CopyBufferRectBytes2d(
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytesMiddle2d(
const __global uint* src,
__global uint* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
src += LSrcOffset >> 2;
dst += LDstOffset >> 2;
uint4 loaded = vload4(x,src);
vstore4(loaded,x,dst);
}
__kernel void CopyBufferRectBytes3d(
__global const char* src,
__global char* dst,
@@ -260,6 +282,29 @@ __kernel void CopyBufferRectBytes3d(
}
__kernel void CopyBufferRectBytesMiddle3d(
const __global uint* src,
__global uint* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
src += LSrcOffset >> 2;
dst += LDstOffset >> 2;
uint4 loaded = vload4(x,src);
vstore4(loaded,x,dst);
}
void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) {
dst[currentOffset] = globalStart;
dst[currentOffset + 1] = globalEnd;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -220,7 +220,7 @@ __kernel void FillBufferSSHOffset(
pDst[dstIndex] = pSrc[srcIndex];
}
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytes2d(
__global const char* src,
__global char* dst,
@@ -239,7 +239,29 @@ __kernel void CopyBufferRectBytes2d(
*( dst + LDstOffset ) = *( src + LSrcOffset );
}
//////////////////////////////////////////////////////////////////////////////
__kernel void CopyBufferRectBytesMiddle2d(
const __global uint* src,
__global uint* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
src += LSrcOffset >> 2;
dst += LDstOffset >> 2;
uint4 loaded = vload4(x,src);
vstore4(loaded,x,dst);
}
__kernel void CopyBufferRectBytes3d(
__global const char* src,
__global char* dst,
@@ -260,6 +282,29 @@ __kernel void CopyBufferRectBytes3d(
}
__kernel void CopyBufferRectBytesMiddle3d(
const __global uint* src,
__global uint* dst,
uint4 SrcOrigin,
uint4 DstOrigin,
uint2 SrcPitch,
uint2 DstPitch )
{
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
src += LSrcOffset >> 2;
dst += LDstOffset >> 2;
uint4 loaded = vload4(x,src);
vstore4(loaded,x,dst);
}
void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) {
dst[currentOffset] = globalStart;
dst[currentOffset + 1] = globalEnd;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -363,3 +363,48 @@ __kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __glob
SetDstData(dst, currentOffset, contextStart, globalStart, contextEnd, globalEnd, useOnlyGlobalTimestamps);
}
__kernel void CopyBufferRectBytesMiddle2d(
const __global uint* src,
__global uint* dst,
ulong4 SrcOrigin,
ulong4 DstOrigin,
ulong2 SrcPitch,
ulong2 DstPitch )
{
size_t x = get_global_id(0);
size_t y = get_global_id(1);
size_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
size_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
src += LSrcOffset >> 2;
dst += LDstOffset >> 2;
uint4 loaded = vload4(x,src);
vstore4(loaded,x,dst);
}
__kernel void CopyBufferRectBytesMiddle3d(
const __global uint* src,
__global uint* dst,
ulong4 SrcOrigin,
ulong4 DstOrigin,
ulong2 SrcPitch,
ulong2 DstPitch )
{
size_t x = get_global_id(0);
size_t y = get_global_id(1);
size_t z = get_global_id(2);
size_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
size_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
src += LSrcOffset >> 2;
dst += LDstOffset >> 2;
uint4 loaded = vload4(x,src);
vstore4(loaded,x,dst);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -323,3 +323,48 @@ __kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __glob
}
}
__kernel void CopyBufferRectBytesMiddle2d(
const __global uint* src,
__global uint* dst,
ulong4 SrcOrigin,
ulong4 DstOrigin,
ulong2 SrcPitch,
ulong2 DstPitch )
{
size_t x = get_global_id(0);
size_t y = get_global_id(1);
size_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x );
size_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x );
src += LSrcOffset >> 2;
dst += LDstOffset >> 2;
uint4 loaded = vload4(x,src);
vstore4(loaded,x,dst);
}
__kernel void CopyBufferRectBytesMiddle3d(
const __global uint* src,
__global uint* dst,
ulong4 SrcOrigin,
ulong4 DstOrigin,
ulong2 SrcPitch,
ulong2 DstPitch )
{
size_t x = get_global_id(0);
size_t y = get_global_id(1);
size_t z = get_global_id(2);
size_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
size_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
src += LSrcOffset >> 2;
dst += LDstOffset >> 2;
uint4 loaded = vload4(x,src);
vstore4(loaded,x,dst);
}