mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 06:23:01 +08:00
Optimize copying buffers with misaligned pointers
Resolves: NEO-5476 Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
9290637a8e
commit
566a761aaa
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -41,6 +41,27 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
const size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits));
|
||||
result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits));
|
||||
result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits));
|
||||
vstore4(result, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -41,6 +41,27 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
const size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits));
|
||||
result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits));
|
||||
result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits));
|
||||
vstore4(result, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -46,6 +46,27 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
const size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits));
|
||||
result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits));
|
||||
result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits));
|
||||
vstore4(result, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -46,6 +46,27 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
const size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits));
|
||||
result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits));
|
||||
result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits));
|
||||
vstore4(result, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
Reference in New Issue
Block a user