mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 00:24:58 +08:00
Add support for stateless copy buffer to image
Change-Id: I494a64ecea8ff184626eeee3069de16f37e5b24b Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com> Related-To: NEO-3314
This commit is contained in:
@@ -29,6 +29,7 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.igdrcl_built_in
|
||||
|
||||
@@ -20,18 +20,19 @@ constexpr Type CopyBufferRectStateless{4};
|
||||
constexpr Type FillBuffer{5};
|
||||
constexpr Type FillBufferStateless{6};
|
||||
constexpr Type CopyBufferToImage3d{7};
|
||||
constexpr Type CopyImage3dToBuffer{8};
|
||||
constexpr Type CopyImageToImage1d{9};
|
||||
constexpr Type CopyImageToImage2d{10};
|
||||
constexpr Type CopyImageToImage3d{11};
|
||||
constexpr Type FillImage1d{12};
|
||||
constexpr Type FillImage2d{13};
|
||||
constexpr Type FillImage3d{14};
|
||||
constexpr Type VmeBlockMotionEstimateIntel{15};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{16};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{17};
|
||||
constexpr Type Scheduler{18};
|
||||
constexpr Type CopyBufferToImage3dStateless{8};
|
||||
constexpr Type CopyImage3dToBuffer{9};
|
||||
constexpr Type CopyImageToImage1d{10};
|
||||
constexpr Type CopyImageToImage2d{11};
|
||||
constexpr Type CopyImageToImage3d{12};
|
||||
constexpr Type FillImage1d{13};
|
||||
constexpr Type FillImage2d{14};
|
||||
constexpr Type FillImage3d{15};
|
||||
constexpr Type VmeBlockMotionEstimateIntel{16};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{17};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{18};
|
||||
constexpr Type Scheduler{19};
|
||||
|
||||
constexpr uint32_t MaxBaseValue{18};
|
||||
constexpr uint32_t MaxBaseValue{19};
|
||||
} // namespace EBuiltInOps
|
||||
} // namespace NEO
|
||||
|
||||
@@ -532,7 +532,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} {
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyBufferToImage3d,
|
||||
"",
|
||||
@@ -544,6 +544,15 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
return buildDispatchInfosTyped<uint32_t>(multiDispatchInfo, operationParams);
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[5] = {nullptr};
|
||||
BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib){};
|
||||
|
||||
template <typename OffsetType>
|
||||
bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::NoSplit> kernelNoSplit3DBuilder;
|
||||
multiDispatchInfo.setBuiltinOpParams(operationParams);
|
||||
DEBUG_BREAK_IF(!(((operationParams.srcPtr != nullptr) || (operationParams.srcMemObj != nullptr)) && (operationParams.dstPtr == nullptr)));
|
||||
@@ -559,9 +568,9 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
|
||||
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
|
||||
|
||||
auto srcRowPitch = static_cast<uint32_t>(operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel);
|
||||
auto srcRowPitch = static_cast<size_t>(operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel);
|
||||
|
||||
auto srcSlicePitch = static_cast<uint32_t>(
|
||||
auto srcSlicePitch = static_cast<size_t>(
|
||||
operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch));
|
||||
|
||||
// Determine size of host ptr surface for residency purposes
|
||||
@@ -584,7 +593,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed, operationParams.dstMipLevel);
|
||||
|
||||
// Set-up srcOffset
|
||||
kernelNoSplit3DBuilder.setArg(2, static_cast<uint32_t>(operationParams.srcOffset.x));
|
||||
kernelNoSplit3DBuilder.setArg(2, static_cast<OffsetType>(operationParams.srcOffset.x));
|
||||
|
||||
// Set-up dstOrigin
|
||||
{
|
||||
@@ -598,9 +607,9 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
|
||||
// Set-up srcRowPitch
|
||||
{
|
||||
uint32_t pitch[] = {
|
||||
static_cast<uint32_t>(srcRowPitch),
|
||||
static_cast<uint32_t>(srcSlicePitch)};
|
||||
OffsetType pitch[] = {
|
||||
static_cast<OffsetType>(srcRowPitch),
|
||||
static_cast<OffsetType>(srcSlicePitch)};
|
||||
kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch);
|
||||
}
|
||||
|
||||
@@ -610,9 +619,26 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[5];
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::CopyBufferToImage3dStateless> : public BuiltInOp<EBuiltInOps::CopyBufferToImage3d> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltInOp<EBuiltInOps::CopyBufferToImage3d>(kernelsLib) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyBufferToImage3dStateless,
|
||||
"-cl-intel-greater-than-4GB-buffer-required",
|
||||
"CopyBufferToImage3dBytes", kernelBytes[0],
|
||||
"CopyBufferToImage3d2Bytes", kernelBytes[1],
|
||||
"CopyBufferToImage3d4Bytes", kernelBytes[2],
|
||||
"CopyBufferToImage3d8Bytes", kernelBytes[3],
|
||||
"CopyBufferToImage3d16Bytes", kernelBytes[4]);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo, operationParams);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -852,6 +878,9 @@ BuiltinDispatchInfoBuilder &BuiltIns::getBuiltinDispatchInfoBuilder(EBuiltInOps:
|
||||
case EBuiltInOps::CopyBufferToImage3d:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyBufferToImage3d>>(*this, context, device); });
|
||||
break;
|
||||
case EBuiltInOps::CopyBufferToImage3dStateless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyBufferToImage3dStateless>>(*this, context, device); });
|
||||
break;
|
||||
case EBuiltInOps::CopyImage3dToBuffer:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyImage3dToBuffer>>(*this, context, device); });
|
||||
break;
|
||||
|
||||
@@ -36,6 +36,8 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
||||
return "fill_buffer_stateless.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyBufferToImage3d:
|
||||
return "copy_buffer_to_image3d.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyBufferToImage3dStateless:
|
||||
return "copy_buffer_to_image3d_stateless.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyImage3dToBuffer:
|
||||
return "copy_image3d_to_buffer.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyImageToImage1d:
|
||||
|
||||
@@ -27,6 +27,7 @@ set(GENERATED_BUILTINS
|
||||
set(GENERATED_BUILTINS_STATELESS
|
||||
"copy_buffer_to_buffer_stateless"
|
||||
"copy_buffer_rect_stateless"
|
||||
"copy_buffer_to_image3d_stateless"
|
||||
"fill_buffer_stateless"
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
c.x = (uint)combined;
|
||||
}
|
||||
else{
|
||||
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 4));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
}
|
||||
else{
|
||||
c.x = (*(__global uint*)(src + LOffset + x * 4));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 8));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
|
||||
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint2*)(src + LOffset + x * 8));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x0000000f){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 16));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.z = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.w = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint4 *)(src + LOffset + x * 16));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -74,6 +74,15 @@ static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
|
||||
#include "runtime/built_ins/kernels/copy_buffer_to_image3d.igdrcl_built_in"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToImage3dStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToImage3dStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "runtime/built_ins/kernels/copy_buffer_to_image3d_stateless.igdrcl_built_in"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImage3dToBufferSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImage3dToBuffer,
|
||||
|
||||
@@ -33,10 +33,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferToImage(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo di;
|
||||
auto eBuiltInOpsType = EBuiltInOps::CopyBufferToImage3d;
|
||||
if (forceStateless(srcBuffer->getSize())) {
|
||||
eBuiltInOpsType = EBuiltInOps::CopyBufferToImage3dStateless;
|
||||
}
|
||||
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d,
|
||||
this->getContext(), this->getDevice());
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
|
||||
this->getContext(),
|
||||
this->getDevice());
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
|
||||
MemObjSurface srcBufferSurf(srcBuffer);
|
||||
@@ -52,12 +56,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferToImage(
|
||||
if (dstImage->getImageDesc().num_mip_levels > 0) {
|
||||
dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, dstOrigin);
|
||||
}
|
||||
builder.buildDispatchInfos(di, dc);
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
builder.buildDispatchInfos(dispatchInfo, dc);
|
||||
|
||||
enqueueHandler<CL_COMMAND_COPY_BUFFER_TO_IMAGE>(
|
||||
surfaces,
|
||||
false,
|
||||
di,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
@@ -765,6 +765,42 @@ TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderFillBufferStatelessIsUsedTh
|
||||
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStatelessIsUsedThenParamsAreCorrect) {
|
||||
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
|
||||
uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
|
||||
|
||||
MockBuffer srcBuffer;
|
||||
srcBuffer.size = static_cast<size_t>(bigSize);
|
||||
std ::unique_ptr<Image> pDstImage(Image2dHelper<>::create(pContext));
|
||||
ASSERT_NE(nullptr, pDstImage.get());
|
||||
|
||||
auto &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3dStateless, *pContext, *pDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcPtr = &srcBuffer;
|
||||
dc.dstMemObj = pDstImage.get();
|
||||
dc.srcOffset = {static_cast<size_t>(bigOffset), 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, dc));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBufferWithSourceOffsetUnalignedToFour) {
|
||||
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pContext, *pDevice);
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "unit_tests/command_queue/enqueue_copy_buffer_to_image_fixture.h"
|
||||
#include "unit_tests/gen_common/gen_commands_common_validation.h"
|
||||
#include "unit_tests/helpers/unit_test_helper.h"
|
||||
#include "unit_tests/mocks/mock_buffer.h"
|
||||
#include "unit_tests/mocks/mock_builtin_dispatch_info_builder.h"
|
||||
#include "unit_tests/mocks/mock_builtins.h"
|
||||
|
||||
@@ -272,3 +273,62 @@ HWTEST_P(MipMapCopyBufferToImageTest, GivenImageWithMipLevelNonZeroWhenCopyBuffe
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MipMapCopyBufferToImageTest_GivenImageWithMipLevelNonZeroWhenCopyBufferToImageIsCalledThenProperMipLevelIsSet,
|
||||
MipMapCopyBufferToImageTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D));
|
||||
|
||||
struct EnqueueCopyBufferToImageHw : public ::testing::Test {
|
||||
|
||||
void SetUp() override {
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(*platformDevices));
|
||||
context = std::make_unique<MockContext>(device.get());
|
||||
dstImage = std::unique_ptr<Image>(Image2dHelper<>::create(context.get()));
|
||||
}
|
||||
|
||||
std::unique_ptr<MockDevice> device;
|
||||
std::unique_ptr<MockContext> context;
|
||||
std::unique_ptr<Image> dstImage;
|
||||
MockBuffer srcBuffer;
|
||||
uint64_t bigSize = 5ull * MemoryConstants::gigaByte;
|
||||
uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1;
|
||||
uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
|
||||
|
||||
const size_t dstOrigin[3] = {0, 0, 0};
|
||||
const size_t region[3] = {4, 1, 1};
|
||||
};
|
||||
|
||||
using EnqueueCopyBufferToImageStatelessTest = EnqueueCopyBufferToImageHw;
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferToImageStatelessTest, givenBigBufferWhenCopyingBufferToImageStatelessThenSuccessIsReturned) {
|
||||
auto cmdQ = std::make_unique<CommandQueueStateless<FamilyType>>(context.get(), device.get());
|
||||
srcBuffer.size = static_cast<size_t>(bigSize);
|
||||
auto retVal = cmdQ->enqueueCopyBufferToImage(
|
||||
&srcBuffer,
|
||||
dstImage.get(),
|
||||
static_cast<size_t>(bigOffset),
|
||||
dstOrigin,
|
||||
region,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
using EnqueueCopyBufferToImageStatefulTest = EnqueueCopyBufferToImageHw;
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferToImageStatefulTest, givenBigBufferWhenCopyingBufferToImageStatefulThenSuccessIsReturned) {
|
||||
auto cmdQ = std::make_unique<CommandQueueStateful<FamilyType>>(context.get(), device.get());
|
||||
srcBuffer.size = static_cast<size_t>(smallSize);
|
||||
auto retVal = cmdQ->enqueueCopyBufferToImage(
|
||||
&srcBuffer,
|
||||
dstImage.get(),
|
||||
0,
|
||||
dstOrigin,
|
||||
region,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user