mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add support for stateless copy image to buffer
Change-Id: I91d1a45d87a5984e0bb7fdb302a716ffcea7bfc8 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com> Related-To: NEO-3314
This commit is contained in:
@ -31,6 +31,7 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.igdrcl_built_in
|
||||
|
@ -22,17 +22,18 @@ constexpr Type FillBufferStateless{6};
|
||||
constexpr Type CopyBufferToImage3d{7};
|
||||
constexpr Type CopyBufferToImage3dStateless{8};
|
||||
constexpr Type CopyImage3dToBuffer{9};
|
||||
constexpr Type CopyImageToImage1d{10};
|
||||
constexpr Type CopyImageToImage2d{11};
|
||||
constexpr Type CopyImageToImage3d{12};
|
||||
constexpr Type FillImage1d{13};
|
||||
constexpr Type FillImage2d{14};
|
||||
constexpr Type FillImage3d{15};
|
||||
constexpr Type VmeBlockMotionEstimateIntel{16};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{17};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{18};
|
||||
constexpr Type Scheduler{19};
|
||||
constexpr Type CopyImage3dToBufferStateless{10};
|
||||
constexpr Type CopyImageToImage1d{11};
|
||||
constexpr Type CopyImageToImage2d{12};
|
||||
constexpr Type CopyImageToImage3d{13};
|
||||
constexpr Type FillImage1d{14};
|
||||
constexpr Type FillImage2d{15};
|
||||
constexpr Type FillImage3d{16};
|
||||
constexpr Type VmeBlockMotionEstimateIntel{17};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{18};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{19};
|
||||
constexpr Type Scheduler{20};
|
||||
|
||||
constexpr uint32_t MaxBaseValue{19};
|
||||
constexpr uint32_t MaxBaseValue{20};
|
||||
} // namespace EBuiltInOps
|
||||
} // namespace NEO
|
||||
|
@ -568,10 +568,10 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
|
||||
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
|
||||
|
||||
auto srcRowPitch = static_cast<size_t>(operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel);
|
||||
auto srcRowPitch = operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel;
|
||||
|
||||
auto srcSlicePitch = static_cast<size_t>(
|
||||
operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch));
|
||||
auto srcSlicePitch =
|
||||
operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch);
|
||||
|
||||
// Determine size of host ptr surface for residency purposes
|
||||
size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0;
|
||||
@ -645,7 +645,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} {
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyImage3dToBuffer,
|
||||
"",
|
||||
@ -657,6 +657,16 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
return buildDispatchInfosTyped<uint32_t>(multiDispatchInfo, operationParams);
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[5] = {nullptr};
|
||||
|
||||
BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib) {}
|
||||
|
||||
template <typename OffsetType>
|
||||
bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::NoSplit> kernelNoSplit3DBuilder;
|
||||
multiDispatchInfo.setBuiltinOpParams(operationParams);
|
||||
DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && ((operationParams.dstPtr != nullptr) || (operationParams.dstMemObj != nullptr))));
|
||||
@ -672,10 +682,10 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
|
||||
|
||||
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
|
||||
|
||||
auto dstRowPitch = static_cast<uint32_t>(operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel);
|
||||
auto dstRowPitch = operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel;
|
||||
|
||||
auto dstSlicePitch = static_cast<uint32_t>(
|
||||
operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch));
|
||||
auto dstSlicePitch =
|
||||
operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch);
|
||||
|
||||
// Determine size of host ptr surface for residency purposes
|
||||
size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0;
|
||||
@ -707,13 +717,13 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
|
||||
}
|
||||
|
||||
// Set-up dstOffset
|
||||
kernelNoSplit3DBuilder.setArg(3, static_cast<uint32_t>(operationParams.dstOffset.x));
|
||||
kernelNoSplit3DBuilder.setArg(3, static_cast<OffsetType>(operationParams.dstOffset.x));
|
||||
|
||||
// Set-up dstRowPitch
|
||||
{
|
||||
uint32_t pitch[] = {
|
||||
static_cast<uint32_t>(dstRowPitch),
|
||||
static_cast<uint32_t>(dstSlicePitch)};
|
||||
OffsetType pitch[] = {
|
||||
static_cast<OffsetType>(dstRowPitch),
|
||||
static_cast<OffsetType>(dstSlicePitch)};
|
||||
kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch);
|
||||
}
|
||||
|
||||
@ -723,9 +733,26 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[5];
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::CopyImage3dToBufferStateless> : public BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltInOp<EBuiltInOps::CopyImage3dToBuffer>(kernelsLib) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyImage3dToBufferStateless,
|
||||
"-cl-intel-greater-than-4GB-buffer-required",
|
||||
"CopyImage3dToBufferBytes", kernelBytes[0],
|
||||
"CopyImage3dToBuffer2Bytes", kernelBytes[1],
|
||||
"CopyImage3dToBuffer4Bytes", kernelBytes[2],
|
||||
"CopyImage3dToBuffer8Bytes", kernelBytes[3],
|
||||
"CopyImage3dToBuffer16Bytes", kernelBytes[4]);
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo, operationParams);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
@ -884,6 +911,9 @@ BuiltinDispatchInfoBuilder &BuiltIns::getBuiltinDispatchInfoBuilder(EBuiltInOps:
|
||||
case EBuiltInOps::CopyImage3dToBuffer:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyImage3dToBuffer>>(*this, context, device); });
|
||||
break;
|
||||
case EBuiltInOps::CopyImage3dToBufferStateless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyImage3dToBufferStateless>>(*this, context, device); });
|
||||
break;
|
||||
case EBuiltInOps::CopyImageToImage3d:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyImageToImage3d>>(*this, context, device); });
|
||||
break;
|
||||
|
@ -40,6 +40,8 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
||||
return "copy_buffer_to_image3d_stateless.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyImage3dToBuffer:
|
||||
return "copy_image3d_to_buffer.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyImage3dToBufferStateless:
|
||||
return "copy_image3d_to_buffer_stateless.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyImageToImage1d:
|
||||
return "copy_image_to_image1d.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyImageToImage2d:
|
||||
|
@ -28,6 +28,7 @@ set(GENERATED_BUILTINS_STATELESS
|
||||
"copy_buffer_to_buffer_stateless"
|
||||
"copy_buffer_rect_stateless"
|
||||
"copy_buffer_to_image3d_stateless"
|
||||
"copy_image3d_to_buffer_stateless"
|
||||
"fill_buffer_stateless"
|
||||
)
|
||||
|
||||
|
@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
}
|
||||
else{
|
||||
uint2 d = (uint2)(c.x,c.y);
|
||||
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x0000000f){
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
}
|
||||
else{
|
||||
*(__global uint4*)(dst + DstOffset + x * 16) = c;
|
||||
}
|
||||
}
|
||||
)==="
|
@ -92,6 +92,15 @@ static RegisterEmbeddedResource registerCopyImage3dToBufferSrc(
|
||||
#include "runtime/built_ins/kernels/copy_image3d_to_buffer.igdrcl_built_in"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImage3dToBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "runtime/built_ins/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyImageToImage1dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyImageToImage1d,
|
||||
|
@ -31,10 +31,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo di;
|
||||
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer,
|
||||
this->getContext(), this->getDevice());
|
||||
auto eBuiltInOpsType = EBuiltInOps::CopyImage3dToBuffer;
|
||||
if (forceStateless(dstBuffer->getSize())) {
|
||||
eBuiltInOpsType = EBuiltInOps::CopyImage3dToBufferStateless;
|
||||
}
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
|
||||
this->getContext(),
|
||||
this->getDevice());
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
|
||||
MemObjSurface srcImgSurf(srcImage);
|
||||
@ -50,12 +53,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
|
||||
if (srcImage->getImageDesc().num_mip_levels > 0) {
|
||||
dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, srcOrigin);
|
||||
}
|
||||
builder.buildDispatchInfos(di, dc);
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
builder.buildDispatchInfos(dispatchInfo, dc);
|
||||
|
||||
enqueueHandler<CL_COMMAND_COPY_IMAGE_TO_BUFFER>(
|
||||
surfaces,
|
||||
false,
|
||||
di,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
@ -801,6 +801,40 @@ TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStatelessI
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStatelessIsUsedThenParamsAreCorrect) {
|
||||
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
|
||||
uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
|
||||
|
||||
MockBuffer dstBuffer;
|
||||
dstBuffer.size = static_cast<size_t>(bigSize);
|
||||
std ::unique_ptr<Image> pSrcImage(Image2dHelper<>::create(pContext));
|
||||
ASSERT_NE(nullptr, pSrcImage.get());
|
||||
|
||||
auto &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBufferStateless, *pContext, *pDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = pSrcImage.get();
|
||||
dc.dstMemObj = &dstBuffer;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {static_cast<size_t>(bigOffset), 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, dc));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBufferWithSourceOffsetUnalignedToFour) {
|
||||
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pContext, *pDevice);
|
||||
|
||||
|
@ -114,7 +114,7 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
|
||||
if (!device.areSharedSystemAllocationsAllowed()) {
|
||||
EXPECT_FALSE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
if (device.getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported) {
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
EXPECT_TRUE(kernel->allBufferArgsStateful);
|
||||
}
|
||||
} else {
|
||||
EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "unit_tests/command_queue/enqueue_copy_image_to_buffer_fixture.h"
|
||||
#include "unit_tests/gen_common/gen_commands_common_validation.h"
|
||||
#include "unit_tests/helpers/unit_test_helper.h"
|
||||
#include "unit_tests/mocks/mock_buffer.h"
|
||||
#include "unit_tests/mocks/mock_builtin_dispatch_info_builder.h"
|
||||
#include "unit_tests/mocks/mock_builtins.h"
|
||||
|
||||
@ -272,3 +273,62 @@ HWTEST_P(MipMapCopyImageToBufferTest, GivenImageWithMipLevelNonZeroWhenCopyImage
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MipMapCopyImageToBufferTest_GivenImageWithMipLevelNonZeroWhenCopyImageToBufferIsCalledThenProperMipLevelIsSet,
|
||||
MipMapCopyImageToBufferTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D));
|
||||
|
||||
struct EnqueueCopyImageToBufferHw : public ::testing::Test {
|
||||
|
||||
void SetUp() override {
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(*platformDevices));
|
||||
context = std::make_unique<MockContext>(device.get());
|
||||
srcImage = std::unique_ptr<Image>(Image2dHelper<>::create(context.get()));
|
||||
}
|
||||
|
||||
std::unique_ptr<MockDevice> device;
|
||||
std::unique_ptr<MockContext> context;
|
||||
std::unique_ptr<Image> srcImage;
|
||||
MockBuffer dstBuffer;
|
||||
uint64_t bigSize = 5ull * MemoryConstants::gigaByte;
|
||||
uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1;
|
||||
uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
|
||||
|
||||
const size_t srcOrigin[3] = {0, 0, 0};
|
||||
const size_t region[3] = {4, 1, 1};
|
||||
};
|
||||
|
||||
using EnqueueCopyImageToBufferHwStatelessTest = EnqueueCopyImageToBufferHw;
|
||||
|
||||
HWTEST_F(EnqueueCopyImageToBufferHwStatelessTest, givenBigBufferWhenCopyingImageToBufferStatelessThenSuccessIsReturned) {
|
||||
auto cmdQ = std::make_unique<CommandQueueStateless<FamilyType>>(context.get(), device.get());
|
||||
dstBuffer.size = static_cast<size_t>(bigSize);
|
||||
auto retVal = cmdQ->enqueueCopyImageToBuffer(
|
||||
srcImage.get(),
|
||||
&dstBuffer,
|
||||
srcOrigin,
|
||||
region,
|
||||
static_cast<size_t>(bigOffset),
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
using EnqueueCopyImageToBufferStatefulTest = EnqueueCopyImageToBufferHw;
|
||||
|
||||
HWTEST_F(EnqueueCopyImageToBufferStatefulTest, givenBufferWhenCopyingImageToBufferStatefulThenSuccessIsReturned) {
|
||||
auto cmdQ = std::make_unique<CommandQueueStateful<FamilyType>>(context.get(), device.get());
|
||||
dstBuffer.size = static_cast<size_t>(smallSize);
|
||||
auto retVal = cmdQ->enqueueCopyImageToBuffer(
|
||||
srcImage.get(),
|
||||
&dstBuffer,
|
||||
srcOrigin,
|
||||
region,
|
||||
0,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
Reference in New Issue
Block a user