Add support for stateless copy image to buffer

Change-Id: I91d1a45d87a5984e0bb7fdb302a716ffcea7bfc8
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-3314
This commit is contained in:
Kamil Kopryk
2019-11-14 15:48:30 +01:00
committed by sys_ocldev
parent f2d0606301
commit 5ecb9905c9
11 changed files with 328 additions and 31 deletions

View File

@ -31,6 +31,7 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.igdrcl_built_in

View File

@ -22,17 +22,18 @@ constexpr Type FillBufferStateless{6};
constexpr Type CopyBufferToImage3d{7};
constexpr Type CopyBufferToImage3dStateless{8};
constexpr Type CopyImage3dToBuffer{9};
constexpr Type CopyImageToImage1d{10};
constexpr Type CopyImageToImage2d{11};
constexpr Type CopyImageToImage3d{12};
constexpr Type FillImage1d{13};
constexpr Type FillImage2d{14};
constexpr Type FillImage3d{15};
constexpr Type VmeBlockMotionEstimateIntel{16};
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{17};
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{18};
constexpr Type Scheduler{19};
constexpr Type CopyImage3dToBufferStateless{10};
constexpr Type CopyImageToImage1d{11};
constexpr Type CopyImageToImage2d{12};
constexpr Type CopyImageToImage3d{13};
constexpr Type FillImage1d{14};
constexpr Type FillImage2d{15};
constexpr Type FillImage3d{16};
constexpr Type VmeBlockMotionEstimateIntel{17};
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{18};
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{19};
constexpr Type Scheduler{20};
constexpr uint32_t MaxBaseValue{19};
constexpr uint32_t MaxBaseValue{20};
} // namespace EBuiltInOps
} // namespace NEO

View File

@ -568,10 +568,10 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
auto srcRowPitch = static_cast<size_t>(operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel);
auto srcRowPitch = operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel;
auto srcSlicePitch = static_cast<size_t>(
operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch));
auto srcSlicePitch =
operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch);
// Determine size of host ptr surface for residency purposes
size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0;
@ -645,7 +645,7 @@ template <>
class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBuilder {
public:
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
: BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} {
: BuiltinDispatchInfoBuilder(kernelsLib) {
populate(context, device,
EBuiltInOps::CopyImage3dToBuffer,
"",
@ -657,6 +657,16 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
}
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
return buildDispatchInfosTyped<uint32_t>(multiDispatchInfo, operationParams);
}
protected:
Kernel *kernelBytes[5] = {nullptr};
BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib) {}
template <typename OffsetType>
bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::NoSplit> kernelNoSplit3DBuilder;
multiDispatchInfo.setBuiltinOpParams(operationParams);
DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && ((operationParams.dstPtr != nullptr) || (operationParams.dstMemObj != nullptr))));
@ -672,10 +682,10 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};
auto dstRowPitch = static_cast<uint32_t>(operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel);
auto dstRowPitch = operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel;
auto dstSlicePitch = static_cast<uint32_t>(
operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch));
auto dstSlicePitch =
operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch);
// Determine size of host ptr surface for residency purposes
size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0;
@ -707,13 +717,13 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
}
// Set-up dstOffset
kernelNoSplit3DBuilder.setArg(3, static_cast<uint32_t>(operationParams.dstOffset.x));
kernelNoSplit3DBuilder.setArg(3, static_cast<OffsetType>(operationParams.dstOffset.x));
// Set-up dstRowPitch
{
uint32_t pitch[] = {
static_cast<uint32_t>(dstRowPitch),
static_cast<uint32_t>(dstSlicePitch)};
OffsetType pitch[] = {
static_cast<OffsetType>(dstRowPitch),
static_cast<OffsetType>(dstSlicePitch)};
kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch);
}
@ -723,9 +733,26 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
return true;
}
};
protected:
Kernel *kernelBytes[5];
template <>
class BuiltInOp<EBuiltInOps::CopyImage3dToBufferStateless> : public BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> {
public:
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
: BuiltInOp<EBuiltInOps::CopyImage3dToBuffer>(kernelsLib) {
populate(context, device,
EBuiltInOps::CopyImage3dToBufferStateless,
"-cl-intel-greater-than-4GB-buffer-required",
"CopyImage3dToBufferBytes", kernelBytes[0],
"CopyImage3dToBuffer2Bytes", kernelBytes[1],
"CopyImage3dToBuffer4Bytes", kernelBytes[2],
"CopyImage3dToBuffer8Bytes", kernelBytes[3],
"CopyImage3dToBuffer16Bytes", kernelBytes[4]);
}
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override {
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo, operationParams);
}
};
template <>
@ -884,6 +911,9 @@ BuiltinDispatchInfoBuilder &BuiltIns::getBuiltinDispatchInfoBuilder(EBuiltInOps:
case EBuiltInOps::CopyImage3dToBuffer:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyImage3dToBuffer>>(*this, context, device); });
break;
case EBuiltInOps::CopyImage3dToBufferStateless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyImage3dToBufferStateless>>(*this, context, device); });
break;
case EBuiltInOps::CopyImageToImage3d:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyImageToImage3d>>(*this, context, device); });
break;

View File

@ -40,6 +40,8 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
return "copy_buffer_to_image3d_stateless.igdrcl_built_in";
case EBuiltInOps::CopyImage3dToBuffer:
return "copy_image3d_to_buffer.igdrcl_built_in";
case EBuiltInOps::CopyImage3dToBufferStateless:
return "copy_image3d_to_buffer_stateless.igdrcl_built_in";
case EBuiltInOps::CopyImageToImage1d:
return "copy_image_to_image1d.igdrcl_built_in";
case EBuiltInOps::CopyImageToImage2d:

View File

@ -28,6 +28,7 @@ set(GENERATED_BUILTINS_STATELESS
"copy_buffer_to_buffer_stateless"
"copy_buffer_rect_stateless"
"copy_buffer_to_image3d_stateless"
"copy_image3d_to_buffer_stateless"
"fill_buffer_stateless"
)

View File

@ -0,0 +1,154 @@
/*
* Copyright (c) 2017-2019, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
R"===(
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
}
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000001){
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
}
else{
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
}
}
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000003){
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
}
else{
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
}
}
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x00000007){
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
}
else{
uint2 d = (uint2)(c.x,c.y);
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
}
}
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
__global uchar *dst,
int4 srcOffset,
ulong dstOffset,
ulong2 Pitch) {
const uint x = get_global_id(0);
const uint y = get_global_id(1);
const uint z = get_global_id(2);
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
const uint4 c = read_imageui(input, srcCoord);
if(( ulong )(dst + dstOffset) & 0x0000000f){
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
}
else{
*(__global uint4*)(dst + DstOffset + x * 16) = c;
}
}
)==="

View File

@ -92,6 +92,15 @@ static RegisterEmbeddedResource registerCopyImage3dToBufferSrc(
#include "runtime/built_ins/kernels/copy_image3d_to_buffer.igdrcl_built_in"
));
static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImage3dToBufferStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "runtime/built_ins/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in"
));
static RegisterEmbeddedResource registerCopyImageToImage1dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyImageToImage1d,

View File

@ -31,10 +31,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
const cl_event *eventWaitList,
cl_event *event) {
MultiDispatchInfo di;
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer,
this->getContext(), this->getDevice());
auto eBuiltInOpsType = EBuiltInOps::CopyImage3dToBuffer;
if (forceStateless(dstBuffer->getSize())) {
eBuiltInOpsType = EBuiltInOps::CopyImage3dToBufferStateless;
}
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
this->getContext(),
this->getDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);
MemObjSurface srcImgSurf(srcImage);
@ -50,12 +53,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
if (srcImage->getImageDesc().num_mip_levels > 0) {
dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, srcOrigin);
}
builder.buildDispatchInfos(di, dc);
MultiDispatchInfo dispatchInfo;
builder.buildDispatchInfos(dispatchInfo, dc);
enqueueHandler<CL_COMMAND_COPY_IMAGE_TO_BUFFER>(
surfaces,
false,
di,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);

View File

@ -801,6 +801,40 @@ TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStatelessI
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
}
TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStatelessIsUsedThenParamsAreCorrect) {
if (is32bit) {
GTEST_SKIP();
}
uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
MockBuffer dstBuffer;
dstBuffer.size = static_cast<size_t>(bigSize);
std ::unique_ptr<Image> pSrcImage(Image2dHelper<>::create(pContext));
ASSERT_NE(nullptr, pSrcImage.get());
auto &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBufferStateless, *pContext, *pDevice);
BuiltinOpParams dc;
dc.srcMemObj = pSrcImage.get();
dc.dstMemObj = &dstBuffer;
dc.srcOffset = {0, 0, 0};
dc.dstOffset = {static_cast<size_t>(bigOffset), 0, 0};
dc.size = {1, 1, 1};
MultiDispatchInfo multiDispatchInfo;
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, dc));
EXPECT_EQ(1u, multiDispatchInfo.size());
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));
auto kernel = multiDispatchInfo.begin()->getKernel();
ASSERT_NE(nullptr, kernel);
EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
}
TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBufferWithSourceOffsetUnalignedToFour) {
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pContext, *pDevice);

View File

@ -114,7 +114,7 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
if (!device.areSharedSystemAllocationsAllowed()) {
EXPECT_FALSE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
if (device.getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported) {
EXPECT_TRUE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
EXPECT_TRUE(kernel->allBufferArgsStateful);
}
} else {
EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);

View File

@ -10,6 +10,7 @@
#include "unit_tests/command_queue/enqueue_copy_image_to_buffer_fixture.h"
#include "unit_tests/gen_common/gen_commands_common_validation.h"
#include "unit_tests/helpers/unit_test_helper.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_builtin_dispatch_info_builder.h"
#include "unit_tests/mocks/mock_builtins.h"
@ -272,3 +273,62 @@ HWTEST_P(MipMapCopyImageToBufferTest, GivenImageWithMipLevelNonZeroWhenCopyImage
INSTANTIATE_TEST_CASE_P(MipMapCopyImageToBufferTest_GivenImageWithMipLevelNonZeroWhenCopyImageToBufferIsCalledThenProperMipLevelIsSet,
MipMapCopyImageToBufferTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D));
struct EnqueueCopyImageToBufferHw : public ::testing::Test {
void SetUp() override {
if (is32bit) {
GTEST_SKIP();
}
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(*platformDevices));
context = std::make_unique<MockContext>(device.get());
srcImage = std::unique_ptr<Image>(Image2dHelper<>::create(context.get()));
}
std::unique_ptr<MockDevice> device;
std::unique_ptr<MockContext> context;
std::unique_ptr<Image> srcImage;
MockBuffer dstBuffer;
uint64_t bigSize = 5ull * MemoryConstants::gigaByte;
uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1;
uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
const size_t srcOrigin[3] = {0, 0, 0};
const size_t region[3] = {4, 1, 1};
};
using EnqueueCopyImageToBufferHwStatelessTest = EnqueueCopyImageToBufferHw;
HWTEST_F(EnqueueCopyImageToBufferHwStatelessTest, givenBigBufferWhenCopyingImageToBufferStatelessThenSuccessIsReturned) {
auto cmdQ = std::make_unique<CommandQueueStateless<FamilyType>>(context.get(), device.get());
dstBuffer.size = static_cast<size_t>(bigSize);
auto retVal = cmdQ->enqueueCopyImageToBuffer(
srcImage.get(),
&dstBuffer,
srcOrigin,
region,
static_cast<size_t>(bigOffset),
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
}
using EnqueueCopyImageToBufferStatefulTest = EnqueueCopyImageToBufferHw;
HWTEST_F(EnqueueCopyImageToBufferStatefulTest, givenBufferWhenCopyingImageToBufferStatefulThenSuccessIsReturned) {
auto cmdQ = std::make_unique<CommandQueueStateful<FamilyType>>(context.get(), device.get());
dstBuffer.size = static_cast<size_t>(smallSize);
auto retVal = cmdQ->enqueueCopyImageToBuffer(
srcImage.get(),
&dstBuffer,
srcOrigin,
region,
0,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
}