From 5ecb9905c94bc0500f1dee4cd360e47ddfbbf48c Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Thu, 14 Nov 2019 15:48:30 +0100 Subject: [PATCH] Add support for stateless copy image to buffer Change-Id: I91d1a45d87a5984e0bb7fdb302a716ffcea7bfc8 Signed-off-by: Kamil Kopryk Related-To: NEO-3314 --- runtime/built_ins/CMakeLists.txt | 1 + runtime/built_ins/built_in_ops_base.h | 23 +-- runtime/built_ins/built_ins.cpp | 56 +++++-- runtime/built_ins/built_ins_storage.cpp | 2 + runtime/built_ins/builtins_binary.cmake | 1 + ...mage3d_to_buffer_stateless.igdrcl_built_in | 154 ++++++++++++++++++ .../registry/register_copy_kernels_source.cpp | 9 + .../enqueue_copy_image_to_buffer.h | 17 +- unit_tests/built_ins/built_in_tests.cpp | 34 ++++ .../command_queue/command_enqueue_fixture.h | 2 +- .../enqueue_copy_image_to_buffer_tests.cpp | 60 +++++++ 11 files changed, 328 insertions(+), 31 deletions(-) create mode 100644 runtime/built_ins/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in diff --git a/runtime/built_ins/CMakeLists.txt b/runtime/built_ins/CMakeLists.txt index 12c9715ad4..a6b5047183 100644 --- a/runtime/built_ins/CMakeLists.txt +++ b/runtime/built_ins/CMakeLists.txt @@ -31,6 +31,7 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.igdrcl_built_in ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.igdrcl_built_in ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.igdrcl_built_in + ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.igdrcl_built_in ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.igdrcl_built_in ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.igdrcl_built_in diff --git a/runtime/built_ins/built_in_ops_base.h b/runtime/built_ins/built_in_ops_base.h index cda3443135..edd05369a2 100644 --- a/runtime/built_ins/built_in_ops_base.h +++ b/runtime/built_ins/built_in_ops_base.h @@ -22,17 +22,18 @@ constexpr Type FillBufferStateless{6}; constexpr Type CopyBufferToImage3d{7}; constexpr Type CopyBufferToImage3dStateless{8}; constexpr Type CopyImage3dToBuffer{9}; -constexpr Type CopyImageToImage1d{10}; -constexpr Type CopyImageToImage2d{11}; -constexpr Type CopyImageToImage3d{12}; -constexpr Type FillImage1d{13}; -constexpr Type FillImage2d{14}; -constexpr Type FillImage3d{15}; -constexpr Type VmeBlockMotionEstimateIntel{16}; -constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{17}; -constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{18}; -constexpr Type Scheduler{19}; +constexpr Type CopyImage3dToBufferStateless{10}; +constexpr Type CopyImageToImage1d{11}; +constexpr Type CopyImageToImage2d{12}; +constexpr Type CopyImageToImage3d{13}; +constexpr Type FillImage1d{14}; +constexpr Type FillImage2d{15}; +constexpr Type FillImage3d{16}; +constexpr Type VmeBlockMotionEstimateIntel{17}; +constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{18}; +constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{19}; +constexpr Type Scheduler{20}; -constexpr uint32_t MaxBaseValue{19}; +constexpr uint32_t MaxBaseValue{20}; } // namespace EBuiltInOps } // namespace NEO diff --git a/runtime/built_ins/built_ins.cpp b/runtime/built_ins/built_ins.cpp index 6e201f7f82..4b8eee0f3a 100644 --- a/runtime/built_ins/built_ins.cpp +++ b/runtime/built_ins/built_ins.cpp @@ -568,10 +568,10 @@ class BuiltInOp : public BuiltinDispatchInfoBu size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z}; - auto srcRowPitch = static_cast(operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel); + auto srcRowPitch = operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel; - auto srcSlicePitch = static_cast( - operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch)); + auto srcSlicePitch = + operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch); // Determine size of host ptr surface for residency purposes size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0; @@ -645,7 +645,7 @@ template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device) - : BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} { + : BuiltinDispatchInfoBuilder(kernelsLib) { populate(context, device, EBuiltInOps::CopyImage3dToBuffer, "", @@ -657,6 +657,16 @@ class BuiltInOp : public BuiltinDispatchInfoBu } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { + return buildDispatchInfosTyped(multiDispatchInfo, operationParams); + } + + protected: + Kernel *kernelBytes[5] = {nullptr}; + + BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib) {} + + template + bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { DispatchInfoBuilder kernelNoSplit3DBuilder; multiDispatchInfo.setBuiltinOpParams(operationParams); DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && ((operationParams.dstPtr != nullptr) || (operationParams.dstMemObj != nullptr)))); @@ -672,10 +682,10 @@ class BuiltInOp : public BuiltinDispatchInfoBu size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z}; - auto dstRowPitch = static_cast(operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel); + auto dstRowPitch = operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel; - auto dstSlicePitch = static_cast( - operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch)); + auto dstSlicePitch = + operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch); // Determine size of host ptr surface for residency purposes size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0; @@ -707,13 +717,13 @@ class BuiltInOp : public BuiltinDispatchInfoBu } // Set-up dstOffset - kernelNoSplit3DBuilder.setArg(3, static_cast(operationParams.dstOffset.x)); + kernelNoSplit3DBuilder.setArg(3, static_cast(operationParams.dstOffset.x)); // Set-up dstRowPitch { - uint32_t pitch[] = { - static_cast(dstRowPitch), - static_cast(dstSlicePitch)}; + OffsetType pitch[] = { + static_cast(dstRowPitch), + static_cast(dstSlicePitch)}; kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch); } @@ -723,9 +733,26 @@ class BuiltInOp : public BuiltinDispatchInfoBu return true; } +}; - protected: - Kernel *kernelBytes[5]; +template <> +class BuiltInOp : public BuiltInOp { + public: + BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device) + : BuiltInOp(kernelsLib) { + populate(context, device, + EBuiltInOps::CopyImage3dToBufferStateless, + "-cl-intel-greater-than-4GB-buffer-required", + "CopyImage3dToBufferBytes", kernelBytes[0], + "CopyImage3dToBuffer2Bytes", kernelBytes[1], + "CopyImage3dToBuffer4Bytes", kernelBytes[2], + "CopyImage3dToBuffer8Bytes", kernelBytes[3], + "CopyImage3dToBuffer16Bytes", kernelBytes[4]); + } + + bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { + return buildDispatchInfosTyped(multiDispatchInfo, operationParams); + } }; template <> @@ -884,6 +911,9 @@ BuiltinDispatchInfoBuilder &BuiltIns::getBuiltinDispatchInfoBuilder(EBuiltInOps: case EBuiltInOps::CopyImage3dToBuffer: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*this, context, device); }); break; + case EBuiltInOps::CopyImage3dToBufferStateless: + std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*this, context, device); }); + break; case EBuiltInOps::CopyImageToImage3d: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*this, context, device); }); break; diff --git a/runtime/built_ins/built_ins_storage.cpp b/runtime/built_ins/built_ins_storage.cpp index 4c67e81765..96352b8ee7 100644 --- a/runtime/built_ins/built_ins_storage.cpp +++ b/runtime/built_ins/built_ins_storage.cpp @@ -40,6 +40,8 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) { return "copy_buffer_to_image3d_stateless.igdrcl_built_in"; case EBuiltInOps::CopyImage3dToBuffer: return "copy_image3d_to_buffer.igdrcl_built_in"; + case EBuiltInOps::CopyImage3dToBufferStateless: + return "copy_image3d_to_buffer_stateless.igdrcl_built_in"; case EBuiltInOps::CopyImageToImage1d: return "copy_image_to_image1d.igdrcl_built_in"; case EBuiltInOps::CopyImageToImage2d: diff --git a/runtime/built_ins/builtins_binary.cmake b/runtime/built_ins/builtins_binary.cmake index 07f015cc7b..a8f2263923 100644 --- a/runtime/built_ins/builtins_binary.cmake +++ b/runtime/built_ins/builtins_binary.cmake @@ -28,6 +28,7 @@ set(GENERATED_BUILTINS_STATELESS "copy_buffer_to_buffer_stateless" "copy_buffer_rect_stateless" "copy_buffer_to_image3d_stateless" + "copy_image3d_to_buffer_stateless" "fill_buffer_stateless" ) diff --git a/runtime/built_ins/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in b/runtime/built_ins/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in new file mode 100644 index 0000000000..cf4d0c5d44 --- /dev/null +++ b/runtime/built_ins/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2017-2019, Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +R"===( +__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input, + __global uchar *dst, + int4 srcOffset, + ulong dstOffset, + ulong2 Pitch) { + const uint x = get_global_id(0); + const uint y = get_global_id(1); + const uint z = get_global_id(2); + + const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; + ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); + + uint4 c = read_imageui(input, srcCoord); + *(dst + DstOffset + x) = convert_uchar_sat(c.x); +} + +__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input, + __global uchar *dst, + int4 srcOffset, + ulong dstOffset, + ulong2 Pitch) { + const uint x = get_global_id(0); + const uint y = get_global_id(1); + const uint z = get_global_id(2); + + const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; + ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); + + uint4 c = read_imageui(input, srcCoord); + + if(( ulong )(dst + dstOffset) & 0x00000001){ + *((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff); + } + else{ + *((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x); + } +} + +__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input, + __global uchar *dst, + int4 srcOffset, + ulong dstOffset, + ulong2 Pitch) { + const uint x = get_global_id(0); + const uint y = get_global_id(1); + const uint z = get_global_id(2); + + const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; + ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); + + uint4 c = read_imageui(input, srcCoord); + + if(( ulong )(dst + dstOffset) & 0x00000003){ + *((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff); + } + else{ + *((__global uint*)(dst + DstOffset + x * 4)) = c.x; + } +} + +__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input, + __global uchar *dst, + int4 srcOffset, + ulong dstOffset, + ulong2 Pitch) { + const uint x = get_global_id(0); + const uint y = get_global_id(1); + const uint z = get_global_id(2); + + const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; + ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); + + uint4 c = read_imageui(input, srcCoord); + + if(( ulong )(dst + dstOffset) & 0x00000007){ + *((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff); + *((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff); + } + else{ + uint2 d = (uint2)(c.x,c.y); + *((__global uint2*)(dst + DstOffset + x * 8)) = d; + } +} + +__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input, + __global uchar *dst, + int4 srcOffset, + ulong dstOffset, + ulong2 Pitch) { + const uint x = get_global_id(0); + const uint y = get_global_id(1); + const uint z = get_global_id(2); + + const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; + ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); + + const uint4 c = read_imageui(input, srcCoord); + + if(( ulong )(dst + dstOffset) & 0x0000000f){ + *((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff); + *((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff); + } + else{ + *(__global uint4*)(dst + DstOffset + x * 16) = c; + } +} +)===" diff --git a/runtime/built_ins/registry/register_copy_kernels_source.cpp b/runtime/built_ins/registry/register_copy_kernels_source.cpp index b72f8c76a7..2108d4bb7a 100644 --- a/runtime/built_ins/registry/register_copy_kernels_source.cpp +++ b/runtime/built_ins/registry/register_copy_kernels_source.cpp @@ -92,6 +92,15 @@ static RegisterEmbeddedResource registerCopyImage3dToBufferSrc( #include "runtime/built_ins/kernels/copy_image3d_to_buffer.igdrcl_built_in" )); +static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc( + createBuiltinResourceName( + EBuiltInOps::CopyImage3dToBufferStateless, + BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) + .c_str(), + std::string( +#include "runtime/built_ins/kernels/copy_image3d_to_buffer_stateless.igdrcl_built_in" + )); + static RegisterEmbeddedResource registerCopyImageToImage1dSrc( createBuiltinResourceName( EBuiltInOps::CopyImageToImage1d, diff --git a/runtime/command_queue/enqueue_copy_image_to_buffer.h b/runtime/command_queue/enqueue_copy_image_to_buffer.h index 233673898b..6578e67837 100644 --- a/runtime/command_queue/enqueue_copy_image_to_buffer.h +++ b/runtime/command_queue/enqueue_copy_image_to_buffer.h @@ -31,10 +31,13 @@ cl_int CommandQueueHw::enqueueCopyImageToBuffer( const cl_event *eventWaitList, cl_event *event) { - MultiDispatchInfo di; - - auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, - this->getContext(), this->getDevice()); + auto eBuiltInOpsType = EBuiltInOps::CopyImage3dToBuffer; + if (forceStateless(dstBuffer->getSize())) { + eBuiltInOpsType = EBuiltInOps::CopyImage3dToBufferStateless; + } + auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(eBuiltInOpsType, + this->getContext(), + this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface srcImgSurf(srcImage); @@ -50,12 +53,14 @@ cl_int CommandQueueHw::enqueueCopyImageToBuffer( if (srcImage->getImageDesc().num_mip_levels > 0) { dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, srcOrigin); } - builder.buildDispatchInfos(di, dc); + + MultiDispatchInfo dispatchInfo; + builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, false, - di, + dispatchInfo, numEventsInWaitList, eventWaitList, event); diff --git a/unit_tests/built_ins/built_in_tests.cpp b/unit_tests/built_ins/built_in_tests.cpp index 5d59c52b12..ee72f7c6e0 100644 --- a/unit_tests/built_ins/built_in_tests.cpp +++ b/unit_tests/built_ins/built_in_tests.cpp @@ -801,6 +801,40 @@ TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStatelessI EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } +TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStatelessIsUsedThenParamsAreCorrect) { + + if (is32bit) { + GTEST_SKIP(); + } + + uint64_t bigSize = 10ull * MemoryConstants::gigaByte; + uint64_t bigOffset = 4ull * MemoryConstants::gigaByte; + + MockBuffer dstBuffer; + dstBuffer.size = static_cast(bigSize); + std ::unique_ptr pSrcImage(Image2dHelper<>::create(pContext)); + ASSERT_NE(nullptr, pSrcImage.get()); + + auto &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBufferStateless, *pContext, *pDevice); + + BuiltinOpParams dc; + dc.srcMemObj = pSrcImage.get(); + dc.dstMemObj = &dstBuffer; + dc.srcOffset = {0, 0, 0}; + dc.dstOffset = {static_cast(bigOffset), 0, 0}; + dc.size = {1, 1, 1}; + + MultiDispatchInfo multiDispatchInfo; + ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, dc)); + EXPECT_EQ(1u, multiDispatchInfo.size()); + EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc)); + + auto kernel = multiDispatchInfo.begin()->getKernel(); + ASSERT_NE(nullptr, kernel); + EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); +} + TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBufferWithSourceOffsetUnalignedToFour) { BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pContext, *pDevice); diff --git a/unit_tests/command_queue/command_enqueue_fixture.h b/unit_tests/command_queue/command_enqueue_fixture.h index aca4a167b5..86c2e6cf93 100644 --- a/unit_tests/command_queue/command_enqueue_fixture.h +++ b/unit_tests/command_queue/command_enqueue_fixture.h @@ -114,7 +114,7 @@ struct CommandQueueStateful : public CommandQueueHw { if (!device.areSharedSystemAllocationsAllowed()) { EXPECT_FALSE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); if (device.getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported) { - EXPECT_TRUE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->allBufferArgsStateful); } } else { EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); diff --git a/unit_tests/command_queue/enqueue_copy_image_to_buffer_tests.cpp b/unit_tests/command_queue/enqueue_copy_image_to_buffer_tests.cpp index 59829e62f6..3306e53a88 100644 --- a/unit_tests/command_queue/enqueue_copy_image_to_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_copy_image_to_buffer_tests.cpp @@ -10,6 +10,7 @@ #include "unit_tests/command_queue/enqueue_copy_image_to_buffer_fixture.h" #include "unit_tests/gen_common/gen_commands_common_validation.h" #include "unit_tests/helpers/unit_test_helper.h" +#include "unit_tests/mocks/mock_buffer.h" #include "unit_tests/mocks/mock_builtin_dispatch_info_builder.h" #include "unit_tests/mocks/mock_builtins.h" @@ -272,3 +273,62 @@ HWTEST_P(MipMapCopyImageToBufferTest, GivenImageWithMipLevelNonZeroWhenCopyImage INSTANTIATE_TEST_CASE_P(MipMapCopyImageToBufferTest_GivenImageWithMipLevelNonZeroWhenCopyImageToBufferIsCalledThenProperMipLevelIsSet, MipMapCopyImageToBufferTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); + +struct EnqueueCopyImageToBufferHw : public ::testing::Test { + + void SetUp() override { + if (is32bit) { + GTEST_SKIP(); + } + device.reset(MockDevice::createWithNewExecutionEnvironment(*platformDevices)); + context = std::make_unique(device.get()); + srcImage = std::unique_ptr(Image2dHelper<>::create(context.get())); + } + + std::unique_ptr device; + std::unique_ptr context; + std::unique_ptr srcImage; + MockBuffer dstBuffer; + uint64_t bigSize = 5ull * MemoryConstants::gigaByte; + uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; + uint64_t bigOffset = 4ull * MemoryConstants::gigaByte; + + const size_t srcOrigin[3] = {0, 0, 0}; + const size_t region[3] = {4, 1, 1}; +}; + +using EnqueueCopyImageToBufferHwStatelessTest = EnqueueCopyImageToBufferHw; + +HWTEST_F(EnqueueCopyImageToBufferHwStatelessTest, givenBigBufferWhenCopyingImageToBufferStatelessThenSuccessIsReturned) { + auto cmdQ = std::make_unique>(context.get(), device.get()); + dstBuffer.size = static_cast(bigSize); + auto retVal = cmdQ->enqueueCopyImageToBuffer( + srcImage.get(), + &dstBuffer, + srcOrigin, + region, + static_cast(bigOffset), + 0, + nullptr, + nullptr); + + EXPECT_EQ(CL_SUCCESS, retVal); +} + +using EnqueueCopyImageToBufferStatefulTest = EnqueueCopyImageToBufferHw; + +HWTEST_F(EnqueueCopyImageToBufferStatefulTest, givenBufferWhenCopyingImageToBufferStatefulThenSuccessIsReturned) { + auto cmdQ = std::make_unique>(context.get(), device.get()); + dstBuffer.size = static_cast(smallSize); + auto retVal = cmdQ->enqueueCopyImageToBuffer( + srcImage.get(), + &dstBuffer, + srcOrigin, + region, + 0, + 0, + nullptr, + nullptr); + + EXPECT_EQ(CL_SUCCESS, retVal); +}