From 91ff22f255e16c5cb7d5f2c3fe62e3e0ddfa72c2 Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Fri, 4 Apr 2025 11:17:30 +0000 Subject: [PATCH] fix: correctly calculate host ptr size for images Signed-off-by: Szymon Morek --- opencl/source/api/api.cpp | 7 ++-- opencl/source/command_queue/command_queue.cpp | 2 +- opencl/source/command_queue/command_queue.h | 4 +-- .../api/cl_enqueue_read_image_tests.inl | 33 ++++++++++++++++- .../api/cl_enqueue_write_image_tests.inl | 35 +++++++++++++++++-- 5 files changed, 73 insertions(+), 8 deletions(-) diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index d557e5ad9d..c7d8c1decc 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -2910,7 +2910,8 @@ cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue commandQueue, return retVal; } - if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, pImage->getSize(), CL_COMMAND_READ_IMAGE, blockingRead, numEventsInWaitList > 0)) { + auto hostPtrSize = pCommandQueue->calculateHostPtrSizeForImage(region, rowPitch, slicePitch, pImage); + if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, hostPtrSize, CL_COMMAND_READ_IMAGE, blockingRead, numEventsInWaitList > 0)) { retVal = pCommandQueue->enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, pImage, blockingRead, origin, region, rowPitch, slicePitch, ptr, event); } else { retVal = pCommandQueue->enqueueReadImage( @@ -2986,7 +2987,9 @@ cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue commandQueue, TRACING_EXIT(ClEnqueueWriteImage, &retVal); return retVal; } - if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, pImage->getSize(), CL_COMMAND_WRITE_IMAGE, blockingWrite, numEventsInWaitList > 0)) { + + auto hostPtrSize = pCommandQueue->calculateHostPtrSizeForImage(region, inputRowPitch, inputSlicePitch, pImage); + if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, hostPtrSize, CL_COMMAND_WRITE_IMAGE, blockingWrite, numEventsInWaitList > 0)) { retVal = 
pCommandQueue->enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, pImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, event); } else { retVal = pCommandQueue->enqueueWriteImage( diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 0bf6fb999a..9c58f00f42 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -1572,7 +1572,7 @@ void CommandQueue::unregisterGpgpuAndBcsCsrClients() { } } -size_t CommandQueue::calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image) { +size_t CommandQueue::calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image) const { auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes; auto dstRowPitch = rowPitch ? rowPitch : region[0] * bytesPerPixel; auto dstSlicePitch = slicePitch ? slicePitch : ((image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 
1 : region[1]) * dstRowPitch); diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 7bb7a55eaf..8e5177e46b 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -410,6 +410,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies); bool isValidForStagingTransfer(MemObj *memObj, const void *ptr, size_t size, cl_command_type commandType, bool isBlocking, bool hasDependencies); + size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image) const; + protected: void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest); @@ -455,8 +457,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> { cl_int postStagingTransferSync(const StagingTransferStatus &status, cl_event *event, const cl_event profilingEvent, bool isSingleTransfer, bool isBlocking, cl_command_type commandType); cl_event *assignEventForStaging(cl_event *userEvent, cl_event *profilingEvent, bool isFirstTransfer, bool isLastTransfer) const; - size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image); - Context *context = nullptr; ClDevice *device = nullptr; mutable EngineControl *gpgpuEngine = nullptr; diff --git a/opencl/test/unit_test/api/cl_enqueue_read_image_tests.inl b/opencl/test/unit_test/api/cl_enqueue_read_image_tests.inl index 03db46de23..050224248f 100644 --- a/opencl/test/unit_test/api/cl_enqueue_read_image_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_read_image_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * 
SPDX-License-Identifier: MIT * @@ -205,4 +205,35 @@ TEST_F(ClEnqueueReadImageYuv, GivenInvalidRegionWhenReadingYuvImageThenInvalidVa retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } + +TEST_F(ClEnqueueReadImageTests, GivenMappedPtrWhenReadingImageToMappedPtrThenSuccessIsReturned) { + imageFormat.image_channel_order = CL_RGBA; + imageDesc.image_depth = 4; + auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); + ASSERT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(nullptr, image); + const size_t origin[] = {0, 0, 0}; + const size_t region[] = {2, 2, 1}; + auto imgSize = pCommandQueue->calculateHostPtrSizeForImage(region, imageDesc.image_row_pitch, imageDesc.image_slice_pitch, castToObject<Image>(image)); + + auto buffer = clCreateBuffer(pContext, 0, imgSize, nullptr, &retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_NE(nullptr, buffer); + auto ptr = clEnqueueMapBuffer(pCommandQueue, buffer, CL_TRUE, + CL_MAP_READ | CL_MAP_WRITE, 0, imgSize, 0, + nullptr, nullptr, &retVal); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + auto retVal = clEnqueueReadImage(pCommandQueue, image, false, origin, region, + 0, 0, ptr, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + retVal = clReleaseMemObject(image); + EXPECT_EQ(CL_SUCCESS, retVal); + retVal = clReleaseMemObject(buffer); + EXPECT_EQ(CL_SUCCESS, retVal); +} + } // namespace ULT diff --git a/opencl/test/unit_test/api/cl_enqueue_write_image_tests.inl b/opencl/test/unit_test/api/cl_enqueue_write_image_tests.inl index 523fc39eab..7e84dbf36b 100644 --- a/opencl/test/unit_test/api/cl_enqueue_write_image_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_write_image_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -107,7 +107,7 @@ TEST_F(ClEnqueueWriteImageTests, GivenValidParametersWhenWritingImageThenSuccess 
EXPECT_EQ(CL_SUCCESS, retVal); } -TEST_F(ClEnqueueReadImageTests, GivenQueueIncapableParametersWhenWritingImageThenInvalidOperationIsReturned) { +TEST_F(ClEnqueueWriteImageTests, GivenQueueIncapableParametersWhenWritingImageThenInvalidOperationIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); const size_t origin[] = {2, 2, 0}; @@ -132,6 +132,36 @@ TEST_F(ClEnqueueReadImageTests, GivenQueueIncapableParametersWhenWritingImageThe EXPECT_EQ(CL_SUCCESS, retVal); } +TEST_F(ClEnqueueWriteImageTests, GivenMappedPtrWhenWritingImageToMappedPtrThenSuccessIsReturned) { + imageFormat.image_channel_order = CL_RGBA; + imageDesc.image_depth = 4; + auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); + ASSERT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(nullptr, image); + const size_t origin[] = {0, 0, 0}; + const size_t region[] = {2, 2, 1}; + auto imgSize = pCommandQueue->calculateHostPtrSizeForImage(region, imageDesc.image_row_pitch, imageDesc.image_slice_pitch, castToObject<Image>(image)); + + auto buffer = clCreateBuffer(pContext, 0, imgSize, nullptr, &retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_NE(nullptr, buffer); + auto ptr = clEnqueueMapBuffer(pCommandQueue, buffer, CL_TRUE, + CL_MAP_READ | CL_MAP_WRITE, 0, imgSize, 0, + nullptr, nullptr, &retVal); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + auto retVal = clEnqueueWriteImage(pCommandQueue, image, false, origin, region, + 0, 0, ptr, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + retVal = clReleaseMemObject(image); + EXPECT_EQ(CL_SUCCESS, retVal); + retVal = clReleaseMemObject(buffer); + EXPECT_EQ(CL_SUCCESS, retVal); +} + typedef ClEnqueueWriteImageTests ClEnqueueWriteImageYUV; TEST_F(ClEnqueueWriteImageYUV, GivenValidParametersWhenWritingYuvImageThenSuccessIsReturned) { @@ -204,4 +234,5 @@ 
TEST_F(ClEnqueueWriteImageYUV, GivenInvalidRegionWhenWritingYuvImageThenInvalidV retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } + } // namespace ULT