From bd5ca4fed5ef82bb5f0b4826bce985440a8f835c Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Tue, 7 Jan 2025 14:33:43 +0000 Subject: [PATCH] fix: flush ocl queue after staging transfer This is required for scenarios without direct submission. Otherwise, staging transfers are enqueued but not actually flushed. If that happens, subsequent staging transfers can't reuse previous staging buffers. It results in increased memory usage. Signed-off-by: Szymon Morek --- .../source/command_queue/command_queue_staging.cpp | 12 +++++++++--- .../command_queue/enqueue_read_image_tests.cpp | 4 ++-- .../unit_test/command_queue/enqueue_svm_tests.cpp | 3 ++- .../command_queue/enqueue_write_image_tests.cpp | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/opencl/source/command_queue/command_queue_staging.cpp b/opencl/source/command_queue/command_queue_staging.cpp index 7ad465a5e3..9508e23d74 100644 --- a/opencl/source/command_queue/command_queue_staging.cpp +++ b/opencl/source/command_queue/command_queue_staging.cpp @@ -34,7 +34,9 @@ cl_int CommandQueue::enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstP isSingleTransfer = isFirstTransfer && isLastTransfer; cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer); - return this->enqueueSVMMemcpy(false, chunkDst, chunkSrc, chunkSize, 0, nullptr, outEvent, csr); + auto ret = this->enqueueSVMMemcpy(false, chunkDst, chunkSrc, chunkSize, 0, nullptr, outEvent, csr); + ret |= this->flush(); + return ret; }; auto stagingBufferManager = this->context->getStagingBufferManager(); @@ -55,7 +57,9 @@ cl_int CommandQueue::enqueueStagingWriteImage(Image *dstImage, cl_bool blockingC isSingleTransfer = isFirstTransfer && isLastTransfer; cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer); - return this->enqueueWriteImageImpl(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, 
outEvent, csr); + auto ret = this->enqueueWriteImageImpl(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr); + ret |= this->flush(); + return ret; }; auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes; auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel; @@ -78,7 +82,9 @@ cl_int CommandQueue::enqueueStagingReadImage(Image *srcImage, cl_bool blockingCo isSingleTransfer = isFirstTransfer && isLastTransfer; cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer); - return this->enqueueReadImageImpl(srcImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr); + auto ret = this->enqueueReadImageImpl(srcImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr); + ret |= this->flush(); + return ret; }; auto bytesPerPixel = srcImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes; auto dstRowPitch = inputRowPitch ? 
inputRowPitch : globalRegion[0] * bytesPerPixel; diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index fe39b75c4c..32536a10b9 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1127,7 +1127,7 @@ struct ReadImageStagingBufferTest : public EnqueueReadImageTest { HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledThenReturnSuccess) { MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); auto res = mockCommandQueueHw.enqueueStagingReadImage(srcImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr); - + EXPECT_TRUE(mockCommandQueueHw.flushCalled); EXPECT_EQ(res, CL_SUCCESS); EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadImageCounter); auto &csr = pDevice->getUltCommandStreamReceiver(); diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index fce39a8fc3..2203676d91 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -2448,6 +2448,7 @@ HWTEST_F(StagingBufferTest, givenInOrderCmdQueueWhenEnqueueStagingBufferMemcpyNo auto pEvent = (Event *)event; auto numOfStagingBuffers = svmManager->getNumAllocs() - initialUsmAllocs; EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_TRUE(myCmdQ.flushCalled); EXPECT_EQ(1u, numOfStagingBuffers); EXPECT_EQ(expectedNumOfCopies, myCmdQ.enqueueSVMMemcpyCalledCount); EXPECT_EQ(0u, myCmdQ.finishCalledCount); diff --git 
a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp index 2f46b2ffbc..01769b5b06 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp @@ -847,7 +847,7 @@ struct WriteImageStagingBufferTest : public EnqueueWriteImageTest { HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledThenReturnSuccess) { MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr); - + EXPECT_TRUE(mockCommandQueueHw.flushCalled); EXPECT_EQ(res, CL_SUCCESS); EXPECT_EQ(4ul, mockCommandQueueHw.enqueueWriteImageCounter); auto &csr = pDevice->getUltCommandStreamReceiver();