fix: flush ocl queue after staging transfer

This is required for scenarios without direct submission.
Otherwise, staging transfers are enqueued but never actually
flushed, so subsequent staging transfers cannot reuse previous
staging buffers, which results in increased memory usage.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-01-07 14:33:43 +00:00
committed by Compute-Runtime-Automation
parent dc0048661a
commit bd5ca4fed5
4 changed files with 14 additions and 7 deletions

View File

@@ -34,7 +34,9 @@ cl_int CommandQueue::enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstP
isSingleTransfer = isFirstTransfer && isLastTransfer;
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
return this->enqueueSVMMemcpy(false, chunkDst, chunkSrc, chunkSize, 0, nullptr, outEvent, csr);
auto ret = this->enqueueSVMMemcpy(false, chunkDst, chunkSrc, chunkSize, 0, nullptr, outEvent, csr);
ret |= this->flush();
return ret;
};
auto stagingBufferManager = this->context->getStagingBufferManager();
@@ -55,7 +57,9 @@ cl_int CommandQueue::enqueueStagingWriteImage(Image *dstImage, cl_bool blockingC
isSingleTransfer = isFirstTransfer && isLastTransfer;
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
return this->enqueueWriteImageImpl(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
auto ret = this->enqueueWriteImageImpl(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
ret |= this->flush();
return ret;
};
auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;
@@ -78,7 +82,9 @@ cl_int CommandQueue::enqueueStagingReadImage(Image *srcImage, cl_bool blockingCo
isSingleTransfer = isFirstTransfer && isLastTransfer;
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
return this->enqueueReadImageImpl(srcImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
auto ret = this->enqueueReadImageImpl(srcImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
ret |= this->flush();
return ret;
};
auto bytesPerPixel = srcImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -1127,7 +1127,7 @@ struct ReadImageStagingBufferTest : public EnqueueReadImageTest {
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
auto res = mockCommandQueueHw.enqueueStagingReadImage(srcImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr);
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
EXPECT_EQ(res, CL_SUCCESS);
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadImageCounter);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -2448,6 +2448,7 @@ HWTEST_F(StagingBufferTest, givenInOrderCmdQueueWhenEnqueueStagingBufferMemcpyNo
auto pEvent = (Event *)event;
auto numOfStagingBuffers = svmManager->getNumAllocs() - initialUsmAllocs;
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(myCmdQ.flushCalled);
EXPECT_EQ(1u, numOfStagingBuffers);
EXPECT_EQ(expectedNumOfCopies, myCmdQ.enqueueSVMMemcpyCalledCount);
EXPECT_EQ(0u, myCmdQ.finishCalledCount);

View File

@@ -847,7 +847,7 @@ struct WriteImageStagingBufferTest : public EnqueueWriteImageTest {
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr);
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
EXPECT_EQ(res, CL_SUCCESS);
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueWriteImageCounter);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();