mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
fix: flush ocl queue after staging transfer
This is required for scenarios without direct submission. Otherwise, staging transfers are enqueued but not actually flushed. If that happens, subsequent staging transfers can't reuse previous staging buffers, which results in increased memory usage. Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
dc0048661a
commit
bd5ca4fed5
@@ -34,7 +34,9 @@ cl_int CommandQueue::enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstP
|
||||
isSingleTransfer = isFirstTransfer && isLastTransfer;
|
||||
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
|
||||
|
||||
return this->enqueueSVMMemcpy(false, chunkDst, chunkSrc, chunkSize, 0, nullptr, outEvent, csr);
|
||||
auto ret = this->enqueueSVMMemcpy(false, chunkDst, chunkSrc, chunkSize, 0, nullptr, outEvent, csr);
|
||||
ret |= this->flush();
|
||||
return ret;
|
||||
};
|
||||
|
||||
auto stagingBufferManager = this->context->getStagingBufferManager();
|
||||
@@ -55,7 +57,9 @@ cl_int CommandQueue::enqueueStagingWriteImage(Image *dstImage, cl_bool blockingC
|
||||
isSingleTransfer = isFirstTransfer && isLastTransfer;
|
||||
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
|
||||
|
||||
return this->enqueueWriteImageImpl(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
|
||||
auto ret = this->enqueueWriteImageImpl(dstImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
|
||||
ret |= this->flush();
|
||||
return ret;
|
||||
};
|
||||
auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
|
||||
auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;
|
||||
@@ -78,7 +82,9 @@ cl_int CommandQueue::enqueueStagingReadImage(Image *srcImage, cl_bool blockingCo
|
||||
isSingleTransfer = isFirstTransfer && isLastTransfer;
|
||||
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
|
||||
|
||||
return this->enqueueReadImageImpl(srcImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
|
||||
auto ret = this->enqueueReadImageImpl(srcImage, false, origin, region, inputRowPitch, inputSlicePitch, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
|
||||
ret |= this->flush();
|
||||
return ret;
|
||||
};
|
||||
auto bytesPerPixel = srcImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
|
||||
auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
* Copyright (C) 2018-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -1127,7 +1127,7 @@ struct ReadImageStagingBufferTest : public EnqueueReadImageTest {
|
||||
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledThenReturnSuccess) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
auto res = mockCommandQueueHw.enqueueStagingReadImage(srcImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadImageCounter);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
* Copyright (C) 2018-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -2448,6 +2448,7 @@ HWTEST_F(StagingBufferTest, givenInOrderCmdQueueWhenEnqueueStagingBufferMemcpyNo
|
||||
auto pEvent = (Event *)event;
|
||||
auto numOfStagingBuffers = svmManager->getNumAllocs() - initialUsmAllocs;
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_TRUE(myCmdQ.flushCalled);
|
||||
EXPECT_EQ(1u, numOfStagingBuffers);
|
||||
EXPECT_EQ(expectedNumOfCopies, myCmdQ.enqueueSVMMemcpyCalledCount);
|
||||
EXPECT_EQ(0u, myCmdQ.finishCalledCount);
|
||||
|
||||
@@ -847,7 +847,7 @@ struct WriteImageStagingBufferTest : public EnqueueWriteImageTest {
|
||||
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledThenReturnSuccess) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteImage(dstImage, false, origin, region, MemoryConstants::megaByte, MemoryConstants::megaByte, ptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueWriteImageCounter);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
Reference in New Issue
Block a user