From 2b340528cf1ec0857c161c7933b399d845f6c7a4 Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Fri, 21 Mar 2025 10:59:11 +0000 Subject: [PATCH] performance: enable staging read for cl buffers Related-To: NEO-14026 Signed-off-by: Szymon Morek --- opencl/source/api/api.cpp | 31 +++++++++++++------ .../api/cl_enqueue_read_buffer_tests.inl | 28 +++++++++++++++++ .../test/unit_test/mocks/mock_command_queue.h | 2 ++ 3 files changed, 51 insertions(+), 10 deletions(-) diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 8f3a955363..d557e5ad9d 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -2390,16 +2390,27 @@ cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue commandQueue, return retVal; } - retVal = pCommandQueue->enqueueReadBuffer( - pBuffer, - blockingRead, - offset, - cb, - ptr, - nullptr, - numEventsInWaitList, - eventWaitList, - event); + if (pCommandQueue->isValidForStagingTransfer(pBuffer, ptr, cb, CL_COMMAND_READ_BUFFER, blockingRead, numEventsInWaitList > 0)) { + retVal = pCommandQueue->enqueueStagingBufferTransfer( + CL_COMMAND_READ_BUFFER, + pBuffer, + blockingRead, + offset, + cb, + ptr, + event); + } else { + retVal = pCommandQueue->enqueueReadBuffer( + pBuffer, + blockingRead, + offset, + cb, + ptr, + nullptr, + numEventsInWaitList, + eventWaitList, + event); + } } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); diff --git a/opencl/test/unit_test/api/cl_enqueue_read_buffer_tests.inl b/opencl/test/unit_test/api/cl_enqueue_read_buffer_tests.inl index 8800c49d4a..84a4c2e2eb 100644 --- a/opencl/test/unit_test/api/cl_enqueue_read_buffer_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_read_buffer_tests.inl @@ -40,6 +40,34 @@ TEST_F(ClEnqueueReadBufferTests, GivenCorrectArgumentsWhenReadingBufferThenSucce EXPECT_EQ(CL_SUCCESS, retVal); } +TEST_F(ClEnqueueReadBufferTests, GivenNonUsmMemoryWhenReadingBufferThenNonUsmPointerIsNotImported) { + DebugManagerStateRestore restorer{}; + // 1KB staging buffer size + debugManager.flags.StagingBufferSize.set(1); + debugManager.flags.EnableCopyWithStagingBuffers.set(1); + + MockContext context{}; + MockGraphicsAllocation allocation{}; + MockBuffer buffer{&context, allocation}; + MockCommandQueue commandQueue{context}; + char data[2048]; + auto retVal = clEnqueueReadBuffer( + &commandQueue, + &buffer, + false, + 0, + sizeof(data), + data, + 0, + nullptr, + nullptr); + + EXPECT_EQ(CL_SUCCESS, retVal); + + // 2 * 1KB copies + EXPECT_EQ(2u, commandQueue.enqueueReadBufferImplCalledCount); +} + TEST_F(ClEnqueueReadBufferTests, GivenQueueIncapableArgumentsWhenReadingBufferThenInvalidOperationIsReturned) { MockBuffer buffer{}; auto data = 1; diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index ab51fb4b12..417a8a91d4 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -193,6 +193,7 @@ class MockCommandQueue : public CommandQueue { cl_int enqueueReadBufferImpl(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t cb, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) override { + enqueueReadBufferImplCalledCount++; return CL_SUCCESS; } @@ -282,6 +283,7 @@ class MockCommandQueue : public CommandQueue { std::atomic latestTaskCountWaited{std::numeric_limits::max()}; std::optional waitUntilCompleteReturnValue{}; int waitUntilCompleteCalledCount{0}; + size_t enqueueReadBufferImplCalledCount = 0; }; template