mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-10 07:03:01 +08:00
performance: introduce staging read for cl_buffer
Related-To: NEO-14026
Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b2b3b55b19
commit
dacbce7f01
@@ -21,10 +21,13 @@ using ClEnqueueReadBufferTests = ApiTests;
|
||||
namespace ULT {
|
||||
|
||||
TEST_F(ClEnqueueReadBufferTests, GivenCorrectArgumentsWhenReadingBufferThenSuccessIsReturned) {
|
||||
MockBuffer buffer{};
|
||||
MockContext context{};
|
||||
MockGraphicsAllocation allocation{};
|
||||
MockBuffer buffer{&context, allocation};
|
||||
MockCommandQueue commandQueue{context};
|
||||
auto data = 1;
|
||||
auto retVal = clEnqueueReadBuffer(
|
||||
pCommandQueue,
|
||||
&commandQueue,
|
||||
&buffer,
|
||||
false,
|
||||
0,
|
||||
|
||||
@@ -855,3 +855,160 @@ HWTEST_F(EnqueueReadBufferHw, givenHostPtrIsFromMappedBufferWhenReadBufferIsCall
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
|
||||
}
|
||||
|
||||
struct ReadBufferStagingBufferTest : public EnqueueReadBufferHw {
|
||||
void SetUp() override {
|
||||
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
|
||||
EnqueueReadBufferHw::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (defaultHwInfo->capabilityTable.ftrSvm == false) {
|
||||
return;
|
||||
}
|
||||
EnqueueReadBufferHw::TearDown();
|
||||
}
|
||||
constexpr static size_t chunkSize = MemoryConstants::megaByte * 2;
|
||||
|
||||
unsigned char ptr[MemoryConstants::cacheLineSize];
|
||||
MockBuffer buffer;
|
||||
cl_queue_properties props = {};
|
||||
};
|
||||
|
||||
// A staging read of the whole fixture buffer must succeed, flush the queue,
// issue exactly one read enqueue and create no host-surface allocation.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);

    const auto status = queue.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER,
                                                           &buffer,
                                                           false,
                                                           0,
                                                           buffer.getSize(),
                                                           ptr,
                                                           nullptr);

    EXPECT_TRUE(queue.flushCalled);
    EXPECT_EQ(status, CL_SUCCESS);
    EXPECT_EQ(1ul, queue.enqueueReadBufferCounter);

    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, ultCsr.createAllocationForHostSurfaceCalled);
}
|
||||
|
||||
// With staging copies forced on, checks that the same host pointer is accepted
// for a staging transfer once, rejected on the next query, and accepted again
// after the event returned by the earlier read has its execution status updated.
HWTEST_F(ReadBufferStagingBufferTest, whenHostPtrRegisteredThenDontUseStagingUntilEventCompleted) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);

    // Start from a null handle so a failed enqueue cannot leave an indeterminate value behind.
    cl_event event = nullptr;
    auto retVal = mockCommandQueueHw.enqueueReadBuffer(&buffer,
                                                       CL_FALSE,
                                                       0,
                                                       MemoryConstants::cacheLineSize,
                                                       ptr,
                                                       nullptr,
                                                       0,
                                                       nullptr,
                                                       &event);
    // Fatal checks: the code below dereferences the event, so stop here on failure.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);
    auto pEvent = castToObjectOrAbort<Event>(event);

    // First query is expected to pass, the immediately following one to fail.
    EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));
    EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));

    // After the event status is refreshed the pointer must be usable again.
    pEvent->updateExecutionStatus();
    EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));

    pEvent->release();
}
|
||||
|
||||
// With staging copies forced on, the same host pointer must be accepted once,
// rejected on the next query, and accepted again after finish() on the queue.
HWTEST_F(ReadBufferStagingBufferTest, whenHostPtrRegisteredThenDontUseStagingUntilFinishCalled) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);

    // Every query uses identical arguments; only the queue state changes between calls.
    auto stagingAllowed = [&]() {
        return queue.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false);
    };

    EXPECT_TRUE(stagingAllowed());
    EXPECT_FALSE(stagingAllowed());

    queue.finish();
    EXPECT_TRUE(stagingAllowed());
}
|
||||
|
||||
// Reads a buffer four chunks long through the staging path and expects the
// transfer to be split into four read enqueues, with the queue flushed and no
// host-surface allocation created.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledWithLargeSizeThenSplitTransfer) {
    constexpr size_t transferSize = chunkSize * 4;

    // Owned by a smart pointer so the memory is released on every exit path,
    // including an early return from the fatal assertions below.
    auto hostPtr = std::make_unique<unsigned char[]>(transferSize);
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);

    auto retVal = CL_SUCCESS;
    auto largeBuffer = std::unique_ptr<Buffer>(Buffer::create(context.get(),
                                                              0,
                                                              transferSize,
                                                              nullptr,
                                                              retVal));
    // Verify creation before the buffer is handed to the queue.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, largeBuffer.get());

    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, largeBuffer.get(), false, 0, transferSize, hostPtr.get(), nullptr);
    EXPECT_TRUE(mockCommandQueueHw.flushCalled);
    EXPECT_EQ(res, CL_SUCCESS);
    EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadBufferCounter);

    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
|
||||
|
||||
// A staging read that requests an event must return one whose command type is
// CL_COMMAND_READ_BUFFER, matching the last command type recorded by the queue.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledWithEventThenReturnValidEvent) {
    constexpr cl_command_type expectedLastCmd = CL_COMMAND_READ_BUFFER;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);

    // Start from a null handle so a failed enqueue cannot leave an indeterminate value behind.
    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // Fatal checks: the event is dereferenced below.
    ASSERT_EQ(CL_SUCCESS, res);
    ASSERT_NE(nullptr, event);

    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());

    clReleaseEvent(event);
}
|
||||
|
||||
// On an out-of-order queue, a single-chunk staging read must leave
// CL_COMMAND_READ_BUFFER as the last recorded command and as the command type
// of the returned event (i.e. nothing else was enqueued after the read).
HWTEST_F(ReadBufferStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingReadBufferCalledWithSingleTransferThenNoBarrierEnqueued) {
    constexpr cl_command_type expectedLastCmd = CL_COMMAND_READ_BUFFER;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
    mockCommandQueueHw.setOoqEnabled();

    // Start from a null handle so a failed enqueue cannot leave an indeterminate value behind.
    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // Fatal checks: the event is dereferenced below.
    ASSERT_EQ(CL_SUCCESS, res);
    ASSERT_NE(nullptr, event);

    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());

    clReleaseEvent(event);
}
|
||||
|
||||
// On a profiling-enabled queue, the event returned by a staging read must
// report profiling as enabled and must not be on the CPU profiling path.
HWTEST_F(ReadBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStagingReadBufferThenTimestampsSetCorrectly) {
    // Start from a null handle so a failed enqueue cannot leave an indeterminate value behind.
    cl_event event = nullptr;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
    mockCommandQueueHw.setProfilingEnabled();

    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // Fatal checks: the event is dereferenced below.
    ASSERT_EQ(CL_SUCCESS, res);
    ASSERT_NE(nullptr, event);

    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_FALSE(pEvent->isCPUProfilingPath());
    EXPECT_TRUE(pEvent->isProfilingEnabled());

    clReleaseEvent(event);
}
|
||||
|
||||
// When the underlying read enqueue is made to fail, the staging transfer must
// return that error after exactly one read attempt.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferFailedThenPropagateErrorCode) {
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);
    // Disable forwarding to the real implementation so the mock reports a failure.
    queue.enqueueReadBufferCallBase = false;

    const auto status = queue.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER,
                                                           &buffer,
                                                           false,
                                                           0,
                                                           MemoryConstants::cacheLineSize,
                                                           ptr,
                                                           nullptr);

    EXPECT_EQ(status, CL_INVALID_OPERATION);
    EXPECT_EQ(1ul, queue.enqueueReadBufferCounter);
}
|
||||
|
||||
// The staging-validity query for a 16-byte host pointer must match what the
// product helper reports for isStagingBuffersEnabled().
HWTEST_F(ReadBufferStagingBufferTest, whenIsValidForStagingTransferCalledThenReturnCorrectValue) {
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);
    const auto expectedResult = device->getProductHelper().isStagingBuffersEnabled();

    // Dedicated 16-byte host pointer for this query, separate from the fixture's `ptr`.
    unsigned char smallHostPtr[16];
    const auto actualResult = queue.isValidForStagingTransfer(&buffer, smallHostPtr, 16, CL_COMMAND_READ_BUFFER, false, false);

    EXPECT_EQ(expectedResult, actualResult);
}
|
||||
|
||||
// With DoCpuCopyOnReadBuffer set and the fifth argument passed as true, the
// staging-validity query must return false.
HWTEST_F(ReadBufferStagingBufferTest, whenIsValidForStagingTransferCalledAndCpuCopyAllowedThenReturnCorrectValue) {
    DebugManagerStateRestore dbgRestore;
    debugManager.flags.DoCpuCopyOnReadBuffer.set(1);
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);

    // Dedicated 16-byte host pointer for this query, separate from the fixture's `ptr`.
    unsigned char smallHostPtr[16];
    const auto stagingAllowed = queue.isValidForStagingTransfer(&buffer, smallHostPtr, 16, CL_COMMAND_READ_BUFFER, true, false);

    EXPECT_FALSE(stagingAllowed);
}
|
||||
@@ -652,7 +652,7 @@ struct WriteBufferStagingBufferTest : public EnqueueWriteBufferHw {
|
||||
|
||||
HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledThenReturnSuccess) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, buffer.getSize(), ptr, nullptr);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, buffer.getSize(), ptr, nullptr);
|
||||
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
EXPECT_EQ(1ul, mockCommandQueueHw.enqueueWriteBufferCounter);
|
||||
@@ -708,7 +708,7 @@ HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledWithLa
|
||||
chunkSize * 4,
|
||||
nullptr,
|
||||
retVal));
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(buffer.get(), false, 0, chunkSize * 4, hostPtr, nullptr);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, buffer.get(), false, 0, chunkSize * 4, hostPtr, nullptr);
|
||||
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
@@ -723,7 +723,7 @@ HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledWithEv
|
||||
constexpr cl_command_type expectedLastCmd = CL_COMMAND_WRITE_BUFFER;
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
cl_event event;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
@@ -738,7 +738,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingWri
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
mockCommandQueueHw.setOoqEnabled();
|
||||
cl_event event;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
@@ -752,7 +752,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStag
|
||||
cl_event event;
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
mockCommandQueueHw.setProfilingEnabled();
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
@@ -765,7 +765,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStag
|
||||
HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferFailedThenPropagateErrorCode) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
mockCommandQueueHw.enqueueWriteBufferCallBase = false;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, nullptr);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, nullptr);
|
||||
|
||||
EXPECT_EQ(res, CL_INVALID_OPERATION);
|
||||
EXPECT_EQ(1ul, mockCommandQueueHw.enqueueWriteBufferCounter);
|
||||
|
||||
@@ -100,6 +100,26 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
|
||||
alignedFree(ptr);
|
||||
}
|
||||
|
||||
// Parameterized over address/size alignment: a staging read must always emit the
// "requires copy data" hint, and must emit the alignment-restriction hint exactly
// when the host address or the size is misaligned.
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueStagingReadBufferIsCalledThenContextProvidesHintsAboutAlignments) {
    REQUIRE_SVM_OR_SKIP(pPlatform->getClDevice(0));

    // Two cache lines are allocated so that shifting the address by one byte stays inside the allocation.
    void *hostStorage = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);

    // Misalign by a single byte when the test parameter asks for it.
    uintptr_t addressForReadBuffer = reinterpret_cast<uintptr_t>(hostStorage) + (alignedAddress ? 0u : 1u);
    size_t sizeForReadBuffer = MemoryConstants::cacheLineSize - (alignedSize ? 0u : 1u);

    pCmdQ->enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER,
                                        buffer,
                                        CL_FALSE,
                                        0,
                                        sizeForReadBuffer,
                                        reinterpret_cast<void *>(addressForReadBuffer),
                                        nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), addressForReadBuffer);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBuffer, sizeForReadBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize);
    const bool fullyAligned = alignedSize && alignedAddress;
    EXPECT_EQ(!fullyAligned, containsHint(expectedHint, userData));

    alignedFree(hostStorage);
}
|
||||
|
||||
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndNotSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
|
||||
|
||||
size_t bufferOrigin[] = {0, 0, 0};
|
||||
|
||||
@@ -190,6 +190,12 @@ class MockCommandQueue : public CommandQueue {
|
||||
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; }
|
||||
|
||||
// Stub override: ignores every argument and reports success.
cl_int enqueueReadBufferImpl(Buffer *buffer,
                             cl_bool blockingRead,
                             size_t offset,
                             size_t cb,
                             void *ptr,
                             GraphicsAllocation *mapAllocation,
                             cl_uint numEventsInWaitList,
                             const cl_event *eventWaitList,
                             cl_event *event,
                             CommandStreamReceiver &csr) override {
    return CL_SUCCESS;
}
|
||||
|
||||
cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region,
|
||||
size_t rowPitch, size_t slicePitch, void *ptr,
|
||||
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
|
||||
@@ -433,6 +439,16 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
return CL_INVALID_OPERATION;
|
||||
}
|
||||
|
||||
// Mock hook for read-buffer enqueues. Counts the call and records whether it was
// blocking, then either forwards to the base implementation or, when
// enqueueReadBufferCallBase is false, returns CL_INVALID_OPERATION.
cl_int enqueueReadBufferImpl(Buffer *buffer,
                             cl_bool blockingRead,
                             size_t offset,
                             size_t size,
                             void *ptr,
                             GraphicsAllocation *mapAllocation,
                             cl_uint numEventsInWaitList,
                             const cl_event *eventWaitList,
                             cl_event *event,
                             CommandStreamReceiver &csr) override {
    ++enqueueReadBufferCounter;
    blockingReadBuffer = (blockingRead == CL_TRUE);

    // Tests clear the flag to simulate a failing enqueue.
    if (!enqueueReadBufferCallBase) {
        return CL_INVALID_OPERATION;
    }
    return BaseClass::enqueueReadBufferImpl(buffer, blockingRead, offset, size, ptr, mapAllocation,
                                            numEventsInWaitList, eventWaitList, event, csr);
}
|
||||
|
||||
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override {
|
||||
kernelParams = dispatchInfo.peekBuiltinOpParams();
|
||||
lastCommandType = commandType;
|
||||
@@ -540,8 +556,11 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
bool enqueueReadImageCallBase = true;
|
||||
size_t enqueueWriteBufferCounter = 0;
|
||||
bool enqueueWriteBufferCallBase = true;
|
||||
size_t enqueueReadBufferCounter = 0;
|
||||
bool enqueueReadBufferCallBase = true;
|
||||
size_t requestedCmdStreamSize = 0;
|
||||
bool blockingWriteBuffer = false;
|
||||
bool blockingReadBuffer = false;
|
||||
bool storeMultiDispatchInfo = false;
|
||||
bool notifyEnqueueReadBufferCalled = false;
|
||||
bool notifyEnqueueReadImageCalled = false;
|
||||
|
||||
Reference in New Issue
Block a user