Enable zero copy for enqueueReadBufferRect with hint

Change-Id: I4e7d89edfcff2674e7c163d70ad974d3464bf64f
This commit is contained in:
mplewka
2018-01-25 13:17:35 +01:00
parent 251de14ee6
commit 377fc8d20b
5 changed files with 276 additions and 2 deletions

View File

@@ -49,7 +49,31 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
cl_event *event) {
MultiDispatchInfo dispatchInfo;
size_t bufferOffset;
size_t hostOffset;
computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch);
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_READ_BUFFER_RECT);
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingRead == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_READ_BUFFER_RECT);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return CL_SUCCESS;
}
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
this->getContext(), this->getDevice());
builder.takeOwnership(this->context);
@@ -88,4 +112,4 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
return CL_SUCCESS;
}
}
} // namespace OCLRT

View File

@@ -45,6 +45,7 @@ const char *DriverDiagnostics::hintFormat[] = {
"Performance hint: clEnqueueReadBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA
"Performance hint: Pointer %p and size %u passed to clEnqueueReadBuffer doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
"Performance hint: clEnqueueReadBufferRect call on a buffer %p with pointer %p will require driver to copy the data.Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA
"Performance hint: clEnqueueReadBufferRect call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA
"Performance hint: Pointer %p and size %u passed to clEnqueueReadBufferRect doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
"Performance hint: clEnqueueWriteBuffer call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA
"Performance hint: clEnqueueWriteBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA

View File

@@ -38,6 +38,7 @@ enum PerformanceHints {
CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA,
CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA,
CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA,
CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA,
CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA,

View File

@@ -389,3 +389,197 @@ HWTEST_F(EnqueueReadBufferRectTest, blockingRequiresPipeControlAfterWalkerWithDC
EXPECT_TRUE(cmd->getDcFlushEnable());
}
}
// Non-blocking rect read on the fixture's queue (pCmdQ) where the host pointer
// is the buffer's own CPU address and the call waits on two events.
// The test asserts that the returned event and the queue both report task
// level 19 - the value given to event2, the larger of the two wait-list
// events - and that the returned event is typed CL_COMMAND_READ_BUFFER_RECT.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    uint32_t taskLevelCmdQ = 17;
    pCmdQ->taskLevel = taskLevelCmdQ;

    uint32_t taskLevelEvent1 = 8;
    uint32_t taskLevelEvent2 = 19;
    Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
    Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

    cl_event eventWaitList[] =
        {
            &event1,
            &event2};
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
    cl_event event = nullptr;

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};

    // Destination is the buffer's own CPU storage, so source == destination.
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    retVal = pCmdQ->enqueueReadBufferRect(
        buffer,
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        numEventsInWaitList,
        eventWaitList,
        &event);

    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    auto pEvent = static_cast<Event *>(event);
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdQ->taskLevel);
    EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, static_cast<int>(pEvent->getCommandType()));

    // The output event was handed to the test by the enqueue call; drop it.
    pEvent->release();
}
// Same scenario as the in-order variant, but on a queue created with
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE: a non-blocking rect read whose host
// pointer is the buffer's own CPU address, waiting on two events.
// The test asserts that the returned event and the queue both report task
// level 19 (the value given to event2) and that the returned event is typed
// CL_COMMAND_READ_BUFFER_RECT.
HWTEST_F(EnqueueReadBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    uint32_t taskLevelCmdQ = 17;
    pCmdOOQ->taskLevel = taskLevelCmdQ;

    uint32_t taskLevelEvent1 = 8;
    uint32_t taskLevelEvent2 = 19;
    Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
    Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

    cl_event eventWaitList[] =
        {
            &event1,
            &event2};
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
    cl_event event = nullptr;

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};

    // Destination is the buffer's own CPU storage, so source == destination.
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    retVal = pCmdOOQ->enqueueReadBufferRect(
        buffer,
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        numEventsInWaitList,
        eventWaitList,
        &event);

    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    auto pEvent = static_cast<Event *>(event);
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdOOQ->taskLevel);
    EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, static_cast<int>(pEvent->getCommandType()));

    // The output event was handed to the test by the enqueue call; drop it.
    pEvent->release();
}
// Non-blocking rect read with both row pitches passed as 0 and the host
// pointer equal to the buffer's own CPU address. The test asserts that the
// call succeeds and the queue's task level is still 0 afterwards.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndRowPitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer,
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        0, // buffer row pitch left unspecified
        slicePitch,
        0, // host row pitch left unspecified
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}
// Non-blocking rect read with both slice pitches passed as 0 and the host
// pointer equal to the buffer's own CPU address. The test asserts that the
// call succeeds and the queue's task level is still 0 afterwards.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndSlicePitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer,
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        0, // buffer slice pitch left unspecified
        rowPitch,
        0, // host slice pitch left unspecified
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}
// Non-blocking rect read with different buffer and host origins, where the
// host pointer is shifted so that both sides are meant to address the same
// bytes. The test asserts that the call succeeds and the queue's task level
// is still 0 afterwards.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointTheSameStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {50, 50, 0};
    size_t hostOrigin[] = {20, 20, 0};
    size_t region[] = {50, 50, 1};

    // Byte distance between the two origins, using the shared pitches.
    // Advancing the host pointer by this amount compensates for the smaller
    // host origin. NOTE(review): this assumes the driver computes offsets as
    // origin[2] * slicePitch + origin[1] * rowPitch + origin[0] - confirm
    // against computeOffsetsValueForRectCommands.
    size_t hostOffset = (bufferOrigin[2] - hostOrigin[2]) * slicePitch + (bufferOrigin[1] - hostOrigin[1]) * rowPitch + (bufferOrigin[0] - hostOrigin[0]);
    auto hostStorage = ptrOffset(ptr, hostOffset);

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer,
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        hostStorage,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}
// Non-blocking rect read where the host pointer is the buffer's CPU address
// but the buffer and host origins differ, with no pointer adjustment to
// compensate. The test asserts that the call succeeds and the queue's task
// level becomes 1.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointDiffrentStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {50, 50, 0};
    size_t hostOrigin[] = {10, 10, 0};
    size_t region[] = {50, 50, 1};

    retVal = pCmdQ->enqueueReadBufferRect(
        buffer,
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, pCmdQ->taskLevel);
}

View File

@@ -72,7 +72,8 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferRectIsCallingThenContextProvidesHintsAboutAlignments) {
uintptr_t addressForReadBufferRect = (uintptr_t)address;
void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
uintptr_t addressForReadBufferRect = (uintptr_t)ptr;
size_t sizeForReadBufferRect = MemoryConstants::cacheLineSize;
if (!alignedAddress) {
addressForReadBufferRect++;
@@ -102,6 +103,59 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
EXPECT_TRUE(containsHint(expectedHint, userData));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBufferRect, sizeForReadBufferRect, MemoryConstants::pageSize, MemoryConstants::pageSize);
EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
alignedFree(ptr);
}
// Issues a non-blocking rect read into a freshly allocated host block and
// checks that the context reported the
// CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA hint for this buffer/pointer
// pair.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndNotSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
    // Separate host allocation, two cache lines long and cache-line aligned.
    void *hostPtr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);

    size_t srcOrigin[] = {0, 0, 0};
    size_t dstOrigin[] = {0, 0, 0};
    size_t copyRegion[] = {1, 2, 1};

    // One cache line is used for every row/slice pitch on both sides.
    const size_t pitch = MemoryConstants::cacheLineSize;

    pCmdQ->enqueueReadBufferRect(buffer, CL_FALSE,
                                 srcOrigin, dstOrigin, copyRegion,
                                 pitch, pitch,
                                 pitch, pitch,
                                 hostPtr,
                                 0, nullptr, nullptr);

    // Build the exact hint text expected for this buffer and pointer.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer),
             hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
// Issues a non-blocking rect read using the fixture's `address` as the host
// pointer and checks that the context reported the
// CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA hint for this
// buffer/pointer pair.
// NOTE(review): presumably `address` is the storage backing `buffer` - confirm
// against the fixture's setup.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
    size_t srcOrigin[] = {0, 0, 0};
    size_t dstOrigin[] = {0, 0, 0};
    size_t copyRegion[] = {1, 2, 1};

    // One cache line is used for every row/slice pitch on both sides.
    const size_t pitch = MemoryConstants::cacheLineSize;

    pCmdQ->enqueueReadBufferRect(buffer, CL_FALSE,
                                 srcOrigin, dstOrigin, copyRegion,
                                 pitch, pitch,
                                 pitch, pitch,
                                 address,
                                 0, nullptr, nullptr);

    // Build the exact hint text expected for this buffer and pointer.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer),
             address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {