Enable zero copy for enqueueWriteBufferRect with hint

Change-Id: I411f00b98056307906c02d34e793cefe460735ba
This commit is contained in:
mplewka
2018-01-24 09:57:20 +01:00
committed by sys_ocldev
parent f3f53ed14b
commit 251de14ee6
13 changed files with 390 additions and 10 deletions

View File

@@ -395,5 +395,14 @@ class CommandQueueHw : public CommandQueue {
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType);
void forceDispatchScheduler(OCLRT::MultiDispatchInfo &multiDispatchInfo);
// Computes the linear offsets of the buffer-side and host-side origins used by
// the rect read/write commands and stores them in *bufferOffset / *hostOffset.
// A row pitch of 0 is replaced by region[0]; a slice pitch of 0 is replaced by
// region[1] * (effective row pitch). Each offset is then
//   origin[2] * slicePitch + origin[1] * rowPitch + origin[0].
// NOTE(review): units are presumably bytes, following the OpenCL rect-command
// convention - confirm against callers.
static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
size_t *hostOffset,
const size_t *bufferOrigin,
const size_t *hostOrigin,
const size_t *region,
size_t bufferRowPitch,
size_t bufferSlicePitch,
size_t hostRowPitch,
size_t hostSlicePitch);
};
} // namespace OCLRT

View File

@@ -685,4 +685,21 @@ void CommandQueueHw<GfxFamily>::addMapUnmapToWaitlistEventsDependencies(const cl
}
this->virtualEvent = eventBuilder->getEvent();
}
// Translates the 3-D origins of a rect command into linear offsets.
//
// Outputs:
//   *bufferOffset - linear offset of bufferOrigin inside the buffer
//   *hostOffset   - linear offset of hostOrigin inside the host allocation
//
// A pitch passed as 0 is treated as "not specified": the row pitch then
// defaults to region[0] and the slice pitch to region[1] * effective row pitch.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::computeOffsetsValueForRectCommands(size_t *bufferOffset,
                                                                   size_t *hostOffset,
                                                                   const size_t *bufferOrigin,
                                                                   const size_t *hostOrigin,
                                                                   const size_t *region,
                                                                   size_t bufferRowPitch,
                                                                   size_t bufferSlicePitch,
                                                                   size_t hostRowPitch,
                                                                   size_t hostSlicePitch) {
    // Resolve the effective buffer-side pitches.
    size_t bufferRowStride = bufferRowPitch;
    if (bufferRowStride == 0) {
        bufferRowStride = region[0];
    }
    size_t bufferSliceStride = bufferSlicePitch;
    if (bufferSliceStride == 0) {
        bufferSliceStride = region[1] * bufferRowStride;
    }

    // Resolve the effective host-side pitches.
    size_t hostRowStride = hostRowPitch;
    if (hostRowStride == 0) {
        hostRowStride = region[0];
    }
    size_t hostSliceStride = hostSlicePitch;
    if (hostSliceStride == 0) {
        hostSliceStride = region[1] * hostRowStride;
    }

    // Flatten (x, y, z) -> x + y * rowStride + z * sliceStride.
    size_t bufferLinear = bufferOrigin[0];
    bufferLinear += bufferOrigin[1] * bufferRowStride;
    bufferLinear += bufferOrigin[2] * bufferSliceStride;
    *bufferOffset = bufferLinear;

    size_t hostLinear = hostOrigin[0];
    hostLinear += hostOrigin[1] * hostRowStride;
    hostLinear += hostOrigin[2] * hostSliceStride;
    *hostOffset = hostLinear;
}
} // namespace OCLRT

View File

@@ -46,7 +46,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
cl_event *event) {
cl_int retVal = CL_SUCCESS;
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, ptr, CL_COMMAND_READ_BUFFER);
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER);
if ((DebugManager.flags.DoCpuCopyOnReadBuffer.get() ||
buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) &&
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {

View File

@@ -45,7 +45,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
cl_event *event) {
cl_int retVal = CL_SUCCESS;
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, ptr, CL_COMMAND_WRITE_BUFFER);
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER);
if ((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() ||
buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast<void *>(ptr), size)) &&
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {

View File

@@ -48,7 +48,31 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
cl_event *event) {
MultiDispatchInfo dispatchInfo;
size_t bufferOffset;
size_t hostOffset;
computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch);
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_WRITE_BUFFER_RECT);
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingWrite == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER_RECT);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return CL_SUCCESS;
}
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
this->getContext(), this->getDevice());
builder.takeOwnership(this->context);
@@ -82,4 +106,4 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
return CL_SUCCESS;
}
}
} // namespace OCLRT

View File

@@ -49,6 +49,7 @@ const char *DriverDiagnostics::hintFormat[] = {
"Performance hint: clEnqueueWriteBuffer call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA
"Performance hint: clEnqueueWriteBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA
"Performance hint: clEnqueueWriteBufferRect call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA
"Performance hint: clEnqueueWriteBufferRect call on a buffer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA
"Performance hint: Pointer %p and size %u passed to clEnqueueReadImage doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
"Performance hint: clEnqueueWriteImage call on an image %p require driver to copy the data.", //CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA
"Performance hint: clEnqueueMapBuffer call on a buffer %p will require driver to make a copy as buffer is not sharing the same physical memory with CPU.", //CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA

View File

@@ -42,6 +42,7 @@ enum PerformanceHints {
CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA,
CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA,
CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA,
CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA,
CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA,
CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA,

View File

@@ -351,9 +351,13 @@ void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyn
memoryManager->freeGraphicsMemory(allocation);
}
bool MemObj::checkIfMemoryTransferIsRequired(size_t offset, const void *ptr, cl_command_type cmdType) {
auto bufferStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offset);
auto isMemTransferNeeded = !((bufferStorage == ptr) && (cmdType == CL_COMMAND_READ_BUFFER || cmdType == CL_COMMAND_WRITE_BUFFER));
// Reports whether a read/write command actually has to move data.
//
// No transfer is needed only when the CPU address of the mem object (advanced
// by offsetInMemObjest) equals hostPtr (advanced by offsetInHostPtr) AND the
// command is one of the buffer / buffer-rect / image read or write commands.
// In every other case the function returns true.
bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObjest, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) {
    auto memObjAddress = ptrOffset(this->getCpuAddressForMemoryTransfer(), offsetInMemObjest);
    auto hostAddress = ptrOffset(hostPtr, offsetInHostPtr);

    // Different addresses: the data lives in two places, so it must be copied.
    if (!(memObjAddress == hostAddress)) {
        return true;
    }

    // Same address: the copy can be skipped only for the supported commands.
    switch (cmdType) {
    case CL_COMMAND_WRITE_BUFFER:
    case CL_COMMAND_READ_BUFFER:
    case CL_COMMAND_WRITE_BUFFER_RECT:
    case CL_COMMAND_READ_BUFFER_RECT:
    case CL_COMMAND_WRITE_IMAGE:
    case CL_COMMAND_READ_IMAGE:
        return false;
    default:
        return true;
    }
}
} // namespace OCLRT

View File

@@ -121,7 +121,7 @@ class MemObj : public BaseObject<_cl_mem> {
void waitForCsrCompletion();
void destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy);
bool checkIfMemoryTransferIsRequired(size_t offset, const void *ptr, cl_command_type cmdType);
// Returns true when the given command has to copy data between this mem object
// and the host pointer; returns false only when both offset-adjusted addresses
// are equal and cmdType is a buffer, buffer-rect or image read/write command.
// NOTE(review): "offsetInMemObjest" looks like a typo for "offsetInMemObject".
bool checkIfMemoryTransferIsRequired(size_t offsetInMemObjest, size_t offsetInHostPtr, const void *ptr, cl_command_type cmdType);
protected:
void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);