mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 14:33:04 +08:00
Enable zero copy for enqueueWriteBufferRect with hint
Change-Id: I411f00b98056307906c02d34e793cefe460735ba
This commit is contained in:
@@ -395,5 +395,14 @@ class CommandQueueHw : public CommandQueue {
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType);
|
||||
void forceDispatchScheduler(OCLRT::MultiDispatchInfo &multiDispatchInfo);
|
||||
// Computes the linear offsets used by the rect enqueue commands.
// Writes through bufferOffset and hostOffset the value
//   origin[2] * slicePitch + origin[1] * rowPitch + origin[0]
// for the buffer side and the host side respectively.
// A row pitch of 0 is replaced by region[0]; a slice pitch of 0 is replaced
// by region[1] * (effective row pitch).
// bufferOffset / hostOffset are output parameters and are dereferenced
// unconditionally by the definition, so they must not be null.
static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
|
||||
size_t *hostOffset,
|
||||
const size_t *bufferOrigin,
|
||||
const size_t *hostOrigin,
|
||||
const size_t *region,
|
||||
size_t bufferRowPitch,
|
||||
size_t bufferSlicePitch,
|
||||
size_t hostRowPitch,
|
||||
size_t hostSlicePitch);
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -685,4 +685,21 @@ void CommandQueueHw<GfxFamily>::addMapUnmapToWaitlistEventsDependencies(const cl
|
||||
}
|
||||
this->virtualEvent = eventBuilder->getEvent();
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::computeOffsetsValueForRectCommands(size_t *bufferOffset,
|
||||
size_t *hostOffset,
|
||||
const size_t *bufferOrigin,
|
||||
const size_t *hostOrigin,
|
||||
const size_t *region,
|
||||
size_t bufferRowPitch,
|
||||
size_t bufferSlicePitch,
|
||||
size_t hostRowPitch,
|
||||
size_t hostSlicePitch) {
|
||||
size_t computedBufferRowPitch = bufferRowPitch ? bufferRowPitch : region[0];
|
||||
size_t computedBufferSlicePitch = bufferSlicePitch ? bufferSlicePitch : region[1] * computedBufferRowPitch;
|
||||
size_t computedHostRowPitch = hostRowPitch ? hostRowPitch : region[0];
|
||||
size_t computedHostSlicePitch = hostSlicePitch ? hostSlicePitch : region[1] * computedHostRowPitch;
|
||||
*bufferOffset = bufferOrigin[2] * computedBufferSlicePitch + bufferOrigin[1] * computedBufferRowPitch + bufferOrigin[0];
|
||||
*hostOffset = hostOrigin[2] * computedHostSlicePitch + hostOrigin[1] * computedHostRowPitch + hostOrigin[0];
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -46,7 +46,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
cl_event *event) {
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, ptr, CL_COMMAND_READ_BUFFER);
|
||||
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER);
|
||||
if ((DebugManager.flags.DoCpuCopyOnReadBuffer.get() ||
|
||||
buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) &&
|
||||
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
|
||||
|
||||
@@ -45,7 +45,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
cl_event *event) {
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, ptr, CL_COMMAND_WRITE_BUFFER);
|
||||
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER);
|
||||
if ((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() ||
|
||||
buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast<void *>(ptr), size)) &&
|
||||
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
|
||||
|
||||
@@ -48,7 +48,31 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
size_t bufferOffset;
|
||||
size_t hostOffset;
|
||||
computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch);
|
||||
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_WRITE_BUFFER_RECT);
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingWrite == CL_TRUE,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER_RECT);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
|
||||
this->getContext(), this->getDevice());
|
||||
builder.takeOwnership(this->context);
|
||||
@@ -82,4 +106,4 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -49,6 +49,7 @@ const char *DriverDiagnostics::hintFormat[] = {
|
||||
"Performance hint: clEnqueueWriteBuffer call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA
|
||||
"Performance hint: clEnqueueWriteBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA
|
||||
"Performance hint: clEnqueueWriteBufferRect call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA
|
||||
"Performance hint: clEnqueueWriteBufferRect call on a buffer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA
|
||||
"Performance hint: Pointer %p and size %u passed to clEnqueueReadImage doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
|
||||
"Performance hint: clEnqueueWriteImage call on an image %p require driver to copy the data.", //CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA
|
||||
"Performance hint: clEnqueueMapBuffer call on a buffer %p will require driver to make a copy as buffer is not sharing the same physical memory with CPU.", //CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA
|
||||
|
||||
@@ -42,6 +42,7 @@ enum PerformanceHints {
|
||||
CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA,
|
||||
CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA,
|
||||
CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA,
|
||||
CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA,
|
||||
CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
|
||||
CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA,
|
||||
CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA,
|
||||
|
||||
@@ -351,9 +351,13 @@ void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyn
|
||||
memoryManager->freeGraphicsMemory(allocation);
|
||||
}
|
||||
|
||||
bool MemObj::checkIfMemoryTransferIsRequired(size_t offset, const void *ptr, cl_command_type cmdType) {
|
||||
auto bufferStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offset);
|
||||
auto isMemTransferNeeded = !((bufferStorage == ptr) && (cmdType == CL_COMMAND_READ_BUFFER || cmdType == CL_COMMAND_WRITE_BUFFER));
|
||||
bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObjest, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) {
|
||||
auto bufferStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offsetInMemObjest);
|
||||
auto hostStorage = ptrOffset(hostPtr, offsetInHostPtr);
|
||||
auto isMemTransferNeeded = !((bufferStorage == hostStorage) &&
|
||||
(cmdType == CL_COMMAND_WRITE_BUFFER || cmdType == CL_COMMAND_READ_BUFFER ||
|
||||
cmdType == CL_COMMAND_WRITE_BUFFER_RECT || cmdType == CL_COMMAND_READ_BUFFER_RECT ||
|
||||
cmdType == CL_COMMAND_WRITE_IMAGE || cmdType == CL_COMMAND_READ_IMAGE));
|
||||
return isMemTransferNeeded;
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -121,7 +121,7 @@ class MemObj : public BaseObject<_cl_mem> {
|
||||
|
||||
void waitForCsrCompletion();
|
||||
void destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy);
|
||||
bool checkIfMemoryTransferIsRequired(size_t offset, const void *ptr, cl_command_type cmdType);
|
||||
// Returns false when the mem object's CPU transfer address plus offsetInMemObjest
// equals ptr plus offsetInHostPtr and cmdType is a read/write buffer, buffer-rect
// or image command; returns true (transfer required) otherwise.
bool checkIfMemoryTransferIsRequired(size_t offsetInMemObjest, size_t offsetInHostPtr, const void *ptr, cl_command_type cmdType);
|
||||
|
||||
protected:
|
||||
void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);
|
||||
|
||||
Reference in New Issue
Block a user