mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
Simplify read/write enqueue operations on CPU
Change-Id: I7f59b04d484be2699e325d10e16298016231faf2 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
@@ -572,4 +572,15 @@ size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo &
|
||||
}
|
||||
return nodesCount;
|
||||
}
|
||||
|
||||
// Decides whether a buffer read/write may be serviced by a CPU copy.
// The CPU path is taken when either
//   * the debug flag matching `commandType` (DoCpuCopyOnReadBuffer / DoCpuCopyOnWriteBuffer) is set,
//     Event::checkUserEventDependencies() reports nothing for the wait list and the buffer's
//     allocation type is not BUFFER_COMPRESSED, or
//   * the buffer itself allows it through isReadWriteOnCpuAllowed().
bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,
                                        cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
    // Only the flag that belongs to the requested command type is consulted.
    bool forcedByDebugFlag = false;
    if (CL_COMMAND_READ_BUFFER == commandType) {
        forcedByDebugFlag = DebugManager.flags.DoCpuCopyOnReadBuffer.get();
    } else if (CL_COMMAND_WRITE_BUFFER == commandType) {
        forcedByDebugFlag = DebugManager.flags.DoCpuCopyOnWriteBuffer.get();
    }

    if (forcedByDebugFlag) {
        // The event check and the allocation type lookup are evaluated lazily, in this order.
        if (!Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList)) {
            const auto allocationType = buffer->getGraphicsAllocation()->getAllocationType();
            if (allocationType != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
                return true;
            }
        }
    }

    // Otherwise the buffer has the final say.
    return buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size);
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -446,6 +446,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies);
|
||||
void processProperties(const cl_queue_properties *properties);
|
||||
// Returns true when a read/write of `buffer` may be done as a CPU copy: either the debug flag
// for `commandType` (DoCpuCopyOnReadBuffer / DoCpuCopyOnWriteBuffer) is set, the wait list has
// no user-event dependencies and the allocation is not BUFFER_COMPRESSED, or
// buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size) allows it.
bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
|
||||
|
||||
Context *context = nullptr;
|
||||
Device *device = nullptr;
|
||||
|
||||
@@ -367,6 +367,15 @@ class CommandQueueHw : public CommandQueue {
|
||||
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
|
||||
size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image);
|
||||
|
||||
// CPU path for a buffer read/write that has to move data: forwards the request to
// cpuDataTransferHandler() with `commandType` as the transfer command and returns the
// status written by the handler.
cl_int enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer,
|
||||
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event);
|
||||
// CPU path for a buffer read/write that needs no data movement: runs cpuDataTransferHandler()
// as a CL_COMMAND_MARKER, relabels the output event (when one is requested) with `commandType`
// and, if performance hints are enabled, emits the hint for a transfer that was not required.
// Returns the status written by the handler.
cl_int enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer,
|
||||
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event);
|
||||
// Replaces a read/write that needs no data movement with a CL_COMMAND_MARKER submitted through
// enqueueHandler(). The output event (when requested) is relabelled with `commandType`, and the
// hint for a transfer that was not required is emitted if performance hints are enabled.
// Always returns CL_SUCCESS.
cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event);
|
||||
|
||||
private:
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) override;
|
||||
|
||||
@@ -42,4 +42,61 @@ void CommandQueueHw<Family>::notifyEnqueueReadImage(Image *image, bool blockingR
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
cl_int CommandQueueHw<Family>::enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer,
|
||||
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
|
||||
TransferProperties transferProperties(buffer, commandType, 0, true, &offset, &size, ptr, true);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
cl_int CommandQueueHw<Family>::enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer,
|
||||
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(commandType);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
cl_int CommandQueueHw<Family>::enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) {
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blocking == CL_TRUE,
|
||||
multiDispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(commandType);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast<cl_mem>(memObj), ptr);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -164,31 +164,23 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||
void CommandQueue::providePerformanceHint(TransferProperties &transferProperties) {
|
||||
switch (transferProperties.cmdType) {
|
||||
case CL_COMMAND_MAP_BUFFER:
|
||||
if (!transferProperties.memObj->isMemObjZeroCopy()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
|
||||
break;
|
||||
}
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
|
||||
break;
|
||||
case CL_COMMAND_MAP_IMAGE:
|
||||
if (!transferProperties.memObj->isMemObjZeroCopy()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
|
||||
break;
|
||||
}
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
|
||||
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, !transferProperties.memObj->isMemObjZeroCopy(),
|
||||
static_cast<cl_mem>(transferProperties.memObj));
|
||||
break;
|
||||
case CL_COMMAND_UNMAP_MEM_OBJECT:
|
||||
if (!transferProperties.memObj->isMemObjZeroCopy()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, transferProperties.ptr, static_cast<cl_mem>(transferProperties.memObj));
|
||||
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true,
|
||||
transferProperties.ptr, static_cast<cl_mem>(transferProperties.memObj));
|
||||
break;
|
||||
}
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, transferProperties.ptr);
|
||||
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, false, transferProperties.ptr);
|
||||
break;
|
||||
case CL_COMMAND_READ_BUFFER:
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj), transferProperties.ptr);
|
||||
break;
|
||||
case CL_COMMAND_WRITE_BUFFER:
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj), transferProperties.ptr);
|
||||
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true,
|
||||
static_cast<cl_mem>(transferProperties.memObj), transferProperties.ptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -37,55 +37,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
notifyEnqueueReadBuffer(buffer, !!blockingRead);
|
||||
}
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true;
|
||||
if ((DebugManager.flags.DoCpuCopyOnReadBuffer.get() && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) &&
|
||||
buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
|
||||
buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) {
|
||||
if (!isMemTransferNeeded) {
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
|
||||
}
|
||||
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_READ_BUFFER, blockingRead, size, ptr,
|
||||
numEventsInWaitList, eventWaitList);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
return retVal;
|
||||
if (isCpuCopyAllowed) {
|
||||
if (isMemTransferNeeded) {
|
||||
return enqueueReadWriteBufferOnCpuWithMemoryTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
} else {
|
||||
return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_READ_BUFFER, 0, true, &offset, &size, ptr, true);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
return retVal;
|
||||
} else if (!isMemTransferNeeded) {
|
||||
return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER, blockingRead,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingRead == CL_TRUE,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||
this->getContext(), this->getDevice());
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
|
||||
void *dstPtr = ptr;
|
||||
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
@@ -121,7 +94,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
builder.buildDispatchInfos(dispatchInfo, dc);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER, true, static_cast<cl_mem>(buffer), ptr);
|
||||
if (!isL3Capable(ptr, size)) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
|
||||
}
|
||||
|
||||
@@ -43,25 +43,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_READ_BUFFER_RECT);
|
||||
}
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingRead == CL_TRUE,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_READ_BUFFER_RECT);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER_RECT, blockingRead,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
|
||||
this->getContext(), this->getDevice());
|
||||
@@ -109,7 +92,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
event);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr);
|
||||
if (!isL3Capable(ptr, hostPtrSize)) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, hostPtrSize, MemoryConstants::pageSize, MemoryConstants::pageSize);
|
||||
}
|
||||
|
||||
@@ -53,25 +53,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
isMemTransferNeeded = srcImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, CL_COMMAND_READ_IMAGE);
|
||||
}
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingRead == CL_TRUE,
|
||||
di,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_READ_IMAGE);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA, static_cast<cl_mem>(srcImage));
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
return enqueueMarkerForReadWriteOperation(srcImage, ptr, CL_COMMAND_READ_IMAGE, blockingRead,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer,
|
||||
|
||||
@@ -32,58 +32,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true;
|
||||
if ((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) &&
|
||||
buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
|
||||
buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast<void *>(ptr), size)) {
|
||||
if (!isMemTransferNeeded) {
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, const_cast<void *>(ptr), false);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER) : true;
|
||||
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_WRITE_BUFFER, blockingWrite, size, const_cast<void *>(ptr),
|
||||
numEventsInWaitList, eventWaitList);
|
||||
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
return retVal;
|
||||
if (isCpuCopyAllowed) {
|
||||
if (isMemTransferNeeded) {
|
||||
return enqueueReadWriteBufferOnCpuWithMemoryTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast<void *>(ptr),
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
} else {
|
||||
return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast<void *>(ptr),
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_WRITE_BUFFER, 0, true, &offset, &size, const_cast<void *>(ptr), true);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
return retVal;
|
||||
} else if (!isMemTransferNeeded) {
|
||||
return enqueueMarkerForReadWriteOperation(buffer, const_cast<void *>(ptr), CL_COMMAND_WRITE_BUFFER, blockingWrite,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingWrite == CL_TRUE,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||
this->getContext(), this->getDevice());
|
||||
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
|
||||
void *srcPtr = const_cast<void *>(ptr);
|
||||
|
||||
|
||||
@@ -42,25 +42,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_WRITE_BUFFER_RECT);
|
||||
}
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingWrite == CL_TRUE,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER_RECT);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
return enqueueMarkerForReadWriteOperation(buffer, const_cast<void *>(ptr), CL_COMMAND_WRITE_BUFFER_RECT, blockingWrite,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
|
||||
this->getContext(), this->getDevice());
|
||||
|
||||
@@ -45,25 +45,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
isMemTransferNeeded = dstImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, CL_COMMAND_WRITE_IMAGE);
|
||||
}
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingWrite == CL_TRUE,
|
||||
di,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_WRITE_IMAGE);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
return enqueueMarkerForReadWriteOperation(dstImage, const_cast<void *>(ptr), CL_COMMAND_WRITE_IMAGE, blockingWrite,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d,
|
||||
this->getContext(), this->getDevice());
|
||||
|
||||
@@ -107,6 +107,15 @@ class Context : public BaseObject<_cl_context> {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
void providePerformanceHintForMemoryTransfer(cl_command_type commandType, bool transferRequired, Args &&... args) {
|
||||
cl_diagnostics_verbose_level verboseLevel = transferRequired ? CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL
|
||||
: CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL;
|
||||
PerformanceHints hint = driverDiagnostics->obtainHintForTransferOperation(commandType, transferRequired);
|
||||
|
||||
providePerformanceHint(verboseLevel, hint, args...);
|
||||
}
|
||||
|
||||
cl_bool isProvidingPerformanceHints() const {
|
||||
return driverDiagnostics != nullptr;
|
||||
}
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
|
||||
#include "driver_diagnostics.h"
|
||||
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
DriverDiagnostics::DriverDiagnostics(cl_diagnostics_verbose_level level) {
|
||||
@@ -56,4 +58,41 @@ const char *DriverDiagnostics::hintFormat[] = {
|
||||
"Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY
|
||||
"Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"" //KERNEL_ARGUMENT_AUX_TRANSLATION
|
||||
};
|
||||
|
||||
// Maps an enqueue command type to the performance hint that describes whether the operation
// had to copy data (`transferRequired == true`) or could skip the copy (`false`).
//
// CL_COMMAND_READ_IMAGE only has a "no copy" hint here, so requesting it with
// `transferRequired` set goes through UNRECOVERABLE_IF, as does any command type without a
// case below.
PerformanceHints DriverDiagnostics::obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired) {
    // Value-initialised so that no indeterminate value can ever be returned, regardless of how
    // UNRECOVERABLE_IF behaves on the unsupported paths.
    PerformanceHints hint{};
    switch (commandType) {
    case CL_COMMAND_MAP_BUFFER:
        hint = transferRequired ? CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA;
        break;
    case CL_COMMAND_MAP_IMAGE:
        hint = transferRequired ? CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA;
        break;
    case CL_COMMAND_UNMAP_MEM_OBJECT:
        hint = transferRequired ? CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA : CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA;
        break;
    case CL_COMMAND_WRITE_BUFFER:
        hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA;
        break;
    case CL_COMMAND_READ_BUFFER:
        hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA;
        break;
    case CL_COMMAND_WRITE_BUFFER_RECT:
        hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA;
        break;
    case CL_COMMAND_READ_BUFFER_RECT:
        hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA;
        break;
    case CL_COMMAND_WRITE_IMAGE:
        hint = transferRequired ? CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA;
        break;
    case CL_COMMAND_READ_IMAGE:
        // No "requires copy" hint is mapped for image reads in this function.
        UNRECOVERABLE_IF(transferRequired);
        hint = CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA;
        break;
    default:
        // Command types that are not transfer operations have no hint at all.
        UNRECOVERABLE_IF(true);
    }
    return hint;
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -57,6 +57,7 @@ class DriverDiagnostics {
|
||||
~DriverDiagnostics() = default;
|
||||
static const char *hintFormat[];
|
||||
static const cl_int maxHintStringSize = 1024;
|
||||
PerformanceHints obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired);
|
||||
|
||||
protected:
|
||||
cl_diagnostics_verbose_level verboseLevel;
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
|
||||
#include <tuple>
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
bool containsHint(const char *providedHint, char *userData) {
|
||||
@@ -521,3 +523,42 @@ INSTANTIATE_TEST_CASE_P(
|
||||
TEST(PerformanceHintsDebugVariables, givenDefaultDebugManagerWhenPrintDriverDiagnosticsIsCalledThenMinusOneIsReturned) {
|
||||
EXPECT_EQ(-1, DebugManager.flags.PrintDriverDiagnostics.get());
|
||||
}
|
||||
|
||||
// Verifies that obtainHintForTransferOperation() returns the expected hint for every supported
// command type, both when a transfer is required and when it is not, and that
// CL_COMMAND_READ_IMAGE with a required transfer is rejected.
TEST(PerformanceHintsTransferTest, givenCommandTypeAndMemoryTransferRequiredWhenAskingForHintThenReturnCorrectValue) {
    DriverDiagnostics driverDiagnostics(0);

    // Each row: command type, hint when a transfer is required, hint when it is not.
    std::tuple<uint32_t, PerformanceHints, PerformanceHints> expectedHints[] = {
        std::make_tuple(CL_COMMAND_MAP_BUFFER, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA),
        std::make_tuple(CL_COMMAND_MAP_IMAGE, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA),
        std::make_tuple(CL_COMMAND_UNMAP_MEM_OBJECT, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA),
        std::make_tuple(CL_COMMAND_WRITE_BUFFER, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA),
        std::make_tuple(CL_COMMAND_READ_BUFFER, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA),
        std::make_tuple(CL_COMMAND_WRITE_BUFFER_RECT, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA),
        std::make_tuple(CL_COMMAND_READ_BUFFER_RECT, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA),
        std::make_tuple(CL_COMMAND_WRITE_IMAGE, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA),
    };

    for (const auto &row : expectedHints) {
        const auto commandType = std::get<0>(row);
        auto hintWhenCopying = driverDiagnostics.obtainHintForTransferOperation(commandType, true);
        auto hintWhenNotCopying = driverDiagnostics.obtainHintForTransferOperation(commandType, false);

        EXPECT_EQ(std::get<1>(row), hintWhenCopying);
        EXPECT_EQ(std::get<2>(row), hintWhenNotCopying);
    }

    // Image reads have no "requires copy" hint, so that combination must throw.
    EXPECT_THROW(driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, true), std::exception); // no hint for this scenario
    EXPECT_EQ(CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA,
              driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, false));
}
|
||||
|
||||
// A command type that is not a memory transfer operation (CL_COMMAND_BARRIER) must make
// providePerformanceHintForMemoryTransfer() throw on a context created with diagnostics enabled.
TEST_F(DriverDiagnosticsTest, givenInvalidCommandTypeWhenAskingForZeroCopyOperatonThenAbort) {
    cl_device_id device = devices[0];
    cl_context_properties diagnosticsProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0};

    std::unique_ptr<MockContext> context(Context::create<MockContext>(diagnosticsProperties, DeviceVector(&device, 1),
                                                                      callbackFunction, (void *)userData, retVal));
    std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));

    // Arbitrary host pointer value; it is only forwarded as a hint argument.
    auto hostPtr = reinterpret_cast<void *>(0x12345);

    EXPECT_THROW(context->providePerformanceHintForMemoryTransfer(CL_COMMAND_BARRIER, true, buffer.get(), hostPtr), std::exception);
}
|
||||
|
||||
Reference in New Issue
Block a user