From f6bf2c5d0bc2d700db426e9712d19a9448e51c47 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Thu, 30 May 2019 14:36:12 +0200 Subject: [PATCH] Simplify read/write enqueue operations on CPU Change-Id: I7f59b04d484be2699e325d10e16298016231faf2 Signed-off-by: Dunajski, Bartosz --- runtime/command_queue/command_queue.cpp | 11 ++++ runtime/command_queue/command_queue.h | 2 + runtime/command_queue/command_queue_hw.h | 9 +++ .../command_queue/command_queue_hw_base.inl | 57 ++++++++++++++++++ .../cpu_data_transfer_handler.cpp | 24 +++----- runtime/command_queue/enqueue_read_buffer.h | 57 +++++------------- .../command_queue/enqueue_read_buffer_rect.h | 23 +------- runtime/command_queue/enqueue_read_image.h | 21 +------ runtime/command_queue/enqueue_write_buffer.h | 58 +++++-------------- .../command_queue/enqueue_write_buffer_rect.h | 21 +------ runtime/command_queue/enqueue_write_image.h | 21 +------ runtime/context/context.h | 9 +++ runtime/context/driver_diagnostics.cpp | 39 +++++++++++++ runtime/context/driver_diagnostics.h | 1 + .../context/driver_diagnostics_tests.cpp | 41 +++++++++++++ 15 files changed, 215 insertions(+), 179 deletions(-) diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 0043d806a9..7d2b4cbc5e 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -572,4 +572,15 @@ size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo & } return nodesCount; } + +bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, + cl_uint numEventsInWaitList, const cl_event *eventWaitList) { + // Requested by debug variable or allowed by Buffer + bool debugVariableSet = (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get()) || + (CL_COMMAND_WRITE_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnWriteBuffer.get()); + + return (debugVariableSet && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) && + buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) || + buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size); +} } // namespace NEO diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index 1d51c9094e..fbe1e2ef84 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -446,6 +446,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies); void processProperties(const cl_queue_properties *properties); + bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, + cl_uint numEventsInWaitList, const cl_event *eventWaitList); Context *context = nullptr; Device *device = nullptr; diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h index a081e36e9f..37284454ca 100644 --- a/runtime/command_queue/command_queue_hw.h +++ b/runtime/command_queue/command_queue_hw.h @@ -367,6 +367,15 @@ class CommandQueueHw : public CommandQueue { MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){}; size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image); + cl_int enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer, + size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, + const cl_event *eventWaitList, cl_event *event); + cl_int enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer, + size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, + const cl_event *eventWaitList, cl_event *event); + cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList, + const cl_event *eventWaitList, cl_event *event); + private: bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) override; diff --git a/runtime/command_queue/command_queue_hw_base.inl b/runtime/command_queue/command_queue_hw_base.inl index c7ad13e682..4a5cebc057 100644 --- a/runtime/command_queue/command_queue_hw_base.inl +++ b/runtime/command_queue/command_queue_hw_base.inl @@ -42,4 +42,61 @@ void CommandQueueHw::notifyEnqueueReadImage(Image *image, bool blockingR } } +template +cl_int CommandQueueHw::enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer, + size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, + const cl_event *eventWaitList, cl_event *event) { + cl_int retVal = CL_SUCCESS; + EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); + + TransferProperties transferProperties(buffer, commandType, 0, true, &offset, &size, ptr, true); + cpuDataTransferHandler(transferProperties, eventsRequest, retVal); + return retVal; +} + +template +cl_int CommandQueueHw::enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer, + size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, + const cl_event *eventWaitList, cl_event *event) { + cl_int retVal = CL_SUCCESS; + EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); + + TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false); + cpuDataTransferHandler(transferProperties, eventsRequest, retVal); + if (event) { + auto pEvent = castToObjectOrAbort(*event); + pEvent->setCmdType(commandType); + } + + if (context->isProvidingPerformanceHints()) { + context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast(buffer), ptr); + } + return retVal; +} + +template +cl_int CommandQueueHw::enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList, + const cl_event *eventWaitList, cl_event *event) { + MultiDispatchInfo multiDispatchInfo; + NullSurface s; + Surface *surfaces[] = {&s}; + enqueueHandler( + surfaces, + blocking == CL_TRUE, + multiDispatchInfo, + numEventsInWaitList, + eventWaitList, + event); + if (event) { + auto pEvent = castToObjectOrAbort(*event); + pEvent->setCmdType(commandType); + } + + if (context->isProvidingPerformanceHints()) { + context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast(memObj), ptr); + } + + return CL_SUCCESS; +} + } // namespace NEO diff --git a/runtime/command_queue/cpu_data_transfer_handler.cpp b/runtime/command_queue/cpu_data_transfer_handler.cpp index 1c63819a07..a7123b8a3b 100644 --- a/runtime/command_queue/cpu_data_transfer_handler.cpp +++ b/runtime/command_queue/cpu_data_transfer_handler.cpp @@ -164,31 +164,23 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie void CommandQueue::providePerformanceHint(TransferProperties &transferProperties) { switch (transferProperties.cmdType) { case CL_COMMAND_MAP_BUFFER: - if (!transferProperties.memObj->isMemObjZeroCopy()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, static_cast(transferProperties.memObj)); - break; - } - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast(transferProperties.memObj)); - break; case CL_COMMAND_MAP_IMAGE: - if (!transferProperties.memObj->isMemObjZeroCopy()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, static_cast(transferProperties.memObj)); - break; - } - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, static_cast(transferProperties.memObj)); + context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, !transferProperties.memObj->isMemObjZeroCopy(), + static_cast(transferProperties.memObj)); break; case CL_COMMAND_UNMAP_MEM_OBJECT: if (!transferProperties.memObj->isMemObjZeroCopy()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, transferProperties.ptr, static_cast(transferProperties.memObj)); + context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true, + transferProperties.ptr, static_cast(transferProperties.memObj)); break; } - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, transferProperties.ptr); + context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, false, transferProperties.ptr); break; case CL_COMMAND_READ_BUFFER: - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast(transferProperties.memObj), transferProperties.ptr); - break; case CL_COMMAND_WRITE_BUFFER: - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast(transferProperties.memObj), transferProperties.ptr); + context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true, + static_cast(transferProperties.memObj), transferProperties.ptr); + break; } } } // namespace NEO diff --git a/runtime/command_queue/enqueue_read_buffer.h b/runtime/command_queue/enqueue_read_buffer.h index 2dfa958048..917db1e0b7 100644 --- a/runtime/command_queue/enqueue_read_buffer.h +++ b/runtime/command_queue/enqueue_read_buffer.h @@ -37,55 +37,28 @@ cl_int CommandQueueHw::enqueueReadBuffer( notifyEnqueueReadBuffer(buffer, !!blockingRead); } - cl_int retVal = CL_SUCCESS; bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true; - if ((DebugManager.flags.DoCpuCopyOnReadBuffer.get() && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) && - buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) || - buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) { - if (!isMemTransferNeeded) { - TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false); - EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); - cpuDataTransferHandler(transferProperties, eventsRequest, retVal); - if (event) { - auto pEvent = castToObjectOrAbort(*event); - pEvent->setCmdType(CL_COMMAND_READ_BUFFER); - } + bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_READ_BUFFER, blockingRead, size, ptr, + numEventsInWaitList, eventWaitList); - if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast(buffer), ptr); - } - return retVal; + if (isCpuCopyAllowed) { + if (isMemTransferNeeded) { + return enqueueReadWriteBufferOnCpuWithMemoryTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr, + numEventsInWaitList, eventWaitList, event); + } else { + return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr, + numEventsInWaitList, eventWaitList, event); } - TransferProperties transferProperties(buffer, CL_COMMAND_READ_BUFFER, 0, true, &offset, &size, ptr, true); - EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); - cpuDataTransferHandler(transferProperties, eventsRequest, retVal); - return retVal; + } else if (!isMemTransferNeeded) { + return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER, blockingRead, + numEventsInWaitList, eventWaitList, event); } - MultiDispatchInfo dispatchInfo; - if (!isMemTransferNeeded) { - NullSurface s; - Surface *surfaces[] = {&s}; - enqueueHandler( - surfaces, - blockingRead == CL_TRUE, - dispatchInfo, - numEventsInWaitList, - eventWaitList, - event); - if (event) { - auto pEvent = castToObjectOrAbort(*event); - pEvent->setCmdType(CL_COMMAND_READ_BUFFER); - } - if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast(buffer), ptr); - } - - return CL_SUCCESS; - } auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, this->getContext(), this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); + MultiDispatchInfo dispatchInfo; + void *dstPtr = ptr; MemObjSurface bufferSurf(buffer); @@ -121,7 +94,7 @@ cl_int CommandQueueHw::enqueueReadBuffer( builder.buildDispatchInfos(dispatchInfo, dc); if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast(buffer), ptr); + context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER, true, static_cast(buffer), ptr); if (!isL3Capable(ptr, size)) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize); } diff --git a/runtime/command_queue/enqueue_read_buffer_rect.h b/runtime/command_queue/enqueue_read_buffer_rect.h index 013e521489..f1a77dbe80 100644 --- a/runtime/command_queue/enqueue_read_buffer_rect.h +++ b/runtime/command_queue/enqueue_read_buffer_rect.h @@ -43,25 +43,8 @@ cl_int CommandQueueHw::enqueueReadBufferRect( isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_READ_BUFFER_RECT); } if (!isMemTransferNeeded) { - NullSurface s; - Surface *surfaces[] = {&s}; - enqueueHandler( - surfaces, - blockingRead == CL_TRUE, - dispatchInfo, - numEventsInWaitList, - eventWaitList, - event); - if (event) { - auto pEvent = castToObjectOrAbort(*event); - pEvent->setCmdType(CL_COMMAND_READ_BUFFER_RECT); - } - - if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA, static_cast(buffer), ptr); - } - - return CL_SUCCESS; + return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER_RECT, blockingRead, + numEventsInWaitList, eventWaitList, event); } auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect, this->getContext(), this->getDevice()); @@ -109,7 +92,7 @@ cl_int CommandQueueHw::enqueueReadBufferRect( event); if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast(buffer), ptr); + context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast(buffer), ptr); if (!isL3Capable(ptr, hostPtrSize)) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, hostPtrSize, MemoryConstants::pageSize, MemoryConstants::pageSize); } diff --git a/runtime/command_queue/enqueue_read_image.h b/runtime/command_queue/enqueue_read_image.h index a43156bdf9..5fc368ff4c 100644 --- a/runtime/command_queue/enqueue_read_image.h +++ b/runtime/command_queue/enqueue_read_image.h @@ -53,25 +53,8 @@ cl_int CommandQueueHw::enqueueReadImage( isMemTransferNeeded = srcImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, CL_COMMAND_READ_IMAGE); } if (!isMemTransferNeeded) { - NullSurface s; - Surface *surfaces[] = {&s}; - enqueueHandler( - surfaces, - blockingRead == CL_TRUE, - di, - numEventsInWaitList, - eventWaitList, - event); - if (event) { - auto pEvent = castToObjectOrAbort(*event); - pEvent->setCmdType(CL_COMMAND_READ_IMAGE); - } - - if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA, static_cast(srcImage)); - } - - return CL_SUCCESS; + return enqueueMarkerForReadWriteOperation(srcImage, ptr, CL_COMMAND_READ_IMAGE, blockingRead, + numEventsInWaitList, eventWaitList, event); } auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, diff --git a/runtime/command_queue/enqueue_write_buffer.h b/runtime/command_queue/enqueue_write_buffer.h index be30bec91b..dec6688a0d 100644 --- a/runtime/command_queue/enqueue_write_buffer.h +++ b/runtime/command_queue/enqueue_write_buffer.h @@ -32,58 +32,28 @@ cl_int CommandQueueHw::enqueueWriteBuffer( const cl_event *eventWaitList, cl_event *event) { - cl_int retVal = CL_SUCCESS; - auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true; - if ((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) && - buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) || - buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast(ptr), size)) { - if (!isMemTransferNeeded) { - TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, const_cast(ptr), false); - EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); - cpuDataTransferHandler(transferProperties, eventsRequest, retVal); + auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER) : true; + bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_WRITE_BUFFER, blockingWrite, size, const_cast(ptr), + numEventsInWaitList, eventWaitList); - if (event) { - auto pEvent = castToObjectOrAbort(*event); - pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER); - } - - if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast(buffer), ptr); - } - return retVal; + if (isCpuCopyAllowed) { + if (isMemTransferNeeded) { + return enqueueReadWriteBufferOnCpuWithMemoryTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast(ptr), + numEventsInWaitList, eventWaitList, event); + } else { + return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast(ptr), + numEventsInWaitList, eventWaitList, event); } - TransferProperties transferProperties(buffer, CL_COMMAND_WRITE_BUFFER, 0, true, &offset, &size, const_cast(ptr), true); - EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); - cpuDataTransferHandler(transferProperties, eventsRequest, retVal); - return retVal; + } else if (!isMemTransferNeeded) { + return enqueueMarkerForReadWriteOperation(buffer, const_cast(ptr), CL_COMMAND_WRITE_BUFFER, blockingWrite, + numEventsInWaitList, eventWaitList, event); } - MultiDispatchInfo dispatchInfo; - if (!isMemTransferNeeded) { - NullSurface s; - Surface *surfaces[] = {&s}; - enqueueHandler( - surfaces, - blockingWrite == CL_TRUE, - dispatchInfo, - numEventsInWaitList, - eventWaitList, - event); - if (event) { - auto pEvent = castToObjectOrAbort(*event); - pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER); - } - - if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast(buffer), ptr); - } - - return CL_SUCCESS; - } auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, this->getContext(), this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); + MultiDispatchInfo dispatchInfo; void *srcPtr = const_cast(ptr); diff --git a/runtime/command_queue/enqueue_write_buffer_rect.h b/runtime/command_queue/enqueue_write_buffer_rect.h index 1d520a14a2..c7cfd83a76 100644 --- a/runtime/command_queue/enqueue_write_buffer_rect.h +++ b/runtime/command_queue/enqueue_write_buffer_rect.h @@ -42,25 +42,8 @@ cl_int CommandQueueHw::enqueueWriteBufferRect( isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_WRITE_BUFFER_RECT); } if (!isMemTransferNeeded) { - NullSurface s; - Surface *surfaces[] = {&s}; - enqueueHandler( - surfaces, - blockingWrite == CL_TRUE, - dispatchInfo, - numEventsInWaitList, - eventWaitList, - event); - if (event) { - auto pEvent = castToObjectOrAbort(*event); - pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER_RECT); - } - - if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA, static_cast(buffer), ptr); - } - - return CL_SUCCESS; + return enqueueMarkerForReadWriteOperation(buffer, const_cast(ptr), CL_COMMAND_WRITE_BUFFER_RECT, blockingWrite, + numEventsInWaitList, eventWaitList, event); } auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect, this->getContext(), this->getDevice()); diff --git a/runtime/command_queue/enqueue_write_image.h b/runtime/command_queue/enqueue_write_image.h index 927fb16544..5f91c09231 100644 --- a/runtime/command_queue/enqueue_write_image.h +++ b/runtime/command_queue/enqueue_write_image.h @@ -45,25 +45,8 @@ cl_int CommandQueueHw::enqueueWriteImage( isMemTransferNeeded = dstImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, CL_COMMAND_WRITE_IMAGE); } if (!isMemTransferNeeded) { - NullSurface s; - Surface *surfaces[] = {&s}; - enqueueHandler( - surfaces, - blockingWrite == CL_TRUE, - di, - numEventsInWaitList, - eventWaitList, - event); - if (event) { - auto pEvent = castToObjectOrAbort(*event); - pEvent->setCmdType(CL_COMMAND_WRITE_IMAGE); - } - - if (context->isProvidingPerformanceHints()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA, static_cast(dstImage)); - } - - return CL_SUCCESS; + return enqueueMarkerForReadWriteOperation(dstImage, const_cast(ptr), CL_COMMAND_WRITE_IMAGE, blockingWrite, + numEventsInWaitList, eventWaitList, event); } auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, this->getContext(), this->getDevice()); diff --git a/runtime/context/context.h b/runtime/context/context.h index ba8f660e5d..c0cc7f2a9d 100644 --- a/runtime/context/context.h +++ b/runtime/context/context.h @@ -107,6 +107,15 @@ class Context : public BaseObject<_cl_context> { } } + template + void providePerformanceHintForMemoryTransfer(cl_command_type commandType, bool transferRequired, Args &&... args) { + cl_diagnostics_verbose_level verboseLevel = transferRequired ? CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL + : CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL; + PerformanceHints hint = driverDiagnostics->obtainHintForTransferOperation(commandType, transferRequired); + + providePerformanceHint(verboseLevel, hint, args...); + } + cl_bool isProvidingPerformanceHints() const { return driverDiagnostics != nullptr; } diff --git a/runtime/context/driver_diagnostics.cpp b/runtime/context/driver_diagnostics.cpp index c37019600e..d136c46de9 100644 --- a/runtime/context/driver_diagnostics.cpp +++ b/runtime/context/driver_diagnostics.cpp @@ -7,6 +7,8 @@ #include "driver_diagnostics.h" +#include "runtime/helpers/debug_helpers.h" + namespace NEO { DriverDiagnostics::DriverDiagnostics(cl_diagnostics_verbose_level level) { @@ -56,4 +58,41 @@ const char *DriverDiagnostics::hintFormat[] = { "Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY "Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"" //KERNEL_ARGUMENT_AUX_TRANSLATION }; + +PerformanceHints DriverDiagnostics::obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired) { + PerformanceHints hint; + switch (commandType) { + case CL_COMMAND_MAP_BUFFER: + hint = transferRequired ? CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA; + break; + case CL_COMMAND_MAP_IMAGE: + hint = transferRequired ? CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA; + break; + case CL_COMMAND_UNMAP_MEM_OBJECT: + hint = transferRequired ? CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA : CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA; + break; + case CL_COMMAND_WRITE_BUFFER: + hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA; + break; + case CL_COMMAND_READ_BUFFER: + hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA; + break; + case CL_COMMAND_WRITE_BUFFER_RECT: + hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA; + break; + case CL_COMMAND_READ_BUFFER_RECT: + hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA; + break; + case CL_COMMAND_WRITE_IMAGE: + hint = transferRequired ? CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA; + break; + case CL_COMMAND_READ_IMAGE: + UNRECOVERABLE_IF(transferRequired) + hint = CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA; + break; + default: + UNRECOVERABLE_IF(true); + } + return hint; +} } // namespace NEO diff --git a/runtime/context/driver_diagnostics.h b/runtime/context/driver_diagnostics.h index 70f905ae49..f7457f8af7 100644 --- a/runtime/context/driver_diagnostics.h +++ b/runtime/context/driver_diagnostics.h @@ -57,6 +57,7 @@ class DriverDiagnostics { ~DriverDiagnostics() = default; static const char *hintFormat[]; static const cl_int maxHintStringSize = 1024; + PerformanceHints obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired); protected: cl_diagnostics_verbose_level verboseLevel; diff --git a/unit_tests/context/driver_diagnostics_tests.cpp b/unit_tests/context/driver_diagnostics_tests.cpp index 0a779b49cd..51c11b5add 100644 --- a/unit_tests/context/driver_diagnostics_tests.cpp +++ b/unit_tests/context/driver_diagnostics_tests.cpp @@ -9,6 +9,8 @@ #include "unit_tests/helpers/debug_manager_state_restore.h" +#include + using namespace NEO; bool containsHint(const char *providedHint, char *userData) { @@ -521,3 +523,42 @@ INSTANTIATE_TEST_CASE_P( TEST(PerformanceHintsDebugVariables, givenDefaultDebugManagerWhenPrintDriverDiagnosticsIsCalledThenMinusOneIsReturned) { EXPECT_EQ(-1, DebugManager.flags.PrintDriverDiagnostics.get()); } + +TEST(PerformanceHintsTransferTest, givenCommandTypeAndMemoryTransferRequiredWhenAskingForHintThenReturnCorrectValue) { + DriverDiagnostics driverDiagnostics(0); + const uint32_t numHints = 8; + std::tuple commandHints[numHints] = { + // commandType, transfer required, transfer not required + std::make_tuple(CL_COMMAND_MAP_BUFFER, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA), + std::make_tuple(CL_COMMAND_MAP_IMAGE, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA), + std::make_tuple(CL_COMMAND_UNMAP_MEM_OBJECT, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA), + std::make_tuple(CL_COMMAND_WRITE_BUFFER, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA), + std::make_tuple(CL_COMMAND_READ_BUFFER, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA), + std::make_tuple(CL_COMMAND_WRITE_BUFFER_RECT, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA), + std::make_tuple(CL_COMMAND_READ_BUFFER_RECT, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA), + std::make_tuple(CL_COMMAND_WRITE_IMAGE, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA), + }; + + for (uint32_t i = 0; i < numHints; i++) { + auto hintWithTransferRequired = driverDiagnostics.obtainHintForTransferOperation(std::get<0>(commandHints[i]), true); + auto hintWithoutTransferRequired = driverDiagnostics.obtainHintForTransferOperation(std::get<0>(commandHints[i]), false); + + EXPECT_EQ(std::get<1>(commandHints[i]), hintWithTransferRequired); + EXPECT_EQ(std::get<2>(commandHints[i]), hintWithoutTransferRequired); + } + + EXPECT_THROW(driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, true), std::exception); // no hint for this scenario + EXPECT_EQ(CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA, + driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, false)); +} + +TEST_F(DriverDiagnosticsTest, givenInvalidCommandTypeWhenAskingForZeroCopyOperatonThenAbort) { + cl_device_id deviceId = devices[0]; + cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; + auto context = std::unique_ptr(Context::create(validProperties, DeviceVector(&deviceId, 1), + callbackFunction, (void *)userData, retVal)); + + auto buffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + auto address = reinterpret_cast(0x12345); + EXPECT_THROW(context->providePerformanceHintForMemoryTransfer(CL_COMMAND_BARRIER, true, buffer.get(), address), std::exception); +}