From 4b592b25205e8e9b8110b41d69aeb75cbbb9fe8f Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 18 Jun 2019 11:02:47 +0200 Subject: [PATCH] Don't take BCS path when Queue is blocked Change-Id: Ie6faa276ce1173ce075693bc81d1e91d1ace27fc Signed-off-by: Dunajski, Bartosz Related-To: NEO-3020 --- runtime/command_queue/command_queue.cpp | 12 ++++++++++ runtime/command_queue/command_queue.h | 3 ++- runtime/command_queue/command_queue_hw.h | 2 +- .../cpu_data_transfer_handler.cpp | 2 +- runtime/command_queue/enqueue_common.h | 10 ++++----- runtime/command_queue/enqueue_read_buffer.h | 17 +++++++------- runtime/command_queue/enqueue_write_buffer.h | 17 +++++++------- unit_tests/mem_obj/buffer_tests.cpp | 22 +++++++++++++++++++ 8 files changed, 59 insertions(+), 26 deletions(-) diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 7f374b8aeb..41817425d3 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -607,4 +607,16 @@ cl_int CommandQueue::enqueueReadWriteBufferWithBlitTransfer(cl_command_type comm bool CommandQueue::queueDependenciesClearRequired() const { return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get(); } + +bool CommandQueue::blitEnqueueAllowed(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_command_type cmdType) { + bool blitAllowed = device->getExecutionEnvironment()->getHardwareInfo()->capabilityTable.blitterOperationsSupported && + DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get(); + + bool queueBlocked = false; + uint32_t calculatedTaskLevel = 0; + + obtainTaskLevelAndBlockedStatus(calculatedTaskLevel, numEventsInWaitList, eventWaitList, queueBlocked, cmdType, false); + + return blitAllowed && !queueBlocked; +} } // namespace NEO diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index 76823be796..8846be7886 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -443,7 +443,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); - virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){}; + virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType, bool updateQueueTaskLevel){}; MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, AuxTranslationDirection auxTranslationDirection); @@ -454,6 +454,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { cl_uint numEventsInWaitList, const cl_event *eventWaitList); void providePerformanceHint(TransferProperties &transferProperties); bool queueDependenciesClearRequired() const; + bool blitEnqueueAllowed(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_command_type cmdType); Context *context = nullptr; Device *device = nullptr; diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h index 37284454ca..41f26b3460 100644 --- a/runtime/command_queue/command_queue_hw.h +++ b/runtime/command_queue/command_queue_hw.h @@ -378,7 +378,7 @@ class CommandQueueHw : public CommandQueue { private: bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); - void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) override; + void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType, bool updateQueueTaskLevel) override; void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo); static void computeOffsetsValueForRectCommands(size_t *bufferOffset, size_t *hostOffset, diff --git a/runtime/command_queue/cpu_data_transfer_handler.cpp b/runtime/command_queue/cpu_data_transfer_handler.cpp index a7123b8a3b..7be470544b 100644 --- a/runtime/command_queue/cpu_data_transfer_handler.cpp +++ b/runtime/command_queue/cpu_data_transfer_handler.cpp @@ -56,7 +56,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie auto blockQueue = false; auto taskLevel = 0u; - obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType); + obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType, true); DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel); diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 916cfc2f43..5220437d24 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -173,7 +173,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, auto blockQueue = false; auto taskLevel = 0u; - obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType); + obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType, true); DBG_LOG(EventsDebugEnable, "blockQueue", blockQueue, "virtualEvent", virtualEvent, "taskLevel", taskLevel); @@ -510,13 +510,13 @@ void CommandQueueHw::processDeviceEnqueue(Kernel *parentKernel, } template -void CommandQueueHw::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) { +void CommandQueueHw::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType, bool updateQueueTaskLevel) { auto isQueueBlockedStatus = isQueueBlocked(); taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList); - blockQueue = (taskLevel == Event::eventNotReady) || isQueueBlockedStatus; + blockQueueStatus = (taskLevel == Event::eventNotReady) || isQueueBlockedStatus; - auto updateTaskLevel = isTaskLevelUpdateRequired(taskLevel, eventWaitList, numEventsInWaitList, commandType); - if (updateTaskLevel) { + auto taskLevelUpdateRequired = isTaskLevelUpdateRequired(taskLevel, eventWaitList, numEventsInWaitList, commandType); + if (updateQueueTaskLevel && taskLevelUpdateRequired) { taskLevel++; this->taskLevel = taskLevel; } diff --git a/runtime/command_queue/enqueue_read_buffer.h b/runtime/command_queue/enqueue_read_buffer.h index 4874f7f7c2..e32ebca6a6 100644 --- a/runtime/command_queue/enqueue_read_buffer.h +++ b/runtime/command_queue/enqueue_read_buffer.h @@ -37,25 +37,24 @@ cl_int CommandQueueHw::enqueueReadBuffer( notifyEnqueueReadBuffer(buffer, !!blockingRead); } - bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true; - bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_READ_BUFFER, blockingRead, size, ptr, + const cl_command_type cmdType = CL_COMMAND_READ_BUFFER; + bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; + bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr, numEventsInWaitList, eventWaitList); - bool blitOperationsSupported = device->getExecutionEnvironment()->getHardwareInfo()->capabilityTable.blitterOperationsSupported && - DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get(); if (isCpuCopyAllowed) { if (isMemTransferNeeded) { - return enqueueReadWriteBufferOnCpuWithMemoryTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr, + return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, ptr, numEventsInWaitList, eventWaitList, event); } else { - return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr, + return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cmdType, buffer, offset, size, ptr, numEventsInWaitList, eventWaitList, event); } } else if (!isMemTransferNeeded) { - return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER, blockingRead, + return enqueueMarkerForReadWriteOperation(buffer, ptr, cmdType, blockingRead, numEventsInWaitList, eventWaitList, event); - } else if (blitOperationsSupported) { - return enqueueReadWriteBufferWithBlitTransfer(CL_COMMAND_READ_BUFFER, buffer, !!blockingRead, offset, size, ptr, + } else if (blitEnqueueAllowed(numEventsInWaitList, eventWaitList, cmdType)) { + return enqueueReadWriteBufferWithBlitTransfer(cmdType, buffer, !!blockingRead, offset, size, ptr, numEventsInWaitList, eventWaitList, event); } diff --git a/runtime/command_queue/enqueue_write_buffer.h b/runtime/command_queue/enqueue_write_buffer.h index 48e8e628f7..4265fb288b 100644 --- a/runtime/command_queue/enqueue_write_buffer.h +++ b/runtime/command_queue/enqueue_write_buffer.h @@ -32,25 +32,24 @@ cl_int CommandQueueHw::enqueueWriteBuffer( const cl_event *eventWaitList, cl_event *event) { - auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER) : true; - bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_WRITE_BUFFER, blockingWrite, size, const_cast(ptr), + const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER; + auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; + bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast(ptr), numEventsInWaitList, eventWaitList); - bool blitOperationsSupported = device->getExecutionEnvironment()->getHardwareInfo()->capabilityTable.blitterOperationsSupported && - DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get(); if (isCpuCopyAllowed) { if (isMemTransferNeeded) { - return enqueueReadWriteBufferOnCpuWithMemoryTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast(ptr), + return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, const_cast(ptr), numEventsInWaitList, eventWaitList, event); } else { - return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast(ptr), + return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cmdType, buffer, offset, size, const_cast(ptr), numEventsInWaitList, eventWaitList, event); } } else if (!isMemTransferNeeded) { - return enqueueMarkerForReadWriteOperation(buffer, const_cast(ptr), CL_COMMAND_WRITE_BUFFER, blockingWrite, + return enqueueMarkerForReadWriteOperation(buffer, const_cast(ptr), cmdType, blockingWrite, numEventsInWaitList, eventWaitList, event); - } else if (blitOperationsSupported) { - return enqueueReadWriteBufferWithBlitTransfer(CL_COMMAND_WRITE_BUFFER, buffer, !!blockingWrite, offset, size, const_cast(ptr), + } else if (blitEnqueueAllowed(numEventsInWaitList, eventWaitList, cmdType)) { + return enqueueReadWriteBufferWithBlitTransfer(cmdType, buffer, !!blockingWrite, offset, size, const_cast(ptr), numEventsInWaitList, eventWaitList, event); } diff --git a/unit_tests/mem_obj/buffer_tests.cpp b/unit_tests/mem_obj/buffer_tests.cpp index ccd7441de9..278446f615 100644 --- a/unit_tests/mem_obj/buffer_tests.cpp +++ b/unit_tests/mem_obj/buffer_tests.cpp @@ -6,6 +6,7 @@ */ #include "runtime/command_queue/command_queue_hw.h" +#include "runtime/event/user_event.h" #include "runtime/gmm_helper/gmm.h" #include "runtime/gmm_helper/gmm_helper.h" #include "runtime/gmm_helper/resource_info.h" @@ -721,6 +722,27 @@ HWTEST_F(BcsBufferTests, givenBcsSupportedWhenEnqueueReadWriteBufferIsCalledThen EXPECT_EQ(2u, bcsCsr->blitBufferCalled); } +HWTEST_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDontTakeBcsPath) { + auto bcsCsr = static_cast *>(bcsMockContext->bcsCsr.get()); + + auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + bufferForBlt->forceDisallowCPUCopy = true; + auto commandQueue = std::unique_ptr(CommandQueue::create(bcsMockContext.get(), device.get(), nullptr, retVal)); + UserEvent userEvent(bcsMockContext.get()); + cl_event waitlist = &userEvent; + + commandQueue->enqueueWriteBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr); + commandQueue->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(0u, bcsCsr->blitBufferCalled); + + userEvent.setStatus(CL_COMPLETE); + + commandQueue->enqueueWriteBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(1u, bcsCsr->blitBufferCalled); + commandQueue->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(2u, bcsCsr->blitBufferCalled); +} + HWTEST_F(BcsBufferTests, givenInputDependenciesWhenEnqueueBlitCalledThenProgramSemaphoresBeforeBlitCommand) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;