diff --git a/runtime/command_queue/enqueue_read_buffer.h b/runtime/command_queue/enqueue_read_buffer.h index 0f8b34acb0..878db448f1 100644 --- a/runtime/command_queue/enqueue_read_buffer.h +++ b/runtime/command_queue/enqueue_read_buffer.h @@ -34,7 +34,7 @@ cl_int CommandQueueHw::enqueueReadBuffer( cl_int retVal = CL_SUCCESS; bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true; - if ((DebugManager.flags.DoCpuCopyOnReadBuffer.get() || + if (((DebugManager.flags.DoCpuCopyOnReadBuffer.get() && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList)) || buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) && context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) { if (!isMemTransferNeeded) { diff --git a/runtime/command_queue/enqueue_write_buffer.h b/runtime/command_queue/enqueue_write_buffer.h index e51a2c375d..d2e89d740d 100644 --- a/runtime/command_queue/enqueue_write_buffer.h +++ b/runtime/command_queue/enqueue_write_buffer.h @@ -31,7 +31,7 @@ cl_int CommandQueueHw::enqueueWriteBuffer( cl_int retVal = CL_SUCCESS; auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true; - if ((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() || + if (((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList)) || buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast(ptr), size)) && context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) { if (!isMemTransferNeeded) { diff --git a/runtime/event/event.cpp b/runtime/event/event.cpp index c96ad3faf2..d86e639807 100644 --- a/runtime/event/event.cpp +++ b/runtime/event/event.cpp @@ -715,4 +715,18 @@ void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimesta } TimestampPacketContainer *Event::getTimestampPacketNodes() const { return timestampPacketContainer.get(); } + +bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList) { + bool userEventsDependencies = false; + + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + auto event = castToObjectOrAbort(eventWaitList[i]); + if (!event->isReadyForSubmission()) { + userEventsDependencies = true; + break; + } + } + return userEventsDependencies; +} + } // namespace OCLRT diff --git a/runtime/event/event.h b/runtime/event/event.h index a0c35af1c5..cb9fa2452b 100644 --- a/runtime/event/event.h +++ b/runtime/event/event.h @@ -306,6 +306,8 @@ class Event : public BaseObject<_cl_event>, public IDNode { return false; } + static bool checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList); + protected: Event(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount); diff --git a/unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp b/unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp index 497146c426..4e9b878dd1 100644 --- a/unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp +++ b/unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -94,7 +94,7 @@ TEST_F(EnqueueReadBuffer, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) { delete pEvent; } -TEST_F(EnqueueReadBuffer, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { +TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(true); cl_int retVal = CL_SUCCESS; @@ -134,7 +134,40 @@ TEST_F(EnqueueReadBuffer, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEq pEvent->release(); } -TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { + +TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.DoCpuCopyOnReadBuffer.set(true); + cl_int retVal = CL_SUCCESS; + uint32_t taskLevelCmdQ = 17; + pCmdQ->taskLevel = taskLevelCmdQ; + + cl_bool blockingRead = CL_TRUE; + size_t size = sizeof(cl_float); + + cl_event event = nullptr; + auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); + void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); + retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), + blockingRead, + 0, + size, + ptr, + 0, + nullptr, + &event); + + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, event); + + auto pEvent = (Event *)event; + EXPECT_EQ(17u, pEvent->taskLevel); + EXPECT_EQ(17u, pCmdQ->taskLevel); + + pEvent->release(); +} + +TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(true); std::unique_ptr pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); @@ -175,6 +208,46 @@ TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndEnabledSupportCpuCopiesAndDstPt pEvent->release(); } + +TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndEventNotReadyWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.DoCpuCopyOnReadBuffer.set(true); + cl_int retVal = CL_SUCCESS; + uint32_t taskLevelCmdQ = 17; + pCmdQ->taskLevel = taskLevelCmdQ; + + Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, Event::eventNotReady, 4); + + cl_bool blockingRead = CL_FALSE; + size_t size = sizeof(cl_float); + cl_event eventWaitList[] = {&event1}; + cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); + cl_event event = nullptr; + auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); + cl_float mem[4]; + + retVal = pCmdQ->enqueueReadBuffer(dstBuffer.get(), + blockingRead, + 0, + size, + mem, + numEventsInWaitList, + eventWaitList, + &event); + + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, event); + + auto pEvent = (Event *)event; + EXPECT_EQ(Event::eventNotReady, pEvent->taskLevel); + EXPECT_EQ(Event::eventNotReady, pCmdQ->taskLevel); + event1.taskLevel = 20; + event1.setStatus(CL_COMPLETE); + pEvent->updateExecutionStatus(); + pCmdQ->isQueueBlocked(); + pEvent->release(); +} + TEST_F(EnqueueReadBuffer, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(false); diff --git a/unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp b/unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp index 734898212a..98ae8015bf 100644 --- a/unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp +++ b/unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -95,7 +95,7 @@ TEST_F(EnqueueWriteBufferTypeTest, eventReturnedShouldBeMaxOfInputEventsAndCmdQP delete pEvent; } -TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { +TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true); cl_int retVal = CL_SUCCESS; @@ -135,7 +135,78 @@ TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAn pEvent->release(); } -TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { + +TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndEventNotReadyWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true); + cl_int retVal = CL_SUCCESS; + uint32_t taskLevelCmdQ = 17; + pCmdQ->taskLevel = taskLevelCmdQ; + + Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, Event::eventNotReady, 4); + + cl_bool blockingWrite = CL_FALSE; + size_t size = sizeof(cl_float); + cl_event eventWaitList[] = {&event1}; + cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); + cl_event event = nullptr; + auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); + cl_float mem[4]; + + retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), + blockingWrite, + 0, + size, + mem, + numEventsInWaitList, + eventWaitList, + &event); + + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, event); + + auto pEvent = (Event *)event; + EXPECT_EQ(Event::eventNotReady, pEvent->taskLevel); + EXPECT_EQ(Event::eventNotReady, pCmdQ->taskLevel); + event1.taskLevel = 20; + event1.setStatus(CL_COMPLETE); + pEvent->updateExecutionStatus(); + pCmdQ->isQueueBlocked(); + pEvent->release(); +} + +TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true); + cl_int retVal = CL_SUCCESS; + uint32_t taskLevelCmdQ = 17; + pCmdQ->taskLevel = taskLevelCmdQ; + + cl_bool blockingRead = CL_TRUE; + size_t size = sizeof(cl_float); + cl_event event = nullptr; + auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); + void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); + retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), + blockingRead, + 0, + size, + ptr, + 0, + nullptr, + &event); + + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, event); + + auto pEvent = (Event *)event; + EXPECT_EQ(17u, pEvent->taskLevel); + EXPECT_EQ(17u, pCmdQ->taskLevel); + + pEvent->release(); +} + +TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true); std::unique_ptr pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); diff --git a/unit_tests/event/event_tests.cpp b/unit_tests/event/event_tests.cpp index 5dc2eb0ab2..cc15180cbe 100644 --- a/unit_tests/event/event_tests.cpp +++ b/unit_tests/event/event_tests.cpp @@ -213,6 +213,22 @@ TEST(Event, waitForEventsWithNotReadyEventDoesNotFlushQueue) { EXPECT_EQ(0u, cmdQ1->flushCounter); } +TEST(Event, givenNotReadyEventOnWaitlistWhenCheckingUserEventDependeciesThenTrueIsReturned) { + auto event1 = std::make_unique(nullptr, CL_COMMAND_NDRANGE_KERNEL, Event::eventNotReady, 0); + cl_event eventWaitlist[] = {event1.get()}; + + bool userEventDependencies = Event::checkUserEventDependencies(1, eventWaitlist); + EXPECT_TRUE(userEventDependencies); +} + +TEST(Event, givenReadyEventsOnWaitlistWhenCheckingUserEventDependeciesThenFalseIsReturned) { + auto event1 = std::make_unique(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 0); + cl_event eventWaitlist[] = {event1.get()}; + + bool userEventDependencies = Event::checkUserEventDependencies(1, eventWaitlist); + EXPECT_FALSE(userEventDependencies); +} + TEST_F(EventTest, GetEventInfo_CL_EVENT_COMMAND_EXECUTION_STATUS_sizeReturned) { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 5); cl_int eventStatus = -1;