diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 57ecd01781..e53a4e4809 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -235,6 +235,8 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); } + bool flushDependenciesForNonKernelCommand = false; + if (blitEnqueue) { processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType, blocking); } else if (multiDispatchInfo.empty() == false) { @@ -247,14 +249,19 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (CL_COMMAND_BARRIER == commandType) { getCommandStreamReceiver().requestStallingPipeControlOnNextFlush(); } - if (eventBuilder.getEvent()) { - for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) { - auto waitlistEvent = castToObjectOrAbort(eventsRequest.eventWaitList[i]); - if (waitlistEvent->getTimestampPacketNodes()) { + + for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) { + auto waitlistEvent = castToObjectOrAbort(eventsRequest.eventWaitList[i]); + if (waitlistEvent->getTimestampPacketNodes()) { + flushDependenciesForNonKernelCommand = true; + if (eventBuilder.getEvent()) { eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes()); } } } + if (flushDependenciesForNonKernelCommand) { + TimestampPacketHelper::programCsrDependencies(commandStream, csrDeps); + } } CompletionStamp completionStamp = {Event::eventNotReady, taskLevel, 0}; @@ -298,7 +305,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, devQueueHw->getDebugQueue()); } } - } else if (isCacheFlushCommand(commandType) || blitEnqueue) { + } else if (isCacheFlushCommand(commandType) || blitEnqueue || flushDependenciesForNonKernelCommand) { completionStamp = enqueueCommandWithoutKernel( surfacesForResidency, numSurfaceForResidency, diff --git a/unit_tests/command_queue/enqueue_barrier_tests.cpp b/unit_tests/command_queue/enqueue_barrier_tests.cpp index 4e07f4deb7..0371551906 100644 --- a/unit_tests/command_queue/enqueue_barrier_tests.cpp +++ b/unit_tests/command_queue/enqueue_barrier_tests.cpp @@ -186,11 +186,16 @@ HWTEST_F(BarrierTest, eventWithWaitDependenciesShouldSync) { &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); - auto pEvent = (Event *)event; + auto pEvent = castToObject(event); + auto &csr = pCmdQ->getCommandStreamReceiver(); // in this case only cmdQ raises the taskLevel why csr stay intact EXPECT_EQ(8u, pCmdQ->taskLevel); - EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel()); + if (csr.peekTimestampPacketWriteEnabled()) { + EXPECT_EQ(8u, commandStreamReceiver.peekTaskLevel()); + } else { + EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel()); + } EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); EXPECT_EQ(8u, pEvent->taskLevel); @@ -215,10 +220,17 @@ HWTEST_F(BarrierTest, givenNotBlockedCommandQueueAndEnqueueBarrierWithWaitlistRe eventWaitList, &event); + auto &csr = pCmdQ->getCommandStreamReceiver(); + EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(latestTaskCountWaitedBeforeEnqueue, this->pCmdQ->latestTaskCountWaited); - auto pEvent = (Event *)event; - EXPECT_EQ(17u, pEvent->peekTaskCount()); + auto pEvent = castToObject(event); + + if (csr.peekTimestampPacketWriteEnabled()) { + EXPECT_EQ(csr.peekTaskCount(), pEvent->peekTaskCount()); + } else { + EXPECT_EQ(17u, pEvent->peekTaskCount()); + } EXPECT_TRUE(pEvent->updateStatusAndCheckCompletion()); delete pEvent; } diff --git a/unit_tests/command_queue/enqueue_handler_tests.cpp b/unit_tests/command_queue/enqueue_handler_tests.cpp index 0364805def..c54e39f3ba 100644 --- a/unit_tests/command_queue/enqueue_handler_tests.cpp +++ b/unit_tests/command_queue/enqueue_handler_tests.cpp @@ -412,9 +412,9 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDu } HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestingEnqueueWithoutGpuSubmissionThenTaskCountIsNotInherited) { - struct ExternallySynchEvent : Event { - ExternallySynchEvent(CommandQueue *cmdQueue) : Event(cmdQueue, CL_COMMAND_MARKER, 0, 0) { - transitionExecutionStatus(CL_COMPLETE); + struct ExternallySynchEvent : VirtualEvent { + ExternallySynchEvent(CommandQueue *cmdQueue) { + setStatus(CL_COMPLETE); this->updateTaskCount(7); } bool isExternallySynchronized() const override { diff --git a/unit_tests/command_queue/enqueue_kernel_event_tests.cpp b/unit_tests/command_queue/enqueue_kernel_event_tests.cpp index 5bfd6aad15..1e811395e6 100644 --- a/unit_tests/command_queue/enqueue_kernel_event_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_event_tests.cpp @@ -207,9 +207,13 @@ TEST_F(EventTests, eventPassedToEnqueueMarkerHasTheSameLevelAsPreviousCommand) { retVal = clEnqueueMarkerWithWaitList(pCmdQ, 1, &event, &event2); - auto pEvent2 = (Event *)event2; + auto pEvent2 = castToObject(event2); - EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel); + if (csr.peekTimestampPacketWriteEnabled()) { + EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel + 1); + } else { + EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel); + } ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event2); @@ -217,7 +221,11 @@ TEST_F(EventTests, eventPassedToEnqueueMarkerHasTheSameLevelAsPreviousCommand) { retVal = clWaitForEvents(1, &event2); ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(csr.peekTaskLevel(), pEvent2->taskLevel + 1); + if (csr.peekTimestampPacketWriteEnabled()) { + EXPECT_EQ(csr.peekTaskLevel(), pEvent2->taskLevel); + } else { + EXPECT_EQ(csr.peekTaskLevel(), pEvent->taskLevel + 1); + } clReleaseEvent(event); clReleaseEvent(event2); diff --git a/unit_tests/command_queue/enqueue_marker_tests.cpp b/unit_tests/command_queue/enqueue_marker_tests.cpp index 23f8a7c36e..c90e9a9258 100644 --- a/unit_tests/command_queue/enqueue_marker_tests.cpp +++ b/unit_tests/command_queue/enqueue_marker_tests.cpp @@ -182,6 +182,8 @@ TEST_F(MarkerTest, givenMultipleEventWhenTheyArePassedToMarkerThenOutputEventHas &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; + auto initialTaskCount = pCmdQ->taskCount; + pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, @@ -189,8 +191,13 @@ TEST_F(MarkerTest, givenMultipleEventWhenTheyArePassedToMarkerThenOutputEventHas std::unique_ptr pEvent((Event *)(event)); - EXPECT_EQ(16u, pCmdQ->taskCount); - EXPECT_EQ(16u, pEvent->peekTaskCount()); + if (pCmdQ->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + EXPECT_EQ(initialTaskCount + 1, pCmdQ->taskCount); + EXPECT_EQ(initialTaskCount + 1, pEvent->peekTaskCount()); + } else { + EXPECT_EQ(16u, pCmdQ->taskCount); + EXPECT_EQ(16u, pEvent->peekTaskCount()); + } } TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMarkerThenOutputEventHasHighestTaskCount) { @@ -209,6 +216,8 @@ TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMa &userEvent}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; + auto initialTaskCount = pCmdQ->taskCount; + pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, @@ -216,8 +225,13 @@ TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMa std::unique_ptr pEvent((Event *)(event)); - EXPECT_EQ(16u, pCmdQ->taskCount); - EXPECT_EQ(16u, pEvent->peekTaskCount()); + if (pCmdQ->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + EXPECT_EQ(initialTaskCount + 1, pCmdQ->taskCount); + EXPECT_EQ(initialTaskCount + 1, pEvent->peekTaskCount()); + } else { + EXPECT_EQ(16u, pCmdQ->taskCount); + EXPECT_EQ(16u, pEvent->peekTaskCount()); + } } HWTEST_F(MarkerTest, givenMarkerCallFollowingNdrangeCallInBatchedModeWhenWaitForEventsIsCalledThenFlushStampIsProperlyUpdated) { diff --git a/unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp b/unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp index f2cdb313c9..a0f77beb97 100644 --- a/unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp +++ b/unit_tests/command_queue/enqueue_read_buffer_event_tests.cpp @@ -320,6 +320,7 @@ TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstP cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); + retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, @@ -333,9 +334,14 @@ TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstP EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); - auto pEvent = (Event *)event; - EXPECT_EQ(19u, pEvent->taskLevel); - EXPECT_EQ(19u, pCmdOOQ->taskLevel); + auto pEvent = castToObject(event); + if (pCmdOOQ->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + EXPECT_EQ(taskLevelEvent2 + 1, pCmdOOQ->taskLevel); + EXPECT_EQ(taskLevelEvent2 + 1, pEvent->taskLevel); + } else { + EXPECT_EQ(19u, pCmdOOQ->taskLevel); + EXPECT_EQ(19u, pEvent->taskLevel); + } pEvent->release(); } diff --git a/unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp b/unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp index 61de76d965..e6b9f5c167 100644 --- a/unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp +++ b/unit_tests/command_queue/enqueue_write_buffer_event_tests.cpp @@ -319,6 +319,7 @@ TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopi cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); + retVal = pCmdOOQ->enqueueWriteBuffer(srcBuffer.get(), blockingRead, 0, @@ -332,9 +333,14 @@ TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopi EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); - auto pEvent = (Event *)event; - EXPECT_EQ(19u, pEvent->taskLevel); - EXPECT_EQ(19u, pCmdOOQ->taskLevel); + auto pEvent = castToObject(event); + if (pCmdOOQ->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + EXPECT_EQ(taskLevelEvent2 + 1, pCmdOOQ->taskLevel); + EXPECT_EQ(taskLevelEvent2 + 1, pEvent->taskLevel); + } else { + EXPECT_EQ(19u, pCmdOOQ->taskLevel); + EXPECT_EQ(19u, pEvent->taskLevel); + } pEvent->release(); } diff --git a/unit_tests/helpers/timestamp_packet_tests.cpp b/unit_tests/helpers/timestamp_packet_tests.cpp index 4b79213c97..6bf16d6550 100644 --- a/unit_tests/helpers/timestamp_packet_tests.cpp +++ b/unit_tests/helpers/timestamp_packet_tests.cpp @@ -1373,6 +1373,84 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutK cmdQ->isQueueBlocked(); } +HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingMarkerWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); + + device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + MockContext context2(device2.get()); + + auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); + auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); + + MockTimestampPacketContainer node1(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); + MockTimestampPacketContainer node2(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); + + Event event0(cmdQ.get(), 0, 0, 0); + event0.addTimestampPacketNodes(node1); + Event event1(cmdQ2.get(), 0, 0, 0); + event1.addTimestampPacketNodes(node2); + + uint32_t numEventsOnWaitlist = 2; + + cl_event waitlist[] = {&event0, &event1}; + + cmdQ->enqueueMarkerWithWaitList(numEventsOnWaitlist, waitlist, nullptr); + + HardwareParse hwParserCsr; + HardwareParse hwParserCmdQ; + hwParserCsr.parseCommands(device->getUltCommandStreamReceiver().commandStream, 0); + hwParserCmdQ.parseCommands(*cmdQ->commandStream, 0); + + auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); + EXPECT_EQ(1u, csrSemaphores.size()); + verifySemaphore(genCmdCast(*(csrSemaphores[0])), node2.getNode(0)); + + auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); + EXPECT_EQ(1u, queueSemaphores.size()); + verifySemaphore(genCmdCast(*(queueSemaphores[0])), node1.getNode(0)); +} + +HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); + + device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + MockContext context2(device2.get()); + + auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); + auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); + + MockTimestampPacketContainer node1(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); + MockTimestampPacketContainer node2(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1); + + Event event0(cmdQ.get(), 0, 0, 0); + event0.addTimestampPacketNodes(node1); + Event event1(cmdQ2.get(), 0, 0, 0); + event1.addTimestampPacketNodes(node2); + + uint32_t numEventsOnWaitlist = 2; + + cl_event waitlist[] = {&event0, &event1}; + + cmdQ->enqueueBarrierWithWaitList(numEventsOnWaitlist, waitlist, nullptr); + + HardwareParse hwParserCsr; + HardwareParse hwParserCmdQ; + hwParserCsr.parseCommands(device->getUltCommandStreamReceiver().commandStream, 0); + hwParserCmdQ.parseCommands(*cmdQ->commandStream, 0); + + auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); + EXPECT_EQ(1u, csrSemaphores.size()); + verifySemaphore(genCmdCast(*(csrSemaphores[0])), node2.getNode(0)); + + auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); + EXPECT_EQ(1u, queueSemaphores.size()); + verifySemaphore(genCmdCast(*(queueSemaphores[0])), node1.getNode(0)); +} + HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingMarkerThenDoNothing) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true;