Flush Marker command with TimestampPacket dependencies

Change-Id: I6475624996ccc254adb6641bef3cda431e57325a
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2019-07-10 11:39:19 +02:00
committed by sys_ocldev
parent 3a61e34030
commit 2c0c0ace88
8 changed files with 156 additions and 25 deletions

View File

@@ -235,6 +235,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
}
bool flushDependenciesForNonKernelCommand = false;
if (blitEnqueue) {
processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType, blocking);
} else if (multiDispatchInfo.empty() == false) {
@@ -247,14 +249,19 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (CL_COMMAND_BARRIER == commandType) {
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
}
if (eventBuilder.getEvent()) {
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
if (waitlistEvent->getTimestampPacketNodes()) {
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
if (waitlistEvent->getTimestampPacketNodes()) {
flushDependenciesForNonKernelCommand = true;
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes());
}
}
}
if (flushDependenciesForNonKernelCommand) {
TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStream, csrDeps);
}
}
CompletionStamp completionStamp = {Event::eventNotReady, taskLevel, 0};
@@ -298,7 +305,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
devQueueHw->getDebugQueue());
}
}
} else if (isCacheFlushCommand(commandType) || blitEnqueue) {
} else if (isCacheFlushCommand(commandType) || blitEnqueue || flushDependenciesForNonKernelCommand) {
completionStamp = enqueueCommandWithoutKernel(
surfacesForResidency,
numSurfaceForResidency,

View File

@@ -186,11 +186,16 @@ HWTEST_F(BarrierTest, eventWithWaitDependenciesShouldSync) {
&event);
ASSERT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, event);
auto pEvent = (Event *)event;
auto pEvent = castToObject<Event>(event);
auto &csr = pCmdQ->getCommandStreamReceiver();
// in this case only cmdQ raises the taskLevel while the csr stays intact (unless timestamp packet write is enabled, see the check below)
EXPECT_EQ(8u, pCmdQ->taskLevel);
EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel());
if (csr.peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(8u, commandStreamReceiver.peekTaskLevel());
} else {
EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel());
}
EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel);
EXPECT_EQ(8u, pEvent->taskLevel);
@@ -215,10 +220,17 @@ HWTEST_F(BarrierTest, givenNotBlockedCommandQueueAndEnqueueBarrierWithWaitlistRe
eventWaitList,
&event);
auto &csr = pCmdQ->getCommandStreamReceiver();
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(latestTaskCountWaitedBeforeEnqueue, this->pCmdQ->latestTaskCountWaited);
auto pEvent = (Event *)event;
EXPECT_EQ(17u, pEvent->peekTaskCount());
auto pEvent = castToObject<Event>(event);
if (csr.peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(csr.peekTaskCount(), pEvent->peekTaskCount());
} else {
EXPECT_EQ(17u, pEvent->peekTaskCount());
}
EXPECT_TRUE(pEvent->updateStatusAndCheckCompletion());
delete pEvent;
}

View File

@@ -412,9 +412,9 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDu
}
HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestingEnqueueWithoutGpuSubmissionThenTaskCountIsNotInherited) {
struct ExternallySynchEvent : Event {
ExternallySynchEvent(CommandQueue *cmdQueue) : Event(cmdQueue, CL_COMMAND_MARKER, 0, 0) {
transitionExecutionStatus(CL_COMPLETE);
struct ExternallySynchEvent : VirtualEvent {
ExternallySynchEvent(CommandQueue *cmdQueue) {
setStatus(CL_COMPLETE);
this->updateTaskCount(7);
}
bool isExternallySynchronized() const override {

View File

@@ -207,9 +207,13 @@ TEST_F(EventTests, eventPassedToEnqueueMarkerHasTheSameLevelAsPreviousCommand) {
retVal = clEnqueueMarkerWithWaitList(pCmdQ, 1, &event, &event2);
auto pEvent2 = (Event *)event2;
auto pEvent2 = castToObject<Event>(event2);
EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel);
if (csr.peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel + 1);
} else {
EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel);
}
ASSERT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, event2);
@@ -217,7 +221,11 @@ TEST_F(EventTests, eventPassedToEnqueueMarkerHasTheSameLevelAsPreviousCommand) {
retVal = clWaitForEvents(1, &event2);
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(csr.peekTaskLevel(), pEvent2->taskLevel + 1);
if (csr.peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(csr.peekTaskLevel(), pEvent2->taskLevel);
} else {
EXPECT_EQ(csr.peekTaskLevel(), pEvent->taskLevel + 1);
}
clReleaseEvent(event);
clReleaseEvent(event2);

View File

@@ -182,6 +182,8 @@ TEST_F(MarkerTest, givenMultipleEventWhenTheyArePassedToMarkerThenOutputEventHas
&event3};
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
cl_event event = nullptr;
auto initialTaskCount = pCmdQ->taskCount;
pCmdQ->enqueueMarkerWithWaitList(
numEventsInWaitList,
eventWaitList,
@@ -189,8 +191,13 @@ TEST_F(MarkerTest, givenMultipleEventWhenTheyArePassedToMarkerThenOutputEventHas
std::unique_ptr<Event> pEvent((Event *)(event));
EXPECT_EQ(16u, pCmdQ->taskCount);
EXPECT_EQ(16u, pEvent->peekTaskCount());
if (pCmdQ->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(initialTaskCount + 1, pCmdQ->taskCount);
EXPECT_EQ(initialTaskCount + 1, pEvent->peekTaskCount());
} else {
EXPECT_EQ(16u, pCmdQ->taskCount);
EXPECT_EQ(16u, pEvent->peekTaskCount());
}
}
TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMarkerThenOutputEventHasHighestTaskCount) {
@@ -209,6 +216,8 @@ TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMa
&userEvent};
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
cl_event event = nullptr;
auto initialTaskCount = pCmdQ->taskCount;
pCmdQ->enqueueMarkerWithWaitList(
numEventsInWaitList,
eventWaitList,
@@ -216,8 +225,13 @@ TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMa
std::unique_ptr<Event> pEvent((Event *)(event));
EXPECT_EQ(16u, pCmdQ->taskCount);
EXPECT_EQ(16u, pEvent->peekTaskCount());
if (pCmdQ->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(initialTaskCount + 1, pCmdQ->taskCount);
EXPECT_EQ(initialTaskCount + 1, pEvent->peekTaskCount());
} else {
EXPECT_EQ(16u, pCmdQ->taskCount);
EXPECT_EQ(16u, pEvent->peekTaskCount());
}
}
HWTEST_F(MarkerTest, givenMarkerCallFollowingNdrangeCallInBatchedModeWhenWaitForEventsIsCalledThenFlushStampIsProperlyUpdated) {

View File

@@ -320,6 +320,7 @@ TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstP
cl_event event = nullptr;
auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(),
blockingRead,
0,
@@ -333,9 +334,14 @@ TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstP
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, event);
auto pEvent = (Event *)event;
EXPECT_EQ(19u, pEvent->taskLevel);
EXPECT_EQ(19u, pCmdOOQ->taskLevel);
auto pEvent = castToObject<Event>(event);
if (pCmdOOQ->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(taskLevelEvent2 + 1, pCmdOOQ->taskLevel);
EXPECT_EQ(taskLevelEvent2 + 1, pEvent->taskLevel);
} else {
EXPECT_EQ(19u, pCmdOOQ->taskLevel);
EXPECT_EQ(19u, pEvent->taskLevel);
}
pEvent->release();
}

View File

@@ -319,6 +319,7 @@ TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopi
cl_event event = nullptr;
auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
retVal = pCmdOOQ->enqueueWriteBuffer(srcBuffer.get(),
blockingRead,
0,
@@ -332,9 +333,14 @@ TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopi
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, event);
auto pEvent = (Event *)event;
EXPECT_EQ(19u, pEvent->taskLevel);
EXPECT_EQ(19u, pCmdOOQ->taskLevel);
auto pEvent = castToObject<Event>(event);
if (pCmdOOQ->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(taskLevelEvent2 + 1, pCmdOOQ->taskLevel);
EXPECT_EQ(taskLevelEvent2 + 1, pEvent->taskLevel);
} else {
EXPECT_EQ(19u, pCmdOOQ->taskLevel);
EXPECT_EQ(19u, pEvent->taskLevel);
}
pEvent->release();
}

View File

@@ -1373,6 +1373,84 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutK
cmdQ->isQueueBlocked();
}
// Enqueues a marker (no kernel) whose waitlist holds two events carrying timestamp packet nodes,
// one created on this queue and one created on a queue of a second device, then checks that
// exactly one MI_SEMAPHORE_WAIT is found in the CSR command stream and exactly one in the
// queue's own command stream.
HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingMarkerWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(executionEnvironment, 1u));

    // Timestamp packet writes are switched on for both command stream receivers.
    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
    device2->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;

    MockContext context2(device2.get());

    auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(context, device.get(), nullptr));
    auto cmdQ2 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context2, device2.get(), nullptr);

    MockTimestampPacketContainer node1(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer node2(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);

    // event0 belongs to the queue under test, event1 to the queue created on device2.
    Event event0(cmdQ.get(), 0, 0, 0);
    event0.addTimestampPacketNodes(node1);
    Event event1(cmdQ2.get(), 0, 0, 0);
    event1.addTimestampPacketNodes(node2);

    uint32_t numEventsOnWaitlist = 2;
    cl_event waitlist[] = {&event0, &event1};

    // No output event is requested here (last argument is nullptr).
    cmdQ->enqueueMarkerWithWaitList(numEventsOnWaitlist, waitlist, nullptr);

    HardwareParse hwParserCsr;
    HardwareParse hwParserCmdQ;
    hwParserCsr.parseCommands<FamilyType>(device->getUltCommandStreamReceiver<FamilyType>().commandStream, 0);
    hwParserCmdQ.parseCommands<FamilyType>(*cmdQ->commandStream, 0);

    // ASSERT (not EXPECT) on the size: element [0] is dereferenced right after, so the test
    // must stop here instead of indexing an empty container when no semaphore was programmed.
    auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
    ASSERT_EQ(1u, csrSemaphores.size());
    // The CSR stream semaphore is checked against node2 (event from the other queue).
    verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node2.getNode(0));

    auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
    ASSERT_EQ(1u, queueSemaphores.size());
    // The queue stream semaphore is checked against node1 (event from the same queue).
    verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node1.getNode(0));
}
// Enqueues a barrier (no kernel) whose waitlist holds two events carrying timestamp packet nodes,
// one created on this queue and one created on a queue of a second device, then checks that
// exactly one MI_SEMAPHORE_WAIT is found in the CSR command stream and exactly one in the
// queue's own command stream.
HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(executionEnvironment, 1u));

    // Timestamp packet writes are switched on for both command stream receivers.
    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
    device2->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;

    MockContext context2(device2.get());

    auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(context, device.get(), nullptr));
    auto cmdQ2 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context2, device2.get(), nullptr);

    MockTimestampPacketContainer node1(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer node2(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);

    // event0 belongs to the queue under test, event1 to the queue created on device2.
    Event event0(cmdQ.get(), 0, 0, 0);
    event0.addTimestampPacketNodes(node1);
    Event event1(cmdQ2.get(), 0, 0, 0);
    event1.addTimestampPacketNodes(node2);

    uint32_t numEventsOnWaitlist = 2;
    cl_event waitlist[] = {&event0, &event1};

    // No output event is requested here (last argument is nullptr).
    cmdQ->enqueueBarrierWithWaitList(numEventsOnWaitlist, waitlist, nullptr);

    HardwareParse hwParserCsr;
    HardwareParse hwParserCmdQ;
    hwParserCsr.parseCommands<FamilyType>(device->getUltCommandStreamReceiver<FamilyType>().commandStream, 0);
    hwParserCmdQ.parseCommands<FamilyType>(*cmdQ->commandStream, 0);

    // ASSERT (not EXPECT) on the size: element [0] is dereferenced right after, so the test
    // must stop here instead of indexing an empty container when no semaphore was programmed.
    auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
    ASSERT_EQ(1u, csrSemaphores.size());
    // The CSR stream semaphore is checked against node2 (event from the other queue).
    verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node2.getNode(0));

    auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
    ASSERT_EQ(1u, queueSemaphores.size());
    // The queue stream semaphore is checked against node1 (event from the same queue).
    verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node1.getNode(0));
}
HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingMarkerThenDoNothing) {
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;