From f4008336f8dea2fb840080beb3ced4e173c6a979 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Wed, 4 Sep 2019 09:33:21 +0200 Subject: [PATCH] Dispatch blit operation in blocked path Change-Id: I2230bde051449bf22c74c112bbe5719aad644533 Signed-off-by: Dunajski, Bartosz Related-To: NEO-3020 --- runtime/helpers/task_information.cpp | 25 +++++++++++--- runtime/helpers/task_information.h | 3 +- unit_tests/mem_obj/buffer_tests.cpp | 49 +++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 11 deletions(-) diff --git a/runtime/helpers/task_information.cpp b/runtime/helpers/task_information.cpp index 2b20580cf1..ff6a488638 100644 --- a/runtime/helpers/task_information.cpp +++ b/runtime/helpers/task_information.cpp @@ -139,7 +139,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate if (printfHandler) { printfHandler.get()->makeResident(commandStreamReceiver); } - makeTimestampPacketsResident(); + makeTimestampPacketsResident(commandStreamReceiver); if (executionModelKernel) { uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1; @@ -224,6 +224,19 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate return completionStamp; } +void CommandWithoutKernel::dispatchBlitOperation() { + auto bcsCsr = commandQueue.getBcsCommandStreamReceiver(); + + makeTimestampPacketsResident(*bcsCsr); + + auto &blitProperties = kernelOperation->blitProperties; + blitProperties.csrDependencies.fillFromEventsRequest(eventsRequest, *bcsCsr, CsrDependencies::DependenciesType::All); + blitProperties.csrDependencies.push_back(previousTimestampPacketNodes.get()); + blitProperties.outputTimestampPacket = currentTimestampPacketNodes.get(); + + bcsCsr->blitBuffer(blitProperties); +} + CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) { if (terminated) { return completionStamp; @@ -241,6 +254,10 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate auto 
lockCSR = commandStreamReceiver.obtainUniqueOwnership(); + if (kernelOperation->blitEnqueue) { + dispatchBlitOperation(); + } + DispatchFlags dispatchFlags; dispatchFlags.blocking = true; dispatchFlags.lowPriority = commandQueue.getPriority() == QueuePriority::LOW; @@ -254,7 +271,7 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate dispatchFlags.csrDependencies.fillFromEventsRequest(eventsRequest, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); - makeTimestampPacketsResident(); + makeTimestampPacketsResident(commandStreamReceiver); gtpinNotifyPreFlushTask(&commandQueue); @@ -298,9 +315,7 @@ Command::~Command() { } } -void Command::makeTimestampPacketsResident() { - auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); - +void Command::makeTimestampPacketsResident(CommandStreamReceiver &commandStreamReceiver) { if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { for (cl_event &eventFromWaitList : eventsWaitlist) { auto event = castToObjectOrAbort(eventFromWaitList); diff --git a/runtime/helpers/task_information.h b/runtime/helpers/task_information.h index 98d30f46b0..17d1377441 100644 --- a/runtime/helpers/task_information.h +++ b/runtime/helpers/task_information.h @@ -96,7 +96,7 @@ class Command : public IFNode { } void setTimestampPacketNode(TimestampPacketContainer &current, TimestampPacketContainer &previous); void setEventsRequest(EventsRequest &eventsRequest); - void makeTimestampPacketsResident(); + void makeTimestampPacketsResident(CommandStreamReceiver &commandStreamReceiver); TagNode *timestamp = nullptr; CompletionStamp completionStamp = {}; @@ -152,5 +152,6 @@ class CommandWithoutKernel : public Command { public: using Command::Command; CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; + void dispatchBlitOperation(); }; } // namespace NEO diff --git a/unit_tests/mem_obj/buffer_tests.cpp b/unit_tests/mem_obj/buffer_tests.cpp index fc4ed0ba44..b1a3bb0bf1 
100644 --- a/unit_tests/mem_obj/buffer_tests.cpp +++ b/unit_tests/mem_obj/buffer_tests.cpp @@ -702,7 +702,7 @@ struct BcsBufferTests : public ::testing::Test { } bcsMockContext = std::make_unique(device.get()); - commandQueue.reset(CommandQueue::create(bcsMockContext.get(), device.get(), nullptr, retVal)); + commandQueue.reset(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); } template @@ -767,7 +767,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueReadWriteBufferIs EXPECT_EQ(2u, bcsCsr->blitBufferCalled); } -HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDontTakeBcsPath) { +HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispatchBlitWhenUnblocked) { auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); @@ -781,10 +781,49 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDontTa userEvent.setStatus(CL_COMPLETE); - commandQueue->enqueueWriteBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(1u, bcsCsr->blitBufferCalled); - commandQueue->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, bcsCsr->blitBufferCalled); + + commandQueue->enqueueWriteBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(3u, bcsCsr->blitBufferCalled); + commandQueue->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(4u, bcsCsr->blitBufferCalled); +} + +HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedBlitEnqueueWhenUnblockingThenMakeResidentAllTimestampPackets) { + auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); + bcsCsr->storeMakeResidentAllocations = true; + + auto mockCmdQ = static_cast *>(commandQueue.get()); + + auto 
bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + bufferForBlt->forceDisallowCPUCopy = true; + + TimestampPacketContainer previousTimestampPackets; + mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPackets, false); + auto dependencyFromPreviousEnqueue = mockCmdQ->timestampPacketContainer->peekNodes()[0]; + + auto event = make_releaseable(mockCmdQ, CL_COMMAND_READ_BUFFER, 0, 0); + MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); + auto eventDependency = eventDependencyContainer.getNode(0); + event->addTimestampPacketNodes(eventDependencyContainer); + + auto userEvent = make_releaseable(bcsMockContext.get()); + cl_event waitlist[] = {userEvent.get(), event.get()}; + + commandQueue->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 2, waitlist, nullptr); + + auto outputDependency = mockCmdQ->timestampPacketContainer->peekNodes()[0]; + EXPECT_NE(outputDependency, dependencyFromPreviousEnqueue); + + EXPECT_FALSE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation())); + EXPECT_FALSE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation())); + EXPECT_FALSE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation())); + + userEvent->setStatus(CL_COMPLETE); + + EXPECT_TRUE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation(), bcsCsr->taskCount)); + EXPECT_TRUE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation(), bcsCsr->taskCount)); + EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation(), bcsCsr->taskCount)); } HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) {