diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h index 3bf63337a5..29bb19e34b 100644 --- a/runtime/command_queue/command_queue_hw.h +++ b/runtime/command_queue/command_queue_hw.h @@ -334,7 +334,7 @@ class CommandQueueHw : public CommandQueue { size_t commandStreamStart, bool &blocking, const MultiDispatchInfo &multiDispatchInfo, - TimestampPacketContainer *previousTimestampPacketNodes, + TimestampPacketDependencies &timestampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, @@ -357,8 +357,7 @@ class CommandQueueHw : public CommandQueue { size_t commandStreamStart, bool &blocking, const EnqueueProperties &enqueueProperties, - TimestampPacketContainer *previousTimestampPacketNodes, - TimestampPacketContainer &barrierTimestampPacketNodes, + TimestampPacketDependencies &timestampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel); @@ -367,8 +366,7 @@ class CommandQueueHw : public CommandQueue { LinearStream *commandStream, CsrDependencies &csrDeps); BlitProperties processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo, - TimestampPacketContainer &previousTimestampPacketNodes, - TimestampPacketContainer &barrierTimestampPacketNode, + TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest, LinearStream &commandStream, uint32_t commandType, bool queueBlocked); @@ -450,6 +448,6 @@ class CommandQueueHw : public CommandQueue { DeviceQueueHw *devQueueHw, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, - TimestampPacketContainer &previousTimestampPacketNodes); + TimestampPacketDependencies &timestampPacketDependencies); }; } // namespace NEO diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 82575a0c12..beffce2a89 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -54,12 +54,12 @@ void 
CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount const cl_event *eventWaitList, cl_event *event) { BuiltInOwnershipWrapper builtInLock; + MemObjsForAuxTranslation memObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo(kernel); if (DebugManager.flags.ForceDispatchScheduler.get()) { forceDispatchScheduler(multiDispatchInfo); } else { - MemObjsForAuxTranslation memObjsForAuxTranslation; if (kernel->isAuxTranslationRequired()) { auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice()); builtInLock.takeOwnership(builder, this->context); @@ -189,8 +189,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, blocking = true; } - TimestampPacketContainer previousTimestampPacketNodes; - TimestampPacketContainer barrierTimestampPacketNode; + TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); CsrDependencies csrDeps; BlitPropertiesContainer blitPropertiesContainer; @@ -207,12 +206,12 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (blitEnqueue && !blockQueue && getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired()) { auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); - barrierTimestampPacketNode.add(allocator->getTag()); + timestampPacketDependencies.barrierNodes.add(allocator->getTag()); } if (nodesCount > 0) { - obtainNewTimestampPacketNodes(nodesCount, previousTimestampPacketNodes, clearAllDependencies); - csrDeps.push_back(&previousTimestampPacketNodes); + obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies); + csrDeps.push_back(&timestampPacketDependencies.previousEnqueueNodes); } } @@ -227,12 +226,12 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, bool flushDependenciesForNonKernelCommand = false; if 
(blitEnqueue) { - blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, barrierTimestampPacketNode, + blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies, eventsRequest, commandStream, commandType, blockQueue)); } else if (multiDispatchInfo.empty() == false) { processDispatchForKernels(multiDispatchInfo, printfHandler, eventBuilder.getEvent(), hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(), - previousTimestampPacketNodes); + timestampPacketDependencies); } else if (isCacheFlushCommand(commandType)) { processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps); } else if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { @@ -274,7 +273,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, commandStreamStart, blocking, multiDispatchInfo, - &previousTimestampPacketNodes, + timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel, @@ -296,8 +295,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, commandStreamStart, blocking, enqueueProperties, - &previousTimestampPacketNodes, - barrierTimestampPacketNode, + timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel); @@ -345,7 +343,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, surfacesForResidency, numSurfaceForResidency, multiDispatchInfo, - previousTimestampPacketNodes, + timestampPacketDependencies.previousEnqueueNodes, blockedCommandsData, enqueueProperties, eventsRequest, @@ -380,7 +378,7 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf DeviceQueueHw *devQueueHw, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, - TimestampPacketContainer &previousTimestampPacketNodes) { + TimestampPacketDependencies &timestampPacketDependencies) { TagNode *hwPerfCounter = nullptr; 
DebugManager.dumpKernelArgs(&multiDispatchInfo); @@ -422,7 +420,7 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf blockedCommandsData, hwTimeStamps, hwPerfCounter, - &previousTimestampPacketNodes, + &timestampPacketDependencies, timestampPacketContainer.get(), commandType); @@ -439,8 +437,7 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf template BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo, - TimestampPacketContainer &previousTimestampPacketNodes, - TimestampPacketContainer &barrierTimestampPacketNode, + TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest, LinearStream &commandStream, uint32_t commandType, bool queueBlocked) { auto blitDirection = BlitProperties::obtainBlitDirection(commandType); @@ -453,8 +450,8 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(const Mu blitProperties.csrDependencies.fillFromEventsRequest(eventsRequest, *blitCommandStreamReceiver, CsrDependencies::DependenciesType::All); - blitProperties.csrDependencies.push_back(&previousTimestampPacketNodes); - blitProperties.csrDependencies.push_back(&barrierTimestampPacketNode); + blitProperties.csrDependencies.push_back(&timestampPacketDependencies.previousEnqueueNodes); + blitProperties.csrDependencies.push_back(&timestampPacketDependencies.barrierNodes); } auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0); @@ -581,7 +578,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( size_t commandStreamStart, bool &blocking, const MultiDispatchInfo &multiDispatchInfo, - TimestampPacketContainer *previousTimestampPacketNodes, + TimestampPacketDependencies &timestampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, @@ -597,7 +594,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( } if (timestampPacketContainer) { 
timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver()); - previousTimestampPacketNodes->makeResident(getGpgpuCommandStreamReceiver()); + timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver()); } bool anyUncacheableArgs = false; @@ -841,15 +838,14 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( size_t commandStreamStart, bool &blocking, const EnqueueProperties &enqueueProperties, - TimestampPacketContainer *previousTimestampPacketNodes, - TimestampPacketContainer &barrierTimestampPacketNodes, + TimestampPacketDependencies &timestampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel) { if (timestampPacketContainer) { timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver()); - previousTimestampPacketNodes->makeResident(getGpgpuCommandStreamReceiver()); + timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver()); } for (auto surface : CreateRange(surfaces, surfaceCount)) { @@ -863,7 +859,7 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( DispatchFlags dispatchFlags( {}, //csrDependencies - &barrierTimestampPacketNodes, //barrierTimestampPacketNodes + &timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes {}, //pipelineSelectArgs flushStamp->getStampReference(), //flushStampReference QueueThrottle::MEDIUM, //throttle diff --git a/runtime/command_queue/hardware_interface.h b/runtime/command_queue/hardware_interface.h index 095a6c2f3e..c018420b6e 100644 --- a/runtime/command_queue/hardware_interface.h +++ b/runtime/command_queue/hardware_interface.h @@ -42,7 +42,7 @@ class HardwareInterface { KernelOperation *blockedCommandsData, TagNode *hwTimeStamps, TagNode *hwPerfCounter, - TimestampPacketContainer *previousTimestampPacketNodes, + TimestampPacketDependencies *timestampPacketDependencies, TimestampPacketContainer *currentTimestampPacketNodes, uint32_t commandType); diff --git 
a/runtime/command_queue/hardware_interface_base.inl b/runtime/command_queue/hardware_interface_base.inl index c1c4b3b32b..e78fc04856 100644 --- a/runtime/command_queue/hardware_interface_base.inl +++ b/runtime/command_queue/hardware_interface_base.inl @@ -30,7 +30,7 @@ void HardwareInterface::dispatchWalker( KernelOperation *blockedCommandsData, TagNode *hwTimeStamps, TagNode *hwPerfCounter, - TimestampPacketContainer *previousTimestampPacketNodes, + TimestampPacketDependencies *timestampPacketDependencies, TimestampPacketContainer *currentTimestampPacketNodes, uint32_t commandType) { diff --git a/runtime/helpers/timestamp_packet.h b/runtime/helpers/timestamp_packet.h index 8c72e31e2d..6bdd3380cf 100644 --- a/runtime/helpers/timestamp_packet.h +++ b/runtime/helpers/timestamp_packet.h @@ -91,6 +91,11 @@ class TimestampPacketContainer : public NonCopyableClass { std::vector timestampPacketNodes; }; +struct TimestampPacketDependencies : public NonCopyableClass { + TimestampPacketContainer previousEnqueueNodes; + TimestampPacketContainer barrierNodes; +}; + struct TimestampPacketHelper { template static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNode &timestampPacketNode) { diff --git a/unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp b/unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp index c0915a832a..c029d6833d 100644 --- a/unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp @@ -44,12 +44,11 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedT EventBuilder eventBuilder; Surface *surfaces[] = {surface.get()}; auto blocking = true; - TimestampPacketContainer previousTimestampPacketNodes; - TimestampPacketContainer barrierTimestampPacketNodes; + TimestampPacketDependencies timestampPacketDependencies; EnqueueProperties enqueueProperties(false, false, false, true, nullptr); - 
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, - barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0); + mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, + eventsRequest, eventBuilder, 0); EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId()), 1u); } @@ -116,14 +115,13 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); auto blocking = true; - TimestampPacketContainer previousTimestampPacketNodes; - TimestampPacketContainer barrierTimestampPacketNodes; + TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; EnqueueProperties enqueueProperties(false, false, false, true, nullptr); - mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, - barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0); + mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, + eventsRequest, eventBuilder, 0); EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); @@ -146,8 +144,7 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); auto blocking = true; - TimestampPacketContainer previousTimestampPacketNodes; - TimestampPacketContainer barrierTimestampPacketNodes; + TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; BuiltinOpParams builtinOpParams; @@ -156,16 +153,16 @@ 
HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setBuiltinOpParams(builtinOpParams); - mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, true); - BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, barrierTimestampPacketNodes, + mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true); + BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies, eventsRequest, mockCmdQ->getCS(0), 0, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); EnqueueProperties enqueueProperties(true, false, false, false, &blitPropertiesContainer); - mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, - barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0); + mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, + eventsRequest, eventBuilder, 0); EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); @@ -184,8 +181,7 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowO cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); - TimestampPacketContainer previousTimestampPacketNodes; - TimestampPacketContainer barrierTimestampPacketNodes; + TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; @@ -196,21 +192,21 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowO MultiDispatchInfo multiDispatchInfo; 
multiDispatchInfo.setBuiltinOpParams(builtinOpParams); - mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, true); - BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, barrierTimestampPacketNodes, + mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true); + BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies, eventsRequest, mockCmdQ->getCS(0), 0, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); EnqueueProperties enqueueProperties(true, false, false, false, &blitPropertiesContainer); mockCsr->nTo1SubmissionModelEnabled = false; - mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes, - barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0); + mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies, + eventsRequest, eventBuilder, 0); EXPECT_FALSE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); mockCsr->nTo1SubmissionModelEnabled = true; - mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes, - barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0); + mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies, + eventsRequest, eventBuilder, 0); EXPECT_TRUE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); }