From 5f559ec2d5c790ac621a97d9a2e3ac06b01eba3b Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Fri, 2 Jun 2023 12:17:18 +0000 Subject: [PATCH] feature: signal sync alloc in in-order CmdList appendSignalEvent path Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 1 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 12 ++++ .../source/cmdlist/cmdlist_hw_immediate.h | 2 +- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 14 +--- .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + .../sources/cmdlist/test_cmdlist_6.cpp | 7 ++ .../test_cmdlist_append_launch_kernel_3.cpp | 69 +++++++++++++++++++ 7 files changed, 93 insertions(+), 13 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index f77b141e60..5942755442 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -322,6 +322,7 @@ struct CommandListCoreFamily : CommandListImp { void postInitComputeSetup(); NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel); void obtainNewTimestampPacketNode(); + virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; } /* base-class default: relaxed ordering is not allowed */ }; template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index ec3e9b1aff..c5364c8072 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2088,6 +2088,10 @@ inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint template ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_handle_t hEvent) { + if (this->inOrderExecutionEnabled) { /* in-order mode: no explicit wait events are passed (0, nullptr); NOTE(review): presumably this programs the wait on the previous in-order operation - confirm in addEventsToCmdList */ + addEventsToCmdList(0, nullptr, isRelaxedOrderingDispatchAllowed(0), false); + } + auto event = Event::fromHandle(hEvent); event->resetKernelCountAndPacketUsedCount(); @@ -2107,6 +2111,14 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han bool 
appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet()); dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync); + if (this->inOrderExecutionEnabled) { /* in-order mode: take a new timestamp node, wait on the event signaled above, then signal the in-order dependency node */ + obtainNewTimestampPacketNode(); + + CommandListCoreFamily::appendWaitOnEvents(1, &hEvent, false, false, false); + + appendSignalInOrderDependencyTimestampPacket(); + } + if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 3a86e88b6c..833479cd8d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -174,7 +174,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendLaunchKernelWithParams(K } if (inOrderExecSignalRequired && event) { - using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; - auto gpuAddr = event->getCompletionFieldGpuAddress(this->device); - - uint32_t packetsToWait = this->signalAllEventPackets ? 
event->getMaxPacketsCount() : event->getPacketsInUse(); - - for (uint32_t i = 0u; i < packetsToWait; i++) { - NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), - gpuAddr, - Event::State::STATE_CLEARED, - MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); - gpuAddr += event->getSinglePacketSize(); - } + auto eventHandle = event->toHandle(); + CommandListCoreFamily::appendWaitOnEvents(1, &eventHandle, false, false, false); appendSignalInOrderDependencyTimestampPacket(); } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index ea712e7b28..e1a40f1d9d 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -75,6 +75,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::indirectAllocationsAllowed; using BaseClass::initialize; using BaseClass::isFlushTaskSubmissionEnabled; + using BaseClass::isRelaxedOrderingDispatchAllowed; using BaseClass::isSyncModeQueue; using BaseClass::isTbxMode; using BaseClass::isTimestampEventForMultiTile; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 1747d7d860..940d8c296a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -465,6 +465,13 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri EXPECT_EQ(event->csrs[0], cmdList.csr); } +HWTEST2_F(CommandListTest, givenRegularCmdListWhenAskingForRelaxedOrderingThenReturnFalse, IsAtLeastSkl) { + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + EXPECT_FALSE(commandList->isRelaxedOrderingDispatchAllowed(5)); +} + HWTEST2_F(CommandListTest, 
givenComputeCommandListAnd2dRegionWhenMemoryCopyRegionInExternalHostAllocationCalledThenBuiltinFlagAndDestinationAllocSystemIsSet, IsAtLeastSkl) { auto commandList = std::make_unique>>(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index ff5e8700c6..84f8e254cd 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -1143,6 +1143,75 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen EXPECT_EQ(0u, sdiCmd->getDataDword0()); } +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventThenSignalSyncAllocation, IsAtLeastXeHpCore) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto immCmdList = createImmCmdList(); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto eventPool = createEvents(1, true); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + auto offset = cmdStream->getUsed(); + + immCmdList->appendSignalEvent(events[0]->toHandle()); + + EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); + EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + (cmdStream->getUsed() - offset))); + + { + auto semaphoreCmd = genCmdCast(*cmdList.begin()); + + ASSERT_NE(nullptr, semaphoreCmd); + + auto previousNode = immCmdList->deferredTimestampPackets->peekNodes()[0]; + uint64_t nodeGpuVa = previousNode->getGpuAddress() + previousNode->getContextEndOffset(); + + 
EXPECT_EQ(TimestampPacketConstants::initValue, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(nodeGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + } + + { + + auto rbeginItor = cmdList.rbegin(); + + auto sdiCmd = genCmdCast(*rbeginItor); + while (sdiCmd == nullptr) { /* advance first and stop at rend() so the end iterator is never dereferenced; a missing command then fails at ASSERT_NE below */ + if (++rbeginItor == cmdList.rend()) { + break; + } + sdiCmd = genCmdCast(*rbeginItor); + } + + ASSERT_NE(nullptr, sdiCmd); + + auto node = getLatestTsNode(immCmdList.get()); + uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); + + EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getDataDword0()); + + auto semaphoreCmd = genCmdCast(*(++rbeginItor)); + ASSERT_NE(nullptr, semaphoreCmd); + + auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device); + + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + } +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenDontSignalFromWalker, IsAtLeastXeHpCore) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;