From 51ae76a25f4f61b4dd02e6108a2b1bdbfe19b29d Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Fri, 1 Mar 2024 14:33:52 +0000 Subject: [PATCH] refactor: improve handling of in-order atomic signaling Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 4 +--- level_zero/core/source/cmdlist/cmdlist_hw.inl | 12 ++++-------- level_zero/core/source/cmdlist/cmdlist_imp.cpp | 2 +- level_zero/core/source/cmdlist/cmdlist_imp.h | 2 -- .../core/test/unit_tests/mocks/mock_cmdlist.h | 5 ++--- .../sources/cmdlist/test_in_order_cmdlist.cpp | 16 ++++++++++------ shared/source/helpers/gfx_core_helper.h | 2 ++ shared/source/helpers/gfx_core_helper_base.inl | 5 +++++ shared/source/helpers/in_order_cmd_helpers.cpp | 3 ++- shared/source/helpers/in_order_cmd_helpers.h | 2 +- .../command_container/command_encoder_tests.cpp | 4 ++-- .../unit_test/helpers/gfx_core_helper_tests.cpp | 17 ++++++++++++++++- 12 files changed, 46 insertions(+), 28 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index e3035dbf6f..f99c854d80 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -341,10 +341,7 @@ struct CommandListCoreFamily : public CommandListImp { bool hasInOrderDependencies() const; void addCmdForPatching(std::shared_ptr *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType patchCmdType); - - bool inOrderAtomicSignallingEnabled() const override; uint64_t getInOrderIncrementValue() const; - bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const; NEO::InOrderPatchCommandsContainer inOrderPatchCmds; @@ -352,6 +349,7 @@ struct CommandListCoreFamily : public CommandListImp { uint64_t latestHostWaitedInOrderSyncValue = 0; bool latestOperationRequiredNonWalkerInOrderCmdsChaining = false; bool duplicatedInOrderCounterStorageEnabled = false; + bool inOrderAtomicSignalingEnabled = false; }; template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 3e85e49aa3..fccb64e355 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -237,6 +237,7 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->requiredStreamState.initSupport(rootDeviceEnvironment); this->finalStreamState.initSupport(rootDeviceEnvironment); this->duplicatedInOrderCounterStorageEnabled = gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(rootDeviceEnvironment); + this->inOrderAtomicSignalingEnabled = gfxCoreHelper.inOrderAtomicSignallingEnabled(rootDeviceEnvironment); this->commandContainer.doubleSbaWaRef() = this->doubleSbaWa; this->commandContainer.l1CachePolicyDataRef() = &this->l1CachePolicyData; @@ -2560,7 +2561,7 @@ void CommandListCoreFamily::appendSignalInOrderDependencyCounter( auto cmdStream = commandContainer.getCommandStream(); - if (inOrderAtomicSignallingEnabled()) { + if (this->inOrderAtomicSignalingEnabled) { using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES; using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE; @@ -3690,7 +3691,7 @@ void CommandListCoreFamily::appendWaitOnSingleEvent(Event *event, template void CommandListCoreFamily::addCmdForPatching(std::shared_ptr *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType patchCmdType) { if ((NEO::debugManager.flags.EnableInOrderRegularCmdListPatching.get() != 0) && !isImmediateType()) { - this->inOrderPatchCmds.emplace_back(externalInOrderExecInfo, cmd1, cmd2, counterValue, patchCmdType, inOrderAtomicSignallingEnabled(), this->duplicatedInOrderCounterStorageEnabled); + this->inOrderPatchCmds.emplace_back(externalInOrderExecInfo, cmd1, cmd2, counterValue, patchCmdType, this->inOrderAtomicSignalingEnabled, this->duplicatedInOrderCounterStorageEnabled); } } @@ -3757,14 +3758,9 @@ bool CommandListCoreFamily::handleCounterBasedEventOperations(Eve return true; } -template -bool CommandListCoreFamily::inOrderAtomicSignallingEnabled() const { - return (NEO::debugManager.flags.InOrderAtomicSignallingEnabled.get() == 1); -} - template uint64_t CommandListCoreFamily::getInOrderIncrementValue() const { - return (inOrderAtomicSignallingEnabled() ? this->getPartitionCount() : 1); + return (this->inOrderAtomicSignalingEnabled ? this->getPartitionCount() : 1); } } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index e8ab70958e..b2f912c321 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -261,7 +261,7 @@ void CommandListImp::enableInOrderExecution() { auto deviceCounterNode = this->device->getDeviceInOrderCounterAllocator()->getTag(); - inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType(), inOrderAtomicSignallingEnabled()); + inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType()); } void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) { diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index 8d7bb18d21..83ae7e1e67 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -50,8 +50,6 @@ struct CommandListImp : public CommandList { ~CommandListImp() override = default; - virtual bool inOrderAtomicSignallingEnabled() const = 0; - static constexpr int32_t cmdListDefaultEngineInstancedDevice = NEO::StreamProperty::initValue; static constexpr bool cmdListDefaultCoherency = false; static constexpr bool cmdListDefaultDisableOverdispatch = true; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 12acfb1cdf..dc61ec378e 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -80,7 +80,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::immediateCmdListHeapSharing; using BaseClass::indirectAllocationsAllowed; using BaseClass::initialize; - using BaseClass::inOrderAtomicSignallingEnabled; + using BaseClass::inOrderAtomicSignalingEnabled; using BaseClass::inOrderExecInfo; using BaseClass::inOrderPatchCmds; using BaseClass::isFlushTaskSubmissionEnabled; @@ -187,7 +187,7 @@ struct WhiteBox> using BaseClass::getInOrderIncrementValue; using BaseClass::hostSynchronize; using BaseClass::immediateCmdListHeapSharing; - using BaseClass::inOrderAtomicSignallingEnabled; + using BaseClass::inOrderAtomicSignalingEnabled; using BaseClass::inOrderExecInfo; using BaseClass::inOrderPatchCmds; using BaseClass::isBcsSplitNeeded; @@ -295,7 +295,6 @@ struct MockCommandList : public CommandList { ADDMETHOD_NOBASE(close, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE_VOIDRETURN(patchInOrderCmds, (void)); - ADDMETHOD_CONST_NOBASE(inOrderAtomicSignallingEnabled, bool, false, (void)); ADDMETHOD_NOBASE(appendLaunchKernel, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_handle_t kernelHandle, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp index 49716089c4..f7c6cd75b9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp @@ -502,13 +502,15 @@ HWTEST2_F(InOrderCmdListTests, givenRegularEventWithInOrderExecInfoWhenReusedOnR HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetAndSingleTileCmdListWhenAskingForAtomicSignallingThenReturnTrue, IsAtLeastSkl) { auto immCmdList = createImmCmdList(); - EXPECT_FALSE(immCmdList->inOrderAtomicSignallingEnabled()); + EXPECT_FALSE(immCmdList->inOrderAtomicSignalingEnabled); EXPECT_EQ(1u, immCmdList->getInOrderIncrementValue()); debugManager.flags.InOrderAtomicSignallingEnabled.set(1); - EXPECT_TRUE(immCmdList->inOrderAtomicSignallingEnabled()); - EXPECT_EQ(1u, immCmdList->getInOrderIncrementValue()); + auto immCmdList2 = createImmCmdList(); + + EXPECT_TRUE(immCmdList2->inOrderAtomicSignalingEnabled); + EXPECT_EQ(1u, immCmdList2->getInOrderIncrementValue()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) { @@ -4166,13 +4168,15 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventAndCopyOnlyCmdListWh HWTEST2_F(MultiTileInOrderCmdListTests, givenDebugFlagSetWhenAskingForAtomicSignallingThenReturnTrue, IsAtLeastXeHpCore) { auto immCmdList = createMultiTileImmCmdList(); - EXPECT_FALSE(immCmdList->inOrderAtomicSignallingEnabled()); + EXPECT_FALSE(immCmdList->inOrderAtomicSignalingEnabled); EXPECT_EQ(1u, immCmdList->getInOrderIncrementValue()); debugManager.flags.InOrderAtomicSignallingEnabled.set(1); - EXPECT_TRUE(immCmdList->inOrderAtomicSignallingEnabled()); - EXPECT_EQ(partitionCount, immCmdList->getInOrderIncrementValue()); + auto immCmdList2 = createMultiTileImmCmdList(); + + EXPECT_TRUE(immCmdList2->inOrderAtomicSignalingEnabled); + EXPECT_EQ(partitionCount, immCmdList2->getInOrderIncrementValue()); } HWTEST2_F(MultiTileInOrderCmdListTests, givenAtomicSignallingEnabledWhenSignallingCounterThenUseMiAtomicCmd, IsAtLeastXeHpCore) { diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h index 6f0a30dcdb..5a90c50a39 100644 --- a/shared/source/helpers/gfx_core_helper.h +++ b/shared/source/helpers/gfx_core_helper.h @@ -187,6 +187,7 @@ class GfxCoreHelper { virtual bool singleTileExecImplicitScalingRequired(bool cooperativeKernel) const = 0; virtual bool duplicatedInOrderCounterStorageEnabled(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0; + virtual bool inOrderAtomicSignallingEnabled(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0; virtual ~GfxCoreHelper() = default; @@ -413,6 +414,7 @@ class GfxCoreHelperHw : public GfxCoreHelper { bool singleTileExecImplicitScalingRequired(bool cooperativeKernel) const override; bool duplicatedInOrderCounterStorageEnabled(const RootDeviceEnvironment &rootDeviceEnvironment) const override; + bool inOrderAtomicSignallingEnabled(const RootDeviceEnvironment &rootDeviceEnvironment) const override; ~GfxCoreHelperHw() override = default; diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index 700e9171e1..8ca636164b 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -761,4 +761,9 @@ bool GfxCoreHelperHw::duplicatedInOrderCounterStorageEnabled(const Ro return (debugManager.flags.InOrderDuplicatedCounterStorageEnabled.get() == 1); } +template +bool GfxCoreHelperHw::inOrderAtomicSignallingEnabled(const RootDeviceEnvironment &rootDeviceEnvironment) const { + return (debugManager.flags.InOrderAtomicSignallingEnabled.get() == 1); +} + } // namespace NEO diff --git a/shared/source/helpers/in_order_cmd_helpers.cpp b/shared/source/helpers/in_order_cmd_helpers.cpp index f8ab944830..0e605b48b5 100644 --- a/shared/source/helpers/in_order_cmd_helpers.cpp +++ b/shared/source/helpers/in_order_cmd_helpers.cpp @@ -19,10 +19,11 @@ namespace NEO { -std::shared_ptr InOrderExecInfo::create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling) { +std::shared_ptr InOrderExecInfo::create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList) { NEO::GraphicsAllocation *hostCounterAllocation = nullptr; auto &gfxCoreHelper = device.getGfxCoreHelper(); + bool atomicDeviceSignalling = gfxCoreHelper.inOrderAtomicSignallingEnabled(device.getRootDeviceEnvironment()); if (gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(device.getRootDeviceEnvironment())) { NEO::AllocationProperties hostAllocationProperties{device.getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::bufferHostMemory, device.getDeviceBitfield()}; diff --git a/shared/source/helpers/in_order_cmd_helpers.h b/shared/source/helpers/in_order_cmd_helpers.h index 145fd8d27d..57b4b55ceb 100644 --- a/shared/source/helpers/in_order_cmd_helpers.h +++ b/shared/source/helpers/in_order_cmd_helpers.h @@ -48,7 +48,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass { InOrderExecInfo() = delete; - static std::shared_ptr create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList, bool atomicDeviceSignalling); + static std::shared_ptr create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList); static std::shared_ptr createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue); InOrderExecInfo(TagNodeBase *deviceCounterNode, NEO::GraphicsAllocation *hostCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex, diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index ec28a661b1..1d39e02d15 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -88,7 +88,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getGpuAddress(), deviceNode->getGpuAddress()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getUnderlyingBuffer(), deviceNode->getCpuBase()); - auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false, false); + auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false); EXPECT_EQ(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress()); EXPECT_EQ(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getDeviceCounterAllocation()); @@ -118,7 +118,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn DebugManagerStateRestore restore; debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1); - auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false, false); + auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false); EXPECT_NE(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getHostCounterAllocation()); diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index ad75239ec9..b8efe0e535 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -1673,7 +1673,7 @@ HWTEST_F(GfxCoreHelperTest, whenAskingIf48bResourceNeededForCmdBufferThenReturnT EXPECT_TRUE(getHelper().is48ResourceNeededForCmdBuffer()); } -HWTEST_F(GfxCoreHelperTest, givenDebugVariableSetWhenAskingForDumplicatedInOrderHostStorageThenReturnCorrectValue) { +HWTEST_F(GfxCoreHelperTest, givenDebugVariableSetWhenAskingForDuplicatedInOrderHostStorageThenReturnCorrectValue) { DebugManagerStateRestore restore; auto &helper = getHelper(); @@ -1688,6 +1688,21 @@ HWTEST_F(GfxCoreHelperTest, givenDebugVariableSetWhenAskingForDumplicatedInOrder EXPECT_FALSE(helper.duplicatedInOrderCounterStorageEnabled(rootExecEnv)); } +HWTEST_F(GfxCoreHelperTest, givenDebugVariableSetWhenAskingForInOrderAtomicSignalingThenReturnCorrectValue) { + DebugManagerStateRestore restore; + + auto &helper = getHelper(); + auto &rootExecEnv = *pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]; + + EXPECT_FALSE(helper.inOrderAtomicSignallingEnabled(rootExecEnv)); + + debugManager.flags.InOrderAtomicSignallingEnabled.set(1); + EXPECT_TRUE(helper.inOrderAtomicSignallingEnabled(rootExecEnv)); + + debugManager.flags.InOrderAtomicSignallingEnabled.set(0); + EXPECT_FALSE(helper.inOrderAtomicSignallingEnabled(rootExecEnv)); +} + TEST_F(GfxCoreHelperTest, whenOnlyPerThreadPrivateMemorySizeIsDefinedThenItIsReturnedAsKernelPrivateMemorySize) { KernelDescriptor kernelDescriptor{}; kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0x100u;