From 1f734d6f6aa593487ef56776f24d3a431a69fe49 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Mon, 21 Jul 2025 15:22:53 +0000 Subject: [PATCH] refactor: add helper to set default split mode Signed-off-by: Bartosz Dunajski --- level_zero/core/source/device/bcs_split.cpp | 13 +-- level_zero/core/source/device/bcs_split.h | 2 +- .../gfx_core_helpers/l0_gfx_core_helper.h | 2 + .../l0_gfx_core_helper_base.inl | 5 ++ .../sources/cmdlist/test_cmdlist_blit.cpp | 13 +++ .../cmdlist/test_in_order_cmdlist_2.cpp | 88 +++++++++++++++---- 6 files changed, 98 insertions(+), 25 deletions(-) diff --git a/level_zero/core/source/device/bcs_split.cpp b/level_zero/core/source/device/bcs_split.cpp index 319443ae8b..1af3e46267 100644 --- a/level_zero/core/source/device/bcs_split.cpp +++ b/level_zero/core/source/device/bcs_split.cpp @@ -13,6 +13,7 @@ #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle.h" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" #include "level_zero/driver_experimental/zex_api.h" namespace L0 { @@ -44,6 +45,12 @@ bool BcsSplit::setupDevice(uint32_t productFamily, bool internalUsage, const ze_ return true; } + events.aggregatedEventsMode = device.getL0GfxCoreHelper().bcsSplitAggregatedModeEnabled(); + + if (NEO::debugManager.flags.SplitBcsAggregatedEventsMode.get() != -1) { + events.aggregatedEventsMode = !!NEO::debugManager.flags.SplitBcsAggregatedEventsMode.get(); + } + setupEnginesMask(bcsSplitSettings); return setupQueues(bcsSplitSettings, productFamily); @@ -134,12 +141,6 @@ std::vector &BcsSplit::getCmdQsForSplit(NEO::TransferDirection d return this->cmdQs; } -BcsSplit::Events::Events(BcsSplit &bcsSplit) : bcsSplit(bcsSplit) { - if (NEO::debugManager.flags.SplitBcsAggregatedEventsMode.get() != -1) { - aggregatedEventsMode = !!NEO::debugManager.flags.SplitBcsAggregatedEventsMode.get(); - } -}; - size_t BcsSplit::Events::obtainAggregatedEventsForSplit(Context *context) { for 
(size_t i = 0; i < this->marker.size(); i++) { if (this->marker[i]->queryStatus() == ZE_RESULT_SUCCESS) { diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index d9348cd6a7..fc4b448bb3 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -58,7 +58,7 @@ struct BcsSplit { size_t createAggregatedEvent(Context *context); uint64_t *getNextAllocationForAggregatedEvent(); - Events(BcsSplit &bcsSplit); + Events(BcsSplit &bcsSplit) : bcsSplit(bcsSplit) {} } events; std::vector cmdQs; diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h index 05070671e9..710f1998d7 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h @@ -116,6 +116,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper { virtual uint64_t getOaTimestampValidBits() const = 0; virtual CopyOffloadMode getDefaultCopyOffloadMode(bool additionalBlitPropertiesSupported) const = 0; virtual bool isDefaultCmdListWithCopyOffloadSupported(bool additionalBlitPropertiesSupported) const = 0; + virtual bool bcsSplitAggregatedModeEnabled() const = 0; protected: L0GfxCoreHelper() = default; @@ -174,6 +175,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper { uint64_t getOaTimestampValidBits() const override; CopyOffloadMode getDefaultCopyOffloadMode(bool additionalBlitPropertiesSupported) const override; bool isDefaultCmdListWithCopyOffloadSupported(bool additionalBlitPropertiesSupported) const override; + bool bcsSplitAggregatedModeEnabled() const override; protected: L0GfxCoreHelperHw() = default; diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl index 2553021454..352f94ec5a 100644 --- 
a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl @@ -110,4 +110,9 @@ bool L0GfxCoreHelperHw::isDefaultCmdListWithCopyOffloadSupported(bool ad return (NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 2); } +template +bool L0GfxCoreHelperHw::bcsSplitAggregatedModeEnabled() const { + return false; +} + } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp index e362824550..f2818c2df2 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp @@ -16,6 +16,7 @@ #include "shared/test/common/test_macros/hw_test.h" #include "level_zero/core/source/event/event.h" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" @@ -1086,6 +1087,18 @@ struct AggregatedBcsSplitTests : public ::testing::Test { uint32_t expectedNumRootDevices = 1; }; +HWTEST2_F(AggregatedBcsSplitTests, givenPlatformSupporingAggregatedSplitModeWhenInitializingThenEnableInBcsSplitObject, IsAtLeastXeHpcCore) { + debugManager.flags.SplitBcsAggregatedEventsMode.set(-1); + + BcsSplit bcsSplit(static_cast(*device)); + + bcsSplit.setupDevice(device->getHwInfo().platform.eProductFamily, false, nullptr, cmdList->getCsr(false)); + + EXPECT_EQ(device->getL0GfxCoreHelper().bcsSplitAggregatedModeEnabled(), bcsSplit.events.aggregatedEventsMode); + + bcsSplit.releaseResources(); +} + HWTEST2_F(AggregatedBcsSplitTests, whenObtainCalledThenAggregatedEventsCreated, IsAtLeastXeHpcCore) { EXPECT_EQ(0u, bcsSplit->events.subcopy.size()); EXPECT_TRUE(bcsSplit->events.aggregatedEventsMode); 
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index 135a74bc0a..f40055562b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -3971,10 +3971,20 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_ auto itor = cmdList.begin(); + bool aggregatedEventSplit = bcsSplit.events.aggregatedEventsMode; + for (uint32_t i = 0; i < numLinkCopyEngines; i++) { auto beginItor = itor; - auto signalSubCopyEventGpuVa = bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device); + auto engineOffset = aggregatedEventSplit ? submissionId : (submissionId * numLinkCopyEngines); + + uint64_t signalSubCopyEventGpuVa = 0; + + if (aggregatedEventSplit) { + signalSubCopyEventGpuVa = bcsSplit.events.subcopy[engineOffset]->getInOrderExecInfo()->getBaseDeviceAddress(); + } else { + signalSubCopyEventGpuVa = bcsSplit.events.subcopy[i + engineOffset]->getCompletionFieldGpuAddress(device); + } size_t numExpectedSemaphores = 0; @@ -4005,20 +4015,38 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_ ASSERT_NE(nullptr, genCmdCast(*itor)); if (!device->getProductHelper().useAdditionalBlitProperties()) { - auto flushDwItor = find(++itor, cmdList.end()); - ASSERT_NE(cmdList.end(), flushDwItor); + GenCmdList::iterator signalItor; - auto signalSubCopyEvent = genCmdCast(*flushDwItor); - ASSERT_NE(nullptr, signalSubCopyEvent); + if (aggregatedEventSplit) { + signalItor = find(++itor, cmdList.end()); - while (signalSubCopyEvent->getDestinationAddress() != signalSubCopyEventGpuVa) { - flushDwItor = find(++flushDwItor, cmdList.end()); - ASSERT_NE(cmdList.end(), flushDwItor); - - signalSubCopyEvent = genCmdCast(*flushDwItor); + auto signalSubCopyEvent = 
genCmdCast(*signalItor); ASSERT_NE(nullptr, signalSubCopyEvent); + + while (signalSubCopyEvent->getMemoryAddress() != signalSubCopyEventGpuVa) { + signalItor = find(++signalItor, cmdList.end()); + ASSERT_NE(cmdList.end(), signalItor); + + signalSubCopyEvent = genCmdCast(*signalItor); + ASSERT_NE(nullptr, signalSubCopyEvent); + } + } else { + signalItor = find(++itor, cmdList.end()); + ASSERT_NE(cmdList.end(), signalItor); + + auto signalSubCopyEvent = genCmdCast(*signalItor); + ASSERT_NE(nullptr, signalSubCopyEvent); + + while (signalSubCopyEvent->getDestinationAddress() != signalSubCopyEventGpuVa) { + signalItor = find(++signalItor, cmdList.end()); + ASSERT_NE(cmdList.end(), signalItor); + + signalSubCopyEvent = genCmdCast(*signalItor); + ASSERT_NE(nullptr, signalSubCopyEvent); + } } - itor = ++flushDwItor; + + itor = ++signalItor; } else { ASSERT_TRUE(false); } @@ -4038,18 +4066,39 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_ ASSERT_TRUE(verifyInOrderDependency(semaphoreItor, submissionId, counterGpuAddress, immCmdList.isQwordInOrderCounter(), true)); } - for (uint32_t i = 0; i < numLinkCopyEngines; i++) { + if (aggregatedEventSplit) { + semaphoreItor = find(semaphoreItor, cmdList.end()); + ASSERT_NE(cmdList.end(), semaphoreItor); + auto subCopyEventSemaphore = genCmdCast(*semaphoreItor); ASSERT_NE(nullptr, subCopyEventSemaphore); - EXPECT_EQ(bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device), subCopyEventSemaphore->getSemaphoreGraphicsAddress()); + while (bcsSplit.events.subcopy[submissionId]->getInOrderExecInfo()->getBaseDeviceAddress() != subCopyEventSemaphore->getSemaphoreGraphicsAddress()) { + semaphoreItor = find(++semaphoreItor, cmdList.end()); + ASSERT_NE(cmdList.end(), semaphoreItor); + + subCopyEventSemaphore = genCmdCast(*semaphoreItor); + ASSERT_NE(nullptr, subCopyEventSemaphore); + } itor = ++semaphoreItor; + + EXPECT_EQ(nullptr, genCmdCast(*itor)); // no 
marker event + + } else { + for (uint32_t i = 0; i < numLinkCopyEngines; i++) { + auto subCopyEventSemaphore = genCmdCast(*semaphoreItor); + ASSERT_NE(nullptr, subCopyEventSemaphore); + + EXPECT_EQ(bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device), subCopyEventSemaphore->getSemaphoreGraphicsAddress()); + + itor = ++semaphoreItor; + } + + ASSERT_NE(nullptr, genCmdCast(*itor)); // marker event } - ASSERT_NE(nullptr, genCmdCast(*itor)); // marker event - - if (immCmdList.isHeaplessModeEnabled()) { + if (immCmdList.isHeaplessModeEnabled() && !aggregatedEventSplit) { auto inOrderAtomicSignalling = genCmdCast(*(++itor)); ASSERT_NE(nullptr, inOrderAtomicSignalling); } @@ -4130,10 +4179,10 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh EXPECT_FALSE(event->isCounterBased()); } for (auto &event : bcsSplit.events.subcopy) { - EXPECT_FALSE(event->isCounterBased()); + EXPECT_EQ(bcsSplit.events.aggregatedEventsMode, event->isCounterBased()); } for (auto &event : bcsSplit.events.marker) { - EXPECT_FALSE(event->isCounterBased()); + EXPECT_EQ(bcsSplit.events.aggregatedEventsMode, event->isCounterBased()); } } @@ -4173,6 +4222,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCo *immCmdList->getCsr(false)->getBarrierCountTagAddress() = 0u; immCmdList->getCsr(false)->getNextBarrierCount(); + *immCmdList->inOrderExecInfo->getBaseHostAddress() = 0; immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, copyParams); // implicit dependencies @@ -4195,6 +4245,8 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCo size_t offset = cmdStream->getUsed(); + *immCmdList->inOrderExecInfo->getBaseHostAddress() = 0; + immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 1, &eventHandle, copyParams); verifySplitCmds(*cmdStream, offset, device, 1, *immCmdList, events[0]->getCompletionFieldGpuAddress(device));