From f12b11786e77aed6341b4a5ca06db4b3105e3f17 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 11 Apr 2023 19:41:11 +0000 Subject: [PATCH] [feat, perf] add primary batch buffer support to front end properties update For a command list dispatched as a primary batch buffer, the driver should not use return points. Return points are useful when batch buffers are dispatched as secondary; for primary buffers, patching the front end command is the more desirable option. Related-To: NEO-7807 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 4 +- .../unit_tests/fixtures/cmdlist_fixture.cpp | 3 +- .../unit_tests/fixtures/cmdlist_fixture.h | 14 +- .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + .../sources/cmdlist/test_cmdlist_3.cpp | 177 +++++++++++++++++- .../sources/cmdlist/test_cmdlist_7.cpp | 14 +- 6 files changed, 200 insertions(+), 13 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 574ca0854e..d635dae400 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2568,7 +2568,7 @@ void CommandListCoreFamily::updateStreamPropertiesForRegularComma } finalStreamState.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(isCooperative, fusedEuDisabled); - bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get(); + bool isPatchingVfeStateAllowed = (NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get() || (this->frontEndStateTracking && this->dispatchCmdListBatchBufferAsPrimary)); if (logicalStateHelperBlock && finalStreamState.frontEndState.isDirty()) { if (isPatchingVfeStateAllowed) { auto frontEndStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType); @@ -2576,7 +2576,7 @@ void CommandListCoreFamily::updateStreamPropertiesForRegularComma 
NEO::PreambleHelper::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr); commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, CommandToPatch::FrontEndState}); } - if (this->frontEndStateTracking) { + if (this->frontEndStateTracking && !this->dispatchCmdListBatchBufferAsPrimary) { auto &stream = *commandContainer.getCommandStream(); NEO::EncodeBatchBufferStartOrEnd::programBatchBufferEnd(stream); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index 46f4509650..efc002af47 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -134,7 +134,8 @@ void ModuleMutableCommandListFixture::tearDown() { ModuleImmutableDataFixture::tearDown(); } -void MultiReturnCommandListFixture::setUp() { +void FrontEndCommandListFixtureInit::setUp(int32_t dispatchCmdBufferPrimary) { + DebugManager.flags.DispatchCmdlistCmdBufferPrimary.set(dispatchCmdBufferPrimary); DebugManager.flags.EnableFrontEndTracking.set(1); DebugManager.flags.EnableFlushTaskSubmission.set(1); ModuleMutableCommandListFixture::setUp(REVISION_B); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 5806c8d11e..b08a5dcfe7 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -86,8 +86,18 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture { DebugManagerStateRestore restorer; }; -struct MultiReturnCommandListFixture : public ModuleMutableCommandListFixture { - void setUp(); +struct FrontEndCommandListFixtureInit : public ModuleMutableCommandListFixture { + void setUp() { + setUp(0); + } + void setUp(int32_t dispatchCmdBufferPrimary); +}; + +template +struct 
FrontEndCommandListFixture : public FrontEndCommandListFixtureInit { + void setUp() { + FrontEndCommandListFixtureInit::setUp(dispatchCmdBufferPrimary); + } }; struct CmdListPipelineSelectStateFixture : public ModuleMutableCommandListFixture { diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 0fe61ddd58..7aff841ebd 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -205,6 +205,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp { using BaseClass::cmdQImmediate; using BaseClass::commandContainer; using BaseClass::commandListPreemptionMode; + using BaseClass::commandsToPatch; using BaseClass::csr; using BaseClass::currentBindingTablePoolBaseAddress; using BaseClass::currentDynamicStateBaseAddress; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index 99e825ccb7..00a682b0c9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -19,7 +19,7 @@ #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" #include "level_zero/core/source/image/image_hw.h" -#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_event.h" @@ -1518,5 +1518,180 @@ HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenExternalMemWithinDiffe commandList->hostPtrMap.clear(); } +using FrontEndPrimaryBatchBufferCommandListTest = Test>; +HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest, + 
givenFrontEndTrackingIsUsedWhenPropertyDisableEuFusionSupportedThenExpectFrontEndAddedToPatchlist, + IsAtLeastXeHpCore) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + NEO::FrontEndPropertiesSupport fePropertiesSupport = {}; + auto &productHelper = device->getProductHelper(); + productHelper.fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo()); + + EXPECT_TRUE(commandList->frontEndStateTracking); + + auto &cmdStream = *commandList->getCmdContainer().getCommandStream(); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + auto &commandsToPatch = commandList->commandsToPatch; + EXPECT_EQ(0u, commandsToPatch.size()); + + mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1; + + size_t usedBefore = cmdStream.getUsed(); + commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + if (fePropertiesSupport.disableEuFusion) { + ASSERT_EQ(1u, commandsToPatch.size()); + CommandList::CommandToPatch &cfePatch = commandsToPatch[0]; + EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type); + + void *expectedDestination = ptrOffset(cmdStream.getCpuBase(), usedBefore); + EXPECT_EQ(expectedDestination, cfePatch.pDestination); + + auto cfeCmd = genCmdCast(cfePatch.pCommand); + ASSERT_NE(nullptr, cfeCmd); + EXPECT_TRUE(NEO::UnitTestHelper::getDisableFusionStateFromFrontEndCommand(*cfeCmd)); + } else { + EXPECT_EQ(0u, commandsToPatch.size()); + } + + commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + if (fePropertiesSupport.disableEuFusion) { + EXPECT_EQ(1u, commandsToPatch.size()); + } else { + EXPECT_EQ(0u, commandsToPatch.size()); + } + + mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 0; + + 
usedBefore = cmdStream.getUsed(); + commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + if (fePropertiesSupport.disableEuFusion) { + ASSERT_EQ(2u, commandsToPatch.size()); + CommandList::CommandToPatch &cfePatch = commandsToPatch[1]; + EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type); + + void *expectedDestination = ptrOffset(cmdStream.getCpuBase(), usedBefore); + EXPECT_EQ(expectedDestination, cfePatch.pDestination); + + auto cfeCmd = genCmdCast(cfePatch.pCommand); + ASSERT_NE(nullptr, cfeCmd); + EXPECT_FALSE(NEO::UnitTestHelper::getDisableFusionStateFromFrontEndCommand(*cfeCmd)); + } + + mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1; + + usedBefore = cmdStream.getUsed(); + commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + if (fePropertiesSupport.disableEuFusion) { + ASSERT_EQ(3u, commandsToPatch.size()); + CommandList::CommandToPatch &cfePatch = commandsToPatch[2]; + EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type); + + void *expectedDestination = ptrOffset(cmdStream.getCpuBase(), usedBefore); + EXPECT_EQ(expectedDestination, cfePatch.pDestination); + + auto cfeCmd = genCmdCast(cfePatch.pCommand); + ASSERT_NE(nullptr, cfeCmd); + EXPECT_TRUE(NEO::UnitTestHelper::getDisableFusionStateFromFrontEndCommand(*cfeCmd)); + } else { + EXPECT_EQ(0u, commandsToPatch.size()); + } + + if (fePropertiesSupport.disableEuFusion) { + commandList->reset(); + EXPECT_EQ(0u, commandsToPatch.size()); + } +} +HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest, + givenFrontEndTrackingCmdListIsExecutedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectFrontEndAddedToPatchlist, + IsAtLeastXeHpCore) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + NEO::FrontEndPropertiesSupport fePropertiesSupport = {}; + auto &productHelper = device->getProductHelper(); + 
productHelper.fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo()); + + NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); + + EXPECT_TRUE(commandList->frontEndStateTracking); + EXPECT_TRUE(commandQueue->frontEndStateTracking); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + + ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto &commandsToPatch = commandList->commandsToPatch; + EXPECT_EQ(0u, commandsToPatch.size()); + + result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + if (fePropertiesSupport.computeDispatchAllWalker) { + ASSERT_EQ(1u, commandsToPatch.size()); + CommandList::CommandToPatch &cfePatch = commandsToPatch[0]; + EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type); + + auto cfeCmd = genCmdCast(cfePatch.pCommand); + ASSERT_NE(nullptr, cfeCmd); + EXPECT_TRUE(NEO::UnitTestHelper::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd)); + } else { + EXPECT_EQ(0u, commandsToPatch.size()); + } + + result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + if (fePropertiesSupport.computeDispatchAllWalker) { + EXPECT_EQ(1u, commandsToPatch.size()); + } else { + EXPECT_EQ(0u, commandsToPatch.size()); + } + + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + if (fePropertiesSupport.computeDispatchAllWalker) { + ASSERT_EQ(2u, commandsToPatch.size()); + CommandList::CommandToPatch &cfePatch = commandsToPatch[1]; + EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type); + + auto cfeCmd = 
genCmdCast(cfePatch.pCommand); + ASSERT_NE(nullptr, cfeCmd); + EXPECT_FALSE(NEO::UnitTestHelper::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd)); + } else { + EXPECT_EQ(0u, commandsToPatch.size()); + } + + result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + if (fePropertiesSupport.computeDispatchAllWalker) { + ASSERT_EQ(3u, commandsToPatch.size()); + CommandList::CommandToPatch &cfePatch = commandsToPatch[2]; + EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type); + + auto cfeCmd = genCmdCast(cfePatch.pCommand); + ASSERT_NE(nullptr, cfeCmd); + EXPECT_TRUE(NEO::UnitTestHelper::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd)); + } else { + EXPECT_EQ(0u, commandsToPatch.size()); + } + + if (fePropertiesSupport.computeDispatchAllWalker) { + commandList->reset(); + EXPECT_EQ(0u, commandsToPatch.size()); + } +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 1d882a072a..b0a4d6f6de 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -803,9 +803,9 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, commandList->cmdQImmediate = nullptr; } -using MultiReturnCommandListTest = Test; +using FrontEndMultiReturnCommandListTest = Test>; -HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDisableEuFusionSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) { +HWTEST2_F(FrontEndMultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDisableEuFusionSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) { using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; NEO::FrontEndPropertiesSupport fePropertiesSupport = {}; auto &productHelper = 
device->getProductHelper(); @@ -972,7 +972,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDis } } -HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) { +HWTEST2_F(FrontEndMultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) { using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; NEO::FrontEndPropertiesSupport fePropertiesSupport = {}; auto &productHelper = device->getProductHelper(); @@ -1134,7 +1134,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyCom } } -HWTEST2_F(MultiReturnCommandListTest, +HWTEST2_F(FrontEndMultiReturnCommandListTest, givenFrontEndTrackingCmdListIsExecutedWhenPropertyDisableEuFusionSupportedThenExpectFrontEndProgrammingInCmdQueue, IsAtLeastSkl) { using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; @@ -1384,7 +1384,7 @@ HWTEST2_F(MultiReturnCommandListTest, } } -HWTEST2_F(MultiReturnCommandListTest, +HWTEST2_F(FrontEndMultiReturnCommandListTest, givenFrontEndTrackingCmdListIsExecutedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectFrontEndProgrammingInCmdQueue, IsAtLeastSkl) { using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; @@ -1635,7 +1635,7 @@ HWTEST2_F(MultiReturnCommandListTest, } } -HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothRegularFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) { +HWTEST2_F(FrontEndMultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothRegularFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) { using VFE_STATE_TYPE = typename 
FamilyType::VFE_STATE_TYPE; NEO::FrontEndPropertiesSupport fePropertiesSupport = {}; auto &productHelper = device->getProductHelper(); @@ -1758,7 +1758,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr EXPECT_EQ(0u, feStateCmds.size()); } -HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothImmediateFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) { +HWTEST2_F(FrontEndMultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothImmediateFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) { using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE; NEO::FrontEndPropertiesSupport fePropertiesSupport = {}; auto &productHelper = device->getProductHelper();