From 631933e374fe2cd4ceae482cce2247a7ba10898f Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Thu, 2 Dec 2021 19:22:16 +0000 Subject: [PATCH] Remove needless partition offset register reconfiguration Related-To: NEO-6262 Signed-off-by: Zbigniew Zdanowicz --- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 15 +------------ .../xe_hp_core/test_cmdlist_xe_hp_core.cpp | 21 +++++++++---------- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 511aba9843..3a280479c5 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -108,12 +108,7 @@ void programEventL3Flush(ze_event_handle_t hEvent, auto &cmdListStream = *commandContainer.getCommandStream(); NEO::PipeControlArgs args; args.dcFlushEnable = true; - - if (partitionCount > 1) { - args.workloadPartitionOffset = true; - NEO::ImplicitScalingDispatch::dispatchOffsetRegister(cmdListStream, - static_cast(event->getSinglePacketSize())); - } + args.workloadPartitionOffset = partitionCount > 1; NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdListStream, @@ -122,11 +117,6 @@ void programEventL3Flush(ze_event_handle_t hEvent, Event::STATE_SIGNALED, commandContainer.getDevice()->getHardwareInfo(), args); - - if (partitionCount > 1) { - NEO::ImplicitScalingDispatch::dispatchOffsetRegister(cmdListStream, - NEO::ImplicitScalingDispatch::getPostSyncOffset()); - } } template @@ -252,9 +242,6 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z } if (L3FlushEnable) { size_t estimatedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(neoDevice->getHardwareInfo()); - if (partitionCount > 1) { - estimatedSize += 2 * NEO::ImplicitScalingDispatch::getOffsetRegisterSize(); - } increaseCommandStreamSpace(estimatedSize); programEventL3Flush(hEvent, this->device, partitionCount, commandContainer); } diff --git a/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp b/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp index fece79362f..0a639a59ba 100644 --- a/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp @@ -14,6 +14,7 @@ #include "test.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" @@ -143,8 +144,9 @@ HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomics EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics); } -using CommandListAppendLaunchKernelL3Flush = Test; -HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) { +using MultTileCommandListAppendLaunchKernelL3Flush = Test>; + +HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; @@ -181,9 +183,9 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWi ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(pCommandList->commandContainer.getCommandStream()->getCpuBase(), 0), pCommandList->commandContainer.getCommandStream()->getUsed())); - EXPECT_LT(1u, pCommandList->partitionCount); + EXPECT_EQ(2u, pCommandList->partitionCount); auto itorLri = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorLri); + EXPECT_EQ(cmdList.end(), itorLri); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); uint32_t postSyncCount = 0u; @@ -196,13 +198,11 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWi ASSERT_LE(1u, postSyncCount); } -HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) { +HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; - DebugManagerStateRestore restorer; - DebugManager.flags.EnableWalkerPartition.set(1); Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); @@ -233,9 +233,9 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAnd ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(pCommandList->commandContainer.getCommandStream()->getCpuBase(), 0), pCommandList->commandContainer.getCommandStream()->getUsed())); - EXPECT_LT(1u, pCommandList->partitionCount); + EXPECT_EQ(2u, pCommandList->partitionCount); auto itorLri = findAll(cmdList.begin(), cmdList.end()); - ASSERT_LE(2u, itorLri.size()); + EXPECT_EQ(0u, itorLri.size()); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); uint32_t postSyncCount = 0u; @@ -248,11 +248,10 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAnd ASSERT_LE(1u, postSyncCount); } +using CommandListAppendLaunchKernelL3Flush = Test; HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventAndWithoutWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - DebugManagerStateRestore restorer; - DebugManager.flags.EnableWalkerPartition.set(0); Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get();