Remove needless partition offset register reconfiguration

Related-To: NEO-6262

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2021-12-02 19:22:16 +00:00 committed by Compute-Runtime-Automation
parent c4f1b16239
commit 631933e374
2 changed files with 11 additions and 25 deletions

View File

@ -108,12 +108,7 @@ void programEventL3Flush(ze_event_handle_t hEvent,
auto &cmdListStream = *commandContainer.getCommandStream();
NEO::PipeControlArgs args;
args.dcFlushEnable = true;
if (partitionCount > 1) {
args.workloadPartitionOffset = true;
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(cmdListStream,
static_cast<uint32_t>(event->getSinglePacketSize()));
}
args.workloadPartitionOffset = partitionCount > 1;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
cmdListStream,
@ -122,11 +117,6 @@ void programEventL3Flush(ze_event_handle_t hEvent,
Event::STATE_SIGNALED,
commandContainer.getDevice()->getHardwareInfo(),
args);
if (partitionCount > 1) {
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(cmdListStream,
NEO::ImplicitScalingDispatch<GfxFamily>::getPostSyncOffset());
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
@ -252,9 +242,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
}
if (L3FlushEnable) {
size_t estimatedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(neoDevice->getHardwareInfo());
if (partitionCount > 1) {
estimatedSize += 2 * NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
}
increaseCommandStreamSpace(estimatedSize);
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
}

View File

@ -14,6 +14,7 @@
#include "test.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@ -143,8 +144,9 @@ HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomics
EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics);
}
using CommandListAppendLaunchKernelL3Flush = Test<ModuleFixture>;
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
using MultTileCommandListAppendLaunchKernelL3Flush = Test<MultiTileCommandListFixture<false, false>>;
HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
@ -181,9 +183,9 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWi
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(pCommandList->commandContainer.getCommandStream()->getCpuBase(), 0), pCommandList->commandContainer.getCommandStream()->getUsed()));
EXPECT_LT(1u, pCommandList->partitionCount);
EXPECT_EQ(2u, pCommandList->partitionCount);
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLri);
EXPECT_EQ(cmdList.end(), itorLri);
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(0u, itorPC.size());
uint32_t postSyncCount = 0u;
@ -196,13 +198,11 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWi
ASSERT_LE(1u, postSyncCount);
}
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
DebugManagerStateRestore restorer;
DebugManager.flags.EnableWalkerPartition.set(1);
Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = pMockModule.get();
@ -233,9 +233,9 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAnd
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(pCommandList->commandContainer.getCommandStream()->getCpuBase(), 0), pCommandList->commandContainer.getCommandStream()->getUsed()));
EXPECT_LT(1u, pCommandList->partitionCount);
EXPECT_EQ(2u, pCommandList->partitionCount);
auto itorLri = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_LE(2u, itorLri.size());
EXPECT_EQ(0u, itorLri.size());
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(0u, itorPC.size());
uint32_t postSyncCount = 0u;
@ -248,11 +248,10 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAnd
ASSERT_LE(1u, postSyncCount);
}
using CommandListAppendLaunchKernelL3Flush = Test<ModuleFixture>;
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventAndWithoutWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
DebugManagerStateRestore restorer;
DebugManager.flags.EnableWalkerPartition.set(0);
Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = pMockModule.get();