performance: skip dummy blits prior to flush without postsync

Related-To: NEO-9996
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2024-03-12 11:12:51 +00:00
committed by Compute-Runtime-Automation
parent ee1a225a41
commit 8d83f7603c
3 changed files with 29 additions and 1 deletions

View File

@@ -3809,6 +3809,7 @@ uint64_t CommandListCoreFamily<gfxCoreFamily>::getInOrderIncrementValue() const
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::encodeMiFlush(uint64_t immediateDataGpuAddress, uint64_t immediateData, NEO::MiFlushArgs &args) {
args.waArgs.isWaRequired &= args.commandWithPostSync;
auto isDummyBlitRequired = NEO::BlitCommandsHelper<GfxFamily>::isDummyBlitWaNeeded(args.waArgs);
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), immediateDataGpuAddress, immediateData, args);
if (isDummyBlitRequired) {

View File

@@ -3024,7 +3024,7 @@ TEST(BuiltinTypeHelperTest, givenHeaplessWhenAdjustBuiltinTypeIsCalledThenCorrec
EXPECT_EQ(Builtin::fillBufferMiddleStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferMiddle>(isStateless, isHeapless));
EXPECT_EQ(Builtin::fillBufferRightLeftoverStatelessHeapless, BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferRightLeftover>(isStateless, isHeapless));
}
HWTEST2_F(CommandListCreate, givenDummyBlitRequiredWhenEncodeMiFlushThenDummyBlitIsProgrammedPriorToMiFlushAndDummyAllocationIsAddedToResidencyContainer, IsAtLeastXeHpCore) {
HWTEST2_F(CommandListCreate, givenDummyBlitRequiredWhenEncodeMiFlushWithPostSyncThenDummyBlitIsProgrammedPriorToMiFlushAndDummyAllocationIsAddedToResidencyContainer, IsAtLeastXeHpCore) {
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
DebugManagerStateRestore restorer;
debugManager.flags.ForceDummyBlitWa.set(1);
@@ -3034,6 +3034,7 @@ HWTEST2_F(CommandListCreate, givenDummyBlitRequiredWhenEncodeMiFlushThenDummyBli
auto &commandContainer = cmdlist.getCmdContainer();
cmdlist.dummyBlitWa.isWaRequired = true;
MiFlushArgs args{cmdlist.dummyBlitWa};
args.commandWithPostSync = true;
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();
commandContainer.getResidencyContainer().clear();
EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation());
@@ -3051,6 +3052,31 @@ HWTEST2_F(CommandListCreate, givenDummyBlitRequiredWhenEncodeMiFlushThenDummyBli
EXPECT_EQ(commandContainer.getResidencyContainer()[0], rootDeviceEnvironment.getDummyAllocation());
}
// Verifies that encodeMiFlush() called with commandWithPostSync == false emits
// MI_FLUSH_DW as the very first command and leaves the residency container
// empty, even though the dummy-blit workaround is flagged as required.
HWTEST2_F(CommandListCreate, givenDummyBlitRequiredWhenEncodeMiFlushWithoutPostSyncThenDummyBlitIsNotProgrammedAndDummyAllocationIsNotAddedToResidencyContainer, IsAtLeastXeHpCore) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
    DebugManagerStateRestore stateRestore;
    debugManager.flags.ForceDummyBlitWa.set(1);

    auto *neoDevice = device->getNEODevice();

    // Copy-engine command list with the workaround marked as required.
    MockCommandListCoreFamily<gfxCoreFamily> copyCmdList;
    copyCmdList.initialize(device, NEO::EngineGroupType::copy, 0u);
    copyCmdList.csr = neoDevice->getDefaultEngine().commandStreamReceiver;
    auto &container = copyCmdList.getCmdContainer();
    copyCmdList.dummyBlitWa.isWaRequired = true;

    // The case under test: a flush that carries no post-sync operation.
    MiFlushArgs flushArgs{copyCmdList.dummyBlitWa};
    flushArgs.commandWithPostSync = false;

    // Create the dummy allocation up front, then empty the residency
    // container so that anything encodeMiFlush() adds would be visible.
    auto &rootDeviceEnv = neoDevice->getRootDeviceEnvironmentRef();
    rootDeviceEnv.initDummyAllocation();
    EXPECT_NE(nullptr, rootDeviceEnv.getDummyAllocation());
    container.getResidencyContainer().clear();

    copyCmdList.encodeMiFlush(0, 0, flushArgs);

    // Parse everything that was written to the command stream.
    auto *stream = container.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        parsedCommands, ptrOffset(stream->getCpuBase(), 0), stream->getUsed()));

    // MI_FLUSH_DW must be present and must be the first parsed command,
    // i.e. nothing was programmed ahead of it.
    auto flushIt = find<MI_FLUSH_DW *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.begin(), flushIt);
    EXPECT_NE(parsedCommands.end(), flushIt);

    EXPECT_EQ(container.getResidencyContainer().size(), 0u);
}
HWTEST2_F(CommandListCreate, givenDummyBlitNotRequiredWhenEncodeMiFlushThenDummyBlitIsNotProgrammedAndDummyAllocationIsNotAddedToResidencyContainer, IsAtLeastXeHpCore) {
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
DebugManagerStateRestore restorer;