feature: add pipeline state management for append command list operation

- command list append state is managed from internal queue and can be skipped
- initial state configuration should be processed by both kernel and non-kernel
- only kernel operation can process required state, as non-kernel cannot change

Related-To: NEO-10356

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-03-04 15:51:22 +00:00
committed by Compute-Runtime-Automation
parent ff4da67979
commit 27d7d72033
3 changed files with 274 additions and 52 deletions

View File

@@ -3705,6 +3705,21 @@ HWTEST2_F(CommandStreamReceiverHwTest,
EXPECT_EQ(nullptr, pipelineSelectCmd);
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskCmdListDispatchWhenPipelineSelectNotInitializedThenDoNotDispatchPipelineSelectCommand,
IsWithinXeGfxFamily) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_FALSE(commandStreamReceiver.getPreambleSetFlag());
immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::cmdList;
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
EXPECT_FALSE(commandStreamReceiver.getPreambleSetFlag());
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskOnSystolicPlatformWhenPipelineSelectAlreadyInitializedAndSystolicRequiredThenDispatchPipelineSelectCommandForSystolic,
IsWithinXeGfxFamily) {
@@ -3742,6 +3757,35 @@ HWTEST2_F(CommandStreamReceiverHwTest,
}
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskNonKernelDispatchWhenPipelineSelectAlreadyInitializedAndSystolicRequiredThenIgnoreRequiredStreamProperties,
IsWithinXeGfxFamily) {
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_FALSE(commandStreamReceiver.getPreambleSetFlag());
immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::nonKernel;
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
HardwareParse hwParserCsr;
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
auto pipelineSelectCmd = hwParserCsr.getCommand<PIPELINE_SELECT>();
ASSERT_NE(nullptr, pipelineSelectCmd);
EXPECT_TRUE(commandStreamReceiver.getPreambleSetFlag());
this->requiredStreamProperties.pipelineSelect.setPropertySystolicMode(true);
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushImmediateTask(commandStream,
commandStream.getUsed(),
immediateFlushTaskFlags,
*pDevice);
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskWhenFrontEndNotInitializedThenDispatchFrontEndCommand,
IsAtLeastXeHpCore) {
@@ -3772,6 +3816,21 @@ HWTEST2_F(CommandStreamReceiverHwTest,
EXPECT_EQ(nullptr, frontEndCmd);
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskCmdListDispatchWhenFrontEndNotInitializedThenDoNotDispatchFrontEndCommand,
IsAtLeastXeHpCore) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::cmdList;
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskOnChangingFrontEndPropertiesPlatformWhenFrontEndAlreadyInitializedAndFrontEndPropertyChangeRequiredThenDispatchFrontEndCommand,
IsWithinXeGfxFamily) {
@@ -3818,6 +3877,35 @@ HWTEST2_F(CommandStreamReceiverHwTest,
}
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskNonKernelDispatchWhenFrontEndAlreadyInitializedAndFrontEndPropertyChangeRequiredThenIgnoreRequiredStreamProperties,
IsWithinXeGfxFamily) {
using CFE_STATE = typename FamilyType::CFE_STATE;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::nonKernel;
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
HardwareParse hwParserCsr;
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
auto frontEndCmd = hwParserCsr.getCommand<CFE_STATE>();
ASSERT_NE(nullptr, frontEndCmd);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
this->requiredStreamProperties.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(true, true);
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushImmediateTask(commandStream,
commandStream.getUsed(),
immediateFlushTaskFlags,
*pDevice);
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskWhenStateComputeModeNotInitializedThenDispatchStateComputeModeCommand,
IsAtLeastXeHpCore) {
@@ -3851,6 +3939,24 @@ HWTEST2_F(CommandStreamReceiverHwTest,
EXPECT_EQ(nullptr, stateComputeModeCmd);
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskCmdListDispatchWhenStateComputeModeNotInitializedThenDoNotDispatchStateComputeModeCommand,
IsAtLeastXeHpCore) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
if (commandStreamReceiver.heaplessStateInitialized) {
GTEST_SKIP();
}
EXPECT_TRUE(commandStreamReceiver.getStateComputeModeDirty());
immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::cmdList;
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
EXPECT_TRUE(commandStreamReceiver.getStateComputeModeDirty());
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskOnChangingStateComputeModePropertiesPlatformWhenStateComputeModeAlreadyInitializedAndStateComputeModePropertyChangeRequiredThenDispatchStateComputeModeCommand,
IsAtLeastXeHpCore) {
@@ -3896,6 +4002,40 @@ HWTEST2_F(CommandStreamReceiverHwTest,
}
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskNonKernelDispatchWhenStateComputeModeAlreadyInitializedAndStateComputeModePropertyChangeRequiredThenIgnoreRequiredStreamProperties,
IsAtLeastXeHpCore) {
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
if (commandStreamReceiver.heaplessStateInitialized) {
GTEST_SKIP();
}
EXPECT_TRUE(commandStreamReceiver.getStateComputeModeDirty());
this->requiredStreamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, ThreadArbitrationPolicy::AgeBased, NEO::PreemptionMode::ThreadGroup);
immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::nonKernel;
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
HardwareParse hwParserCsr;
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
auto stateComputeModeCmd = hwParserCsr.getCommand<STATE_COMPUTE_MODE>();
ASSERT_NE(nullptr, stateComputeModeCmd);
EXPECT_FALSE(commandStreamReceiver.getStateComputeModeDirty());
this->requiredStreamProperties.stateComputeMode.setPropertiesGrfNumberThreadArbitration(GrfConfig::largeGrfNumber, ThreadArbitrationPolicy::RoundRobin);
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushImmediateTask(commandStream,
commandStream.getUsed(),
immediateFlushTaskFlags,
*pDevice);
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskAndBindingPoolBaseAddressNeededWhenStateBaseAddressNotInitializedThenDispatchStateBaseAddressAndBindingPoolBaseAddressCommand,
IsAtLeastXeHpCore) {
@@ -3937,6 +4077,25 @@ HWTEST2_F(CommandStreamReceiverHwTest,
EXPECT_EQ(nullptr, bindingTableAddress);
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskCmdListDispatchWhenStateBaseAddressNotInitializedThenDoNotDispatchStateBaseAddressAndBindingPoolBaseAddressCommand,
IsAtLeastXeHpCore) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
if (commandStreamReceiver.heaplessStateInitialized) {
GTEST_SKIP();
}
EXPECT_TRUE(commandStreamReceiver.getGSBAStateDirty());
this->requiredStreamProperties.stateBaseAddress.setPropertiesAll(1, 0x1000, 0x100, 0x2000, 0x100, 0x3000, 0x100, 0x4000, 0x100);
immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::cmdList;
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
EXPECT_TRUE(commandStreamReceiver.getGSBAStateDirty());
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskAndBindingPoolBaseAddressNeededWhenStateBaseAddressInitializedAndHeapsChangedThenDispatchStateBaseAddressAndBindingPoolBaseAddressCommandTwice,
IsAtLeastXeHpCore) {
@@ -3980,6 +4139,46 @@ HWTEST2_F(CommandStreamReceiverHwTest,
ASSERT_NE(nullptr, bindingTableAddress);
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskNonKernelDispatchWhenStateBaseAddressInitializedThenDispatchInitialStateBaseAddressAndIgnoreRequiredStreamProperties,
IsAtLeastXeHpCore) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
if (commandStreamReceiver.heaplessStateInitialized) {
GTEST_SKIP();
}
EXPECT_TRUE(commandStreamReceiver.getGSBAStateDirty());
this->requiredStreamProperties.stateBaseAddress.setPropertiesAll(1, 0x1000, 0x100, 0x2000, 0x100, 0x3000, 0x100, 0x4000, 0x100);
immediateFlushTaskFlags.dispatchOperation = NEO::AppendOperations::nonKernel;
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
HardwareParse hwParserCsr;
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
auto stateBaseAddress = hwParserCsr.getCommand<STATE_BASE_ADDRESS>();
ASSERT_NE(nullptr, stateBaseAddress);
EXPECT_EQ(0x4000u, stateBaseAddress->getGeneralStateBaseAddress());
auto bindingTableAddress = hwParserCsr.getCommand<_3DSTATE_BINDING_TABLE_POOL_ALLOC>();
ASSERT_NE(nullptr, bindingTableAddress);
EXPECT_FALSE(commandStreamReceiver.getGSBAStateDirty());
this->requiredStreamProperties.stateBaseAddress.setPropertiesBindingTableSurfaceState(0x5000, 0x100, 0x6000, 0x100);
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushImmediateTask(commandStream,
commandStream.getUsed(),
immediateFlushTaskFlags,
*pDevice);
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
}
HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskAndBindingPoolBaseAddressNeededWhenStateBaseAddressPropertiesNotProvidedForFirstFlushThenDispatchSecondSbaCommandWhenProvided,
IsAtLeastXeHpCore) {
@@ -6148,3 +6347,19 @@ HWTEST_F(CommandStreamReceiverHwTest, givenEpilogueStreamAvailableWhenFlushImmed
commandStreamReceiver.getMemoryManager()->freeGraphicsMemoryImpl(commandBuffer);
}
HWTEST_F(CommandStreamReceiverHwTest, givenFlushBcsTaskCmdListDispatchWhenCalledThenNoPrologueIsSent) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
DispatchBcsFlags dispatchBcsFlags(false, false, false);
dispatchBcsFlags.dispatchOperation = NEO::AppendOperations::cmdList;
size_t usedSize = commandStreamReceiver.commandStream.getUsed();
commandStreamReceiver.flushBcsTask(commandStream,
commandStream.getUsed(),
dispatchBcsFlags,
pDevice->getHardwareInfo());
EXPECT_EQ(usedSize, commandStreamReceiver.commandStream.getUsed());
}