feature: enable dispatch primary batch buffer from immediate

Related-To: NEO-10356

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-03-07 19:38:39 +00:00
committed by Compute-Runtime-Automation
parent 8ede026c5d
commit fc770cacf2
3 changed files with 45 additions and 5 deletions

View File

@@ -246,7 +246,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->l1CachePolicyData.init(productHelper);
this->cmdListHeapAddressModel = L0GfxCoreHelper::getHeapAddressModel(rootDeviceEnvironment);
this->dummyBlitWa.rootDeviceEnvironment = &(neoDevice->getRootDeviceEnvironmentRef());
this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !isImmediateType());
this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !(this->internalUsage && isImmediateType()));
this->useOnlyGlobalTimestamps = gfxCoreHelper.useOnlyGlobalTimestamps();
this->maxFillPaternSizeForCopyEngine = productHelper.getMaxFillPaternSizeForCopyEngine();
this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled();

View File

@@ -98,7 +98,7 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal, bool imm
auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
this->doubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(hwInfo);
this->cmdListHeapAddressModel = L0GfxCoreHelper::getHeapAddressModel(rootDeviceEnvironment);
this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !immediateCmdListQueue);
this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !(internalUsage && immediateCmdListQueue));
auto &compilerProductHelper = rootDeviceEnvironment.getHelper<NEO::CompilerProductHelper>();
this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled();
this->heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(this->heaplessModeEnabled);

View File

@@ -1729,14 +1729,14 @@ HWTEST2_F(CommandListCreateTests, givenPlatformNotSupportsSharedHeapsWhenImmedia
using PrimaryBatchBufferCmdListTest = Test<PrimaryBatchBufferCmdListFixture>;
HWTEST_F(PrimaryBatchBufferCmdListTest, givenForcedPrimaryBatchBufferWhenRegularAndImmediateObjectCreatedThenRegularSetPrimaryFlagAndImmediateNot) {
HWTEST_F(PrimaryBatchBufferCmdListTest, givenForcedPrimaryBatchBufferWhenRegularAndImmediateObjectCreatedThenRegularAndImmediateSetPrimaryFlag) {
EXPECT_TRUE(commandList->dispatchCmdListBatchBufferAsPrimary);
EXPECT_TRUE(commandQueue->dispatchCmdListBatchBufferAsPrimary);
EXPECT_FALSE(commandListImmediate->dispatchCmdListBatchBufferAsPrimary);
EXPECT_TRUE(commandListImmediate->dispatchCmdListBatchBufferAsPrimary);
ASSERT_NE(nullptr, commandListImmediate->cmdQImmediate);
auto immediateCmdQueue = static_cast<L0::ult::CommandQueue *>(commandListImmediate->cmdQImmediate);
EXPECT_FALSE(immediateCmdQueue->dispatchCmdListBatchBufferAsPrimary);
EXPECT_TRUE(immediateCmdQueue->dispatchCmdListBatchBufferAsPrimary);
}
HWTEST_F(PrimaryBatchBufferCmdListTest, givenPrimaryBatchBufferWhenAppendingKernelAndClosingCommandListThenExpectAlignedSpaceForBatchBufferStart) {
@@ -1851,6 +1851,46 @@ HWTEST_F(PrimaryBatchBufferCmdListTest, givenRegularCmdListWhenFlushingThenPassS
EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);
}
HWTEST_F(PrimaryBatchBufferCmdListTest, givenRegularCmdListWhenNoPreambleExpectedAndForceBbStartThenDispatchBbStart) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto cmdListHandle = commandList->toHandle();
auto regularCmdBufferAllocation = commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation();
// 1st dispatch can carry state preamble
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto &queueStream = commandQueue->commandStream;
auto offsetBefore = queueStream.getUsed();
commandQueue->triggerBbStartJump();
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto offsetAfter = queueStream.getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList,
ptrOffset(queueStream.getCpuBase(), offsetBefore),
offsetAfter - offsetBefore));
ASSERT_NE(0u, cmdList.size());
auto firstCmdIt = cmdList.begin();
auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(*firstCmdIt);
ASSERT_NE(nullptr, bbStartCmd);
EXPECT_EQ(regularCmdBufferAllocation->getGpuAddress(), bbStartCmd->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, bbStartCmd->getSecondLevelBatchBuffer());
}
HWTEST2_F(PrimaryBatchBufferCmdListTest, givenRelaxedOrderingAndRegularCmdListAndSubmittedToImmediateWhenFlushingThenPassStallingCmdsInfo, IsAtLeastXeHpcCore) {
DebugManagerStateRestore restore;
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);