fix: enable flush task submission for internal cmd lists

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-06-12 12:42:08 +00:00
committed by Compute-Runtime-Automation
parent d38de8cf79
commit 7d5679cbfe
5 changed files with 34 additions and 64 deletions

View File

@@ -374,8 +374,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::surfaceState);
}
if (this->device->getL0Debugger()) {
UNRECOVERABLE_IF(!NEO::Debugger::isDebugEnabled(this->internalUsage));
if (this->device->getL0Debugger() && NEO::Debugger::isDebugEnabled(this->internalUsage)) {
csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId()));
csr->makeResident(*this->device->getDebugSurface());
if (this->device->getNEODevice()->getBindlessHeapsHelper()) {

View File

@@ -49,7 +49,7 @@ ze_result_t CommandListImp::destroy() {
static_cast<DeviceImp *>(this->device)->bcsSplit.releaseResources();
}
if (isImmediateType() && this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
if (this->cmdQImmediate && this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
this->hostSynchronize(std::numeric_limits<uint64_t>::max());
}
@@ -207,14 +207,13 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
}
auto &productHelper = device->getProductHelper();
commandList->isFlushTaskSubmissionEnabled = gfxCoreHelper.isPlatformFlushTaskEnabled(productHelper);
if (NEO::debugManager.flags.EnableFlushTaskSubmission.get() != -1) {
commandList->isFlushTaskSubmissionEnabled = !!NEO::debugManager.flags.EnableFlushTaskSubmission.get();
}
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Flush Task for Immediate command list : %s\n", commandList->isFlushTaskSubmissionEnabled ? "Enabled" : "Disabled");
if (!internalUsage) {
commandList->isFlushTaskSubmissionEnabled = gfxCoreHelper.isPlatformFlushTaskEnabled(productHelper);
if (NEO::debugManager.flags.EnableFlushTaskSubmission.get() != -1) {
commandList->isFlushTaskSubmissionEnabled = !!NEO::debugManager.flags.EnableFlushTaskSubmission.get();
}
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Flush Task for Immediate command list : %s\n", commandList->isFlushTaskSubmissionEnabled ? "Enabled" : "Disabled");
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
bool enabledCmdListSharing = !NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType) && commandList->isFlushTaskSubmissionEnabled;
commandList->immediateCmdListHeapSharing = L0GfxCoreHelper::enableImmediateCmdListHeapSharing(rootDeviceEnvironment, enabledCmdListSharing);

View File

@@ -810,14 +810,20 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
auto pageFaultCmdQueue = whiteboxCast(CommandList::whiteboxCast(deviceImp->pageFaultCommandList)->cmdQImmediate);
auto pageFaultCsr = pageFaultCmdQueue->getCsr();
auto &pageFaultCsrStream = pageFaultCsr->getCS(0);
auto pageFaultCsrStreamBefore = pageFaultCsrStream.getUsed();
auto sizeBefore = commandQueue->commandStream.getUsed();
auto pageFaultSizeBefore = pageFaultCmdQueue->commandStream.getUsed();
auto handle = commandList->toHandle();
commandQueue->executeCommandLists(1, &handle, nullptr, true, nullptr, nullptr);
auto sizeAfter = commandQueue->commandStream.getUsed();
auto pageFaultSizeAfter = pageFaultCmdQueue->commandStream.getUsed();
auto pageFaultCsrStreamAfter = pageFaultCsrStream.getUsed();
EXPECT_LT(sizeBefore, sizeAfter);
EXPECT_LT(pageFaultSizeBefore, pageFaultSizeAfter);
EXPECT_EQ(pageFaultSizeBefore, pageFaultSizeAfter);
EXPECT_LT(pageFaultCsrStreamBefore, pageFaultCsrStreamAfter);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(commandQueue->commandStream.getCpuBase(), 0),
@@ -828,6 +834,11 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(pageFaultCmdQueue->commandStream.getCpuBase(), 0),
pageFaultSizeAfter);
count = findAll<CFE_STATE *>(commands.begin(), commands.end()).size();
EXPECT_EQ(0u, count);
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(pageFaultCsrStream.getCpuBase(), 0),
pageFaultCsrStreamAfter);
count = findAll<CFE_STATE *>(commands.begin(), commands.end()).size();
EXPECT_EQ(1u, count);
res = context->freeMem(ptr);

View File

@@ -48,17 +48,27 @@ HWTEST_P(L0DebuggerWithBlitterTest, givenFlushTaskSubmissionEnabledWhenCommandLi
NEO::debugManager.flags.EnableFlushTaskSubmission.set(true);
debugManager.flags.EnableStateBaseAddressTracking.set(0);
size_t usedSpaceBefore = 0;
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::renderCompute, returnValue);
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
Mock<Module> module(device, nullptr, ModuleType::user);
Mock<::L0::KernelImp> kernel;
kernel.module = &module;
auto &csrStream = commandList->getCsr(false)->getCS(0);
size_t usedSpaceBefore = csrStream.getUsed();
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams);
auto usedSpaceAfter = csrStream.getUsed();
ASSERT_GE(usedSpaceAfter, usedSpaceBefore);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList, commandList->getCmdContainer().getCommandStream()->getCpuBase(), usedSpaceAfter));
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, csrStream.getCpuBase(), usedSpaceAfter));
auto sbaItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), sbaItor);
@@ -331,55 +341,6 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenImmediateFlushTaskWhenExecutingKernelT
commandList->destroy();
}
// Checks that, after appending a kernel launch to an immediate command list
// created with the fourth createImmediate argument set to true (presumably the
// internal-usage flag -- confirm against the createImmediate signature), the
// SBA tracking buffer, the SIP allocation and the debug surface are not
// reported as resident on the CSR, and no STATE_SIP command is found in the
// CSR command stream.
HWTEST_P(L0DebuggerWithBlitterTest, givenInternalUsageImmediateCommandListWhenExecutingThenDebuggerAllocationsAreNotResident) {
using STATE_SIP = typename FamilyType::STATE_SIP;
// The test is skipped when heapless state init is enabled for this product.
auto &compilerProductHelper = neoDevice->getCompilerProductHelper();
auto heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo);
if (compilerProductHelper.isHeaplessStateInitEnabled(heaplessEnabled)) {
GTEST_SKIP();
}
// Mock kernel attached to a mock user module; used as the launch target below.
Mock<Module> module(device, nullptr, ModuleType::user);
Mock<::L0::KernelImp> kernel;
kernel.module = &module;
// NOTE(review): restorer presumably resets debug flags when it leaves scope -- confirm.
DebugManagerStateRestore restorer;
// The debug flag requests flush task submission; the expectation further down
// shows that the request is not honoured for this command list.
NEO::debugManager.flags.EnableFlushTaskSubmission.set(true);
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue = ZE_RESULT_SUCCESS;
ze_group_count_t groupCount{1, 1, 1};
auto commandList = CommandList::whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::renderCompute, returnValue));
// Internal command list must not have flush task enabled
EXPECT_FALSE(commandList->isFlushTaskSubmissionEnabled);
// Treat the command list's CSR as the ULT CSR type to reach its test-only members.
auto &csr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> &>(*commandList->getCsr(false));
// NOTE(review): presumably required so that isMadeResident() can be queried below -- confirm.
csr.storeMakeResidentAllocations = true;
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// Debugger-related allocations whose residency is checked.
auto sbaBuffer = device->getL0Debugger()->getSbaTrackingBuffer(csr.getOsContext().getContextId());
auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice, nullptr).getSipAllocation();
auto debugSurface = device->getDebugSurface();
EXPECT_FALSE(csr.isMadeResident(sbaBuffer));
EXPECT_FALSE(csr.isMadeResident(sipIsa));
EXPECT_FALSE(csr.isMadeResident(debugSurface));
// Parse everything written to the CSR command stream and require that it
// contains no STATE_SIP command.
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList, csr.getCS().getCpuBase(), csr.getCS().getUsed()));
auto sipItor = find<STATE_SIP *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(cmdList.end(), sipItor);
commandList->destroy();
}
HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned, Gen12Plus) {
Mock<Module> module(device, nullptr, ModuleType::user);
Mock<::L0::KernelImp> kernel;

View File

@@ -1761,7 +1761,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressCommon(
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
auto &rootDeviceEnvironment = this->peekRootDeviceEnvironment();
bool debuggingEnabled = device.getDebugger() != nullptr;
bool debuggingEnabled = device.getDebugger() != nullptr && !this->osContext->isInternalEngine();
EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(csrCommandStream, rootDeviceEnvironment, isRcs(), this->dcFlushSupport);
EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(csrCommandStream, pipelineSelectArgs, true, rootDeviceEnvironment, isRcs());