performance: reserve residency container capacity before adding allocations to it

Related-To: NEO-13916

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-04-16 16:15:23 +00:00
committed by Compute-Runtime-Automation
parent a855ce019e
commit 66c8f86fd8

View File

@@ -168,8 +168,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
size_t linearStreamSizeEstimate = this->estimateStreamSizeForExecuteCommandListsRegularHeapless(ctx, numCommandLists, commandListHandles, instructionCacheFlushRequired, stateCacheFlushRequired);
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
if (ctx.cmdListScratchAddressPatchingEnabled == true) {
this->handleScratchSpaceAndUpdateGSBAStateDirtyFlag(ctx);
}
@@ -261,7 +259,6 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStreamSizeForExecuteCommandListsRe
for (uint32_t i = 0; i < numCommandLists; i++) {
auto cmdList = CommandList::fromHandle(commandListHandles[i]);
linearStreamSizeEstimate += estimateCommandListSecondaryStart(cmdList);
ctx.spaceForResidency += estimateCommandListResidencySize(cmdList);
}
if (ctx.isDispatchTaskCountPostSyncRequired) {
@@ -333,8 +330,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), false);
}
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
NEO::LinearStream child(nullptr);
if (const auto ret = this->makeAlignedChildStreamAndSetGpuBase(child, linearStreamSizeEstimate, ctx); ret != ZE_RESULT_SUCCESS) {
return ret;
@@ -485,7 +480,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
fenceRequired |= commandList->isTaskCountUpdateFenceRequired();
linearStreamSizeEstimate += estimateCommandListSecondaryStart(commandList);
ctx.spaceForResidency += estimateCommandListResidencySize(commandList);
}
linearStreamSizeEstimate += this->estimateCommandListPrimaryStart(ctx.globalInit || this->forceBbStartJump);
@@ -493,8 +487,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleAdditionalSynchronization(device->getNEODevice()->getRootDeviceEnvironment());
}
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
NEO::EncodeDummyBlitWaArgs waArgs{false, &(this->device->getNEODevice()->getRootDeviceEnvironmentRef())};
linearStreamSizeEstimate += NEO::EncodeMiFlushDW<GfxFamily>::getCommandSizeWithWa(waArgs);
@@ -828,9 +820,16 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
ctx.cmdListScratchAddressPatchingEnabled |= commandList->getCmdListScratchAddressPatchingEnabled();
commandList->registerCsrDcFlushForDcMitigation(*this->getCsr());
ctx.spaceForResidency += estimateCommandListResidencySize(commandList);
}
}
makeResidentAndMigrate(ctx.isMigrationRequested, commandContainer.getResidencyContainer());
this->getCsr()->getResidencyAllocations().reserve(ctx.spaceForResidency);
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
makeResidentAndMigrate(ctx.isMigrationRequested, commandList->getCmdContainer().getResidencyContainer());
}
if (parentImmediateCommandlistLinearStream) {
@@ -975,7 +974,6 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
linearStreamSizeEstimate += computePreemptionSizeForCommandList(ctx, cmdList, cmdListState.flags.preemptionDirty);
linearStreamSizeEstimate += estimateCommandListSecondaryStart(cmdList);
ctx.spaceForResidency += estimateCommandListResidencySize(cmdList);
if (cmdListState.flags.isAnyDirty()) {
cmdListState.commandList = cmdList;