[perf] reallocate residency container once for all command lists

When computing the residency count for all command lists, the driver can
reallocate the residency container once instead of once per command list.
Add a non-zero initial value for command queue residual allocations.

Related-To: NEO-7828

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-04-12 11:17:56 +00:00
committed by Compute-Runtime-Automation
parent d48f5b01f6
commit 1a4dda57e7
2 changed files with 11 additions and 1 deletions

View File

@ -71,7 +71,7 @@ struct CommandQueueHw : public CommandQueueImp {
NEO::StreamProperties cmdListBeginState{};
uint64_t scratchGsba = 0;
size_t spaceForResidency = 0;
size_t spaceForResidency = 10;
CommandList *firstCommandList = nullptr;
CommandList *lastCommandList = nullptr;
NEO::PreemptionMode preemptionMode{};
@ -122,6 +122,7 @@ struct CommandQueueHw : public CommandQueueImp {
MOCKABLE_VIRTUAL bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx);
inline size_t estimateCommandListSecondaryStart(CommandList *commandList);
inline size_t estimateCommandListResidencySize(CommandList *commandList);
inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
inline void handleScratchSpaceAndUpdateGSBAStateDirtyFlag(CommandListExecutionContext &ctx);
inline size_t estimateLinearStreamSizeComplementary(CommandListExecutionContext &ctx,

View File

@ -128,6 +128,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), false);
}
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
NEO::LinearStream child(nullptr);
if (const auto ret = this->makeAlignedChildStreamAndSetGpuBase(child, linearStreamSizeEstimate); ret != ZE_RESULT_SUCCESS) {
return ret;
@ -238,6 +240,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
linearStreamSizeEstimate += estimateCommandListSecondaryStart(commandList);
ctx.spaceForResidency += estimateCommandListResidencySize(commandList);
}
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
@ -664,6 +667,11 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListSecondaryStart(CommandL
return (commandList->getCmdContainer().getCmdBufferAllocations().size() * NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize());
}
// Reports how many entries the given command list currently holds in the
// residency container of its command container. Call sites in this file add
// the result to ctx.spaceForResidency before reserving residency storage.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListResidencySize(CommandList *commandList) {
    // Kept as a single full expression so the lifetime of any intermediate
    // object returned by the accessors is the same as in a direct return.
    const size_t residencyEntryCount = commandList->getCmdContainer().getResidencyContainer().size();
    return residencyEntryCount;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::setFrontEndStateProperties(CommandListExecutionContext &ctx) {
@ -726,6 +734,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
linearStreamSizeEstimate += computePreemptionSizeForCommandList(ctx, cmdList);
linearStreamSizeEstimate += estimateCommandListSecondaryStart(cmdList);
ctx.spaceForResidency += estimateCommandListResidencySize(cmdList);
}
if (ctx.gsbaStateDirty && !this->stateBaseAddressTracking) {