mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
[perf] reallocate residency container once for all command lists
When getting the residency count for all command lists, the driver is able to reallocate the container only once rather than once per command list. Add a non-zero initial value for command queue residual allocations. Related-To: NEO-7828 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
d48f5b01f6
commit
1a4dda57e7
@ -71,7 +71,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
|
||||
NEO::StreamProperties cmdListBeginState{};
|
||||
uint64_t scratchGsba = 0;
|
||||
size_t spaceForResidency = 0;
|
||||
size_t spaceForResidency = 10;
|
||||
CommandList *firstCommandList = nullptr;
|
||||
CommandList *lastCommandList = nullptr;
|
||||
NEO::PreemptionMode preemptionMode{};
|
||||
@ -122,6 +122,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
MOCKABLE_VIRTUAL bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
|
||||
inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx);
|
||||
inline size_t estimateCommandListSecondaryStart(CommandList *commandList);
|
||||
inline size_t estimateCommandListResidencySize(CommandList *commandList);
|
||||
inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
|
||||
inline void handleScratchSpaceAndUpdateGSBAStateDirtyFlag(CommandListExecutionContext &ctx);
|
||||
inline size_t estimateLinearStreamSizeComplementary(CommandListExecutionContext &ctx,
|
||||
|
@ -128,6 +128,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), false);
|
||||
}
|
||||
|
||||
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
|
||||
|
||||
NEO::LinearStream child(nullptr);
|
||||
if (const auto ret = this->makeAlignedChildStreamAndSetGpuBase(child, linearStreamSizeEstimate); ret != ZE_RESULT_SUCCESS) {
|
||||
return ret;
|
||||
@ -238,6 +240,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
|
||||
for (auto i = 0u; i < numCommandLists; i++) {
|
||||
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||
linearStreamSizeEstimate += estimateCommandListSecondaryStart(commandList);
|
||||
ctx.spaceForResidency += estimateCommandListResidencySize(commandList);
|
||||
}
|
||||
|
||||
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
|
||||
@ -664,6 +667,11 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListSecondaryStart(CommandL
|
||||
return (commandList->getCmdContainer().getCmdBufferAllocations().size() * NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListResidencySize(CommandList *commandList) {
|
||||
return commandList->getCmdContainer().getResidencyContainer().size();
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::setFrontEndStateProperties(CommandListExecutionContext &ctx) {
|
||||
|
||||
@ -726,6 +734,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
|
||||
linearStreamSizeEstimate += computePreemptionSizeForCommandList(ctx, cmdList);
|
||||
|
||||
linearStreamSizeEstimate += estimateCommandListSecondaryStart(cmdList);
|
||||
ctx.spaceForResidency += estimateCommandListResidencySize(cmdList);
|
||||
}
|
||||
|
||||
if (ctx.gsbaStateDirty && !this->stateBaseAddressTracking) {
|
||||
|
Reference in New Issue
Block a user