Correct linear stream size estimation: add the post-sync barrier size only when dispatch task count post sync is required

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-7156
This commit is contained in:
Kamil Kopryk
2022-09-07 05:53:23 +00:00
committed by Compute-Runtime-Automation
parent 38fd01ef41
commit 3223a0bace
3 changed files with 53 additions and 6 deletions

View File

@@ -110,8 +110,8 @@ struct CommandQueueHw : public CommandQueueImp {
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists,
ze_fence_handle_t hFence);
inline bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx,
MOCKABLE_VIRTUAL bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
inline size_t estimateLinearStreamSizeInitial(const CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
@@ -119,7 +119,7 @@ struct CommandQueueHw : public CommandQueueImp {
inline size_t estimateLinearStreamSizeComplementary(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
inline ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize);
MOCKABLE_VIRTUAL ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize);
inline void allocateGlobalFenceAndMakeItResident();
inline void allocateWorkPartitionAndMakeItResident();
inline void allocateTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream);

View File

@@ -120,14 +120,16 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
size_t linearStreamSizeEstimate = this->estimateLinearStreamSizeInitial(ctx, phCommandLists, numCommandLists);
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
this->handleScratchSpaceAndUpdateGSBAStateDirtyFlag(ctx);
this->setFrontEndStateProperties(ctx);
linearStreamSizeEstimate += this->estimateLinearStreamSizeComplementary(ctx, phCommandLists, numCommandLists);
linearStreamSizeEstimate += this->computePreemptionSize(ctx, phCommandLists, numCommandLists);
linearStreamSizeEstimate += this->computeDebuggerCmdsSize(ctx);
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false);
if (ctx.isDispatchTaskCountPostSyncRequired) {
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false);
}
NEO::LinearStream child(nullptr);
if (const auto ret = this->makeAlignedChildStreamAndSetGpuBase(child, linearStreamSizeEstimate); ret != ZE_RESULT_SUCCESS) {
@@ -546,7 +548,7 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
CommandListExecutionContext &ctx,
const CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists) {