executeCommandLists: cleanup and split copy-only vs non-copy-only

Split the function into submethods to improve readability, reusability
and maintainability (initially it was ~500 lines long!).

Also, split the execution into 'copy-only' and 'regular' cases to reduce
the number of `if()`s in the code.

Resolves: NEO-7118
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2022-06-24 12:58:24 +00:00
committed by Compute-Runtime-Automation
parent aa27a4c3ff
commit f82b2e2984
2 changed files with 971 additions and 485 deletions

View File

@ -11,7 +11,7 @@
namespace NEO {
class ScratchSpaceController;
}
} // namespace NEO
namespace L0 {
@ -29,9 +29,6 @@ struct CommandQueueHw : public CommandQueueImp {
void *phCommands,
ze_fence_handle_t hFence) override;
void dispatchTaskCountPostSync(NEO::LinearStream &commandStream, const NEO::HardwareInfo &hwInfo);
bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed);
size_t estimateStateBaseAddressCmdSize();
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream);
@ -40,7 +37,7 @@ struct CommandQueueHw : public CommandQueueImp {
ze_command_list_handle_t *phCommandLists);
size_t estimateFrontEndCmdSize();
size_t estimatePipelineSelect();
void programPipelineSelect(NEO::LinearStream &commandStream);
void programPipelineSelectIfGpgpuDisabled(NEO::LinearStream &commandStream);
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
@ -50,6 +47,113 @@ struct CommandQueueHw : public CommandQueueImp {
bool getPreemptionCmdProgramming() override;
void patchCommands(CommandList &commandList, uint64_t scratchAddress);
protected:
// Per-submission context for executeCommandLists(): gathers the flags and
// parameters computed while scanning the submitted command lists, so the
// split-out submethods (regular / copy-only paths) can share state without
// threading many individual parameters through each call.
struct CommandListExecutionContext {
// Initializes the context from the submitted command lists plus the
// queue-level preemption/debug/partition/migration settings.
CommandListExecutionContext(ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists,
NEO::PreemptionMode contextPreemptionMode,
Device *device,
bool debugEnabled,
bool programActivePartitionConfig,
bool performMigration);
// NOTE(review): presumably reports whether the NEO debugger is attached
// for this device — confirm against the .cpp implementation.
inline bool isNEODebuggerActive(Device *device);
// Properties of the submitted command-list batch (names suggest they are
// set while iterating the lists in setupCmdListsAndContextParams — verify).
bool anyCommandListWithCooperativeKernels = false;
bool anyCommandListWithoutCooperativeKernels = false;
bool anyCommandListRequiresDisabledEUFusion = false;
bool cachedMOCSAllowed = true;
bool performMemoryPrefetch = false;
bool containsAnyRegularCmdList = false;
// Dirty-state tracking: whether GSBA / front-end state needs reprogramming
// before this submission (see programSbaWithUpdatedGsbaIfDirty and
// programOneCmdListFrontEndIfDirty in the surrounding interface).
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
// Residency-space accounting for stream-size estimation — TODO confirm units.
size_t spaceForResidency = 0;
// Preemption bookkeeping for the whole submission.
NEO::PreemptionMode preemptionMode{};
NEO::PreemptionMode statePreemption{};
// Scratch-space sizes; value-initialized to 0 until computed.
uint32_t perThreadScratchSpaceSize = 0;
uint32_t perThreadPrivateScratchSize = 0;
// Immutable after construction (const): captured once in the ctor.
const bool isPreemptionModeInitial{};
bool isDevicePreemptionModeMidThread{};
bool isDebugEnabled{};
bool stateSipRequired{};
bool isProgramActivePartitionConfigRequired{};
bool isMigrationRequested{};
bool isDirectSubmissionEnabled{};
bool isDispatchTaskCountPostSyncRequired{};
};
ze_result_t validateCommandListsParams(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
inline ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
uint32_t numCommandLists,
ze_command_list_handle_t *phCommandLists,
ze_fence_handle_t hFence);
inline ze_result_t executeCommandListsCopyOnly(CommandListExecutionContext &ctx,
uint32_t numCommandLists,
ze_command_list_handle_t *phCommandLists,
ze_fence_handle_t hFence);
inline size_t computeDebuggerCmdsSize(const CommandListExecutionContext &ctx);
inline size_t computePreemptionSize(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
inline void setupCmdListsAndContextParams(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists,
ze_fence_handle_t hFence);
inline bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
inline void handleScratchSpaceAndUpdateGSBAStateDirtyFlag(CommandListExecutionContext &ctx);
inline size_t estimateLinearStreamSizeComplementary(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
inline ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize);
inline void allocateGlobalFenceAndMakeItResident();
inline void allocateWorkPartitionAndMakeItResident();
inline void allocateTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream);
inline void makeSbaTrackingBufferResidentIfL0DebuggerEnabled(bool isDebugEnabled);
inline void programCommandQueueDebugCmdsForSourceLevelOrL0DebuggerIfEnabled(bool isDebugEnabled, NEO::LinearStream &commandStream);
inline void programSbaWithUpdatedGsbaIfDirty(CommandListExecutionContext &ctx,
ze_command_list_handle_t hCommandList,
NEO::LinearStream &commandStream);
inline void programCsrBaseAddressIfPreemptionModeInitial(bool isPreemptionModeInitial, NEO::LinearStream &commandStream);
inline void programStateSip(bool isStateSipRequired, NEO::LinearStream &commandStream);
inline void updateOneCmdListPreemptionModeAndCtxStatePreemption(CommandListExecutionContext &ctx,
NEO::PreemptionMode commandListPreemption,
NEO::LinearStream &commandStream);
inline void makePreemptionAllocationResidentForModeMidThread(bool isDevicePreemptionModeMidThread);
inline void makeSipIsaResidentIfSipKernelUsed(CommandListExecutionContext &ctx);
inline void makeDebugSurfaceResidentIfNEODebuggerActive(bool isNEODebuggerActive);
inline void makeCsrTagAllocationResident();
inline void programActivePartitionConfig(bool isProgramActivePartitionConfigRequired, NEO::LinearStream &commandStream);
inline void encodeKernelArgsBufferAndMakeItResident();
inline void writeCsrStreamInlineIfLogicalStateHelperAvailable(NEO::LinearStream &commandStream);
inline void programOneCmdListFrontEndIfDirty(CommandListExecutionContext &ctx,
CommandList *commandList,
NEO::LinearStream &commandStream);
inline void programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &commandStream);
inline void mergeOneCmdListPipelinedState(CommandList *commandList);
inline void programFrontEndAndClearDirtyFlag(bool shouldFrontEndBeProgrammed,
CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream);
inline void collectPrintfContentsFromAllCommandsLists(ze_command_list_handle_t *phCommandLists, uint32_t numCommandLists);
inline void migrateSharedAllocationsIfRequested(bool isMigrationRequested, ze_command_list_handle_t hCommandList);
inline void prefetchMemoryIfRequested(bool &isMemoryPrefetchRequested);
inline void programStateSipEndWA(bool isStateSipRequired, NEO::LinearStream &commandStream);
inline void assignCsrTaskCountToFenceIfAvailable(ze_fence_handle_t hFence);
inline void dispatchTaskCountPostSyncRegular(bool isDispatchTaskCountPostSyncRequired, NEO::LinearStream &commandStream);
inline void dispatchTaskCountPostSyncByMiFlushDw(bool isDispatchTaskCountPostSyncRequired, NEO::LinearStream &commandStream);
inline NEO::SubmissionStatus prepareAndSubmitBatchBuffer(CommandListExecutionContext &ctx, NEO::LinearStream &innerCommandStream);
inline void updateTaskCountAndPostSync(bool isDispatchTaskCountPostSyncRequired);
inline ze_result_t waitForCommandQueueCompletionAndCleanHeapContainer();
inline ze_result_t handleSubmissionAndCompletionResults(NEO::SubmissionStatus submitRet, ze_result_t completionRet);
size_t alignedChildStreamPadding{};
};
} // namespace L0

File diff suppressed because it is too large Load Diff