From 20aa85336989389c280c2331e7b8fe438e5758f1 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 28 Aug 2024 23:14:23 +0000 Subject: [PATCH] feature: Append recorded commandlist into immediate (5/N) - add support for heapless mode Related-To: NEO-10356 Signed-off-by: Aravind Gopalakrishnan --- level_zero/core/source/cmdqueue/cmdqueue_hw.h | 3 +- .../core/source/cmdqueue/cmdqueue_hw.inl | 38 ++++++++++++------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index 4e1001de71..1362de43e0 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -121,8 +121,7 @@ struct CommandQueueHw : public CommandQueueImp { uint32_t numCommandLists, ze_command_list_handle_t *commandListHandles, ze_fence_handle_t hFence, - ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents); + NEO::LinearStream *parentImmediateCommandlistLinearStream); MOCKABLE_VIRTUAL ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx, uint32_t numCommandLists, diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 1b87a57f78..168e7bb209 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -117,7 +117,7 @@ ze_result_t CommandQueueHw::executeCommandLists( ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); } else if (this->heaplessStateInitEnabled) { ctx.globalInit = false; - ret = this->executeCommandListsRegularHeapless(ctx, numCommandLists, phCommandLists, hFence, nullptr, 0, nullptr); + ret = this->executeCommandListsRegularHeapless(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); } else { ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); } @@ -135,13 +135,12 @@ ze_result_t CommandQueueHw::executeCommandListsRegularHeapless( uint32_t numCommandLists, ze_command_list_handle_t *commandListHandles, ze_fence_handle_t hFence, - ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents) { + NEO::LinearStream *parentImmediateCommandlistLinearStream) { auto neoDevice = this->device->getNEODevice(); this->csr->initializeDeviceWithFirstSubmission(*neoDevice); - this->setupCmdListsAndContextParams(ctx, commandListHandles, numCommandLists, hFence, nullptr); + this->setupCmdListsAndContextParams(ctx, commandListHandles, numCommandLists, hFence, parentImmediateCommandlistLinearStream); ctx.isDirectSubmissionEnabled = this->csr->isDirectSubmissionEnabled(); bool instructionCacheFlushRequired = this->csr->isInstructionCacheFlushRequired(); bool stateCacheFlushRequired = neoDevice->getBindlessHeapsHelper() ? neoDevice->getBindlessHeapsHelper()->getStateDirtyForContext(this->csr->getOsContext().getContextId()) : false; @@ -164,6 +163,8 @@ ze_result_t CommandQueueHw::executeCommandListsRegularHeapless( return ret; } + NEO::LinearStream *streamForDispatch = parentImmediateCommandlistLinearStream ? parentImmediateCommandlistLinearStream : &child; + this->getGlobalFenceAndMakeItResident(); this->getWorkPartitionAndMakeItResident(); this->getGlobalStatelessHeapAndMakeItResident(ctx); @@ -176,22 +177,22 @@ ze_result_t CommandQueueHw::executeCommandListsRegularHeapless( this->makeCsrTagAllocationResident(); if (instructionCacheFlushRequired) { - NEO::MemorySynchronizationCommands::addInstructionCacheFlush(child); + NEO::MemorySynchronizationCommands::addInstructionCacheFlush(*streamForDispatch); this->csr->setInstructionCacheFlushed(); } if (stateCacheFlushRequired) { - NEO::MemorySynchronizationCommands::addStateCacheFlush(child, neoDevice->getRootDeviceEnvironment()); + NEO::MemorySynchronizationCommands::addStateCacheFlush(*streamForDispatch, neoDevice->getRootDeviceEnvironment()); neoDevice->getBindlessHeapsHelper()->clearStateDirtyForContext(this->csr->getOsContext().getContextId()); } for (auto i = 0u; i < numCommandLists; ++i) { auto commandList = CommandList::fromHandle(commandListHandles[i]); - ctx.childGpuAddressPositionBeforeDynamicPreamble = child.getCurrentGpuAddressPosition(); + ctx.childGpuAddressPositionBeforeDynamicPreamble = (*streamForDispatch).getCurrentGpuAddressPosition(); this->patchCommands(*commandList, ctx); - this->programOneCmdListBatchBufferStart(commandList, child, ctx); + this->programOneCmdListBatchBufferStart(commandList, *streamForDispatch, ctx); this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList); if (commandList->hasKernelWithAssert()) { @@ -202,18 +203,29 @@ ze_result_t CommandQueueHw::executeCommandListsRegularHeapless( } this->migrateSharedAllocationsIfRequested(ctx.isMigrationRequested, ctx.firstCommandList); - this->programLastCommandListReturnBbStart(child, ctx); + this->programLastCommandListReturnBbStart(*streamForDispatch, ctx); this->assignCsrTaskCountToFenceIfAvailable(hFence); - this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child); + this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, *streamForDispatch); + + auto submitResult = NEO::SubmissionStatus::failed; + if (parentImmediateCommandlistLinearStream) { + submitResult = NEO::SubmissionStatus::success; + } else { + submitResult = this->prepareAndSubmitBatchBuffer(ctx, *streamForDispatch); + } - auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child); this->updateTaskCountAndPostSync(ctx.isDispatchTaskCountPostSyncRequired); this->csr->makeSurfacePackNonResident(this->csr->getResidencyAllocations(), false); - auto completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer(); + auto completionResult = ZE_RESULT_SUCCESS; + if (!parentImmediateCommandlistLinearStream) { + completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer(); + } ze_result_t retVal = this->handleSubmissionAndCompletionResults(submitResult, completionResult); - this->csr->getResidencyAllocations().clear(); + if (!parentImmediateCommandlistLinearStream) { + this->csr->getResidencyAllocations().clear(); + } return retVal; }