mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
feature: Append recorded commandlist into immediate (5/N)
- add support for heapless mode

Related-To: NEO-10356
Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
63528e70a7
commit
20aa853369
@@ -121,8 +121,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
uint32_t numCommandLists,
|
||||
ze_command_list_handle_t *commandListHandles,
|
||||
ze_fence_handle_t hFence,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents);
|
||||
NEO::LinearStream *parentImmediateCommandlistLinearStream);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
|
||||
uint32_t numCommandLists,
|
||||
|
||||
@@ -117,7 +117,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
||||
} else if (this->heaplessStateInitEnabled) {
|
||||
ctx.globalInit = false;
|
||||
ret = this->executeCommandListsRegularHeapless(ctx, numCommandLists, phCommandLists, hFence, nullptr, 0, nullptr);
|
||||
ret = this->executeCommandListsRegularHeapless(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
||||
} else {
|
||||
ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
||||
}
|
||||
@@ -135,13 +135,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
||||
uint32_t numCommandLists,
|
||||
ze_command_list_handle_t *commandListHandles,
|
||||
ze_fence_handle_t hFence,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
NEO::LinearStream *parentImmediateCommandlistLinearStream) {
|
||||
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
this->csr->initializeDeviceWithFirstSubmission(*neoDevice);
|
||||
|
||||
this->setupCmdListsAndContextParams(ctx, commandListHandles, numCommandLists, hFence, nullptr);
|
||||
this->setupCmdListsAndContextParams(ctx, commandListHandles, numCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
||||
ctx.isDirectSubmissionEnabled = this->csr->isDirectSubmissionEnabled();
|
||||
bool instructionCacheFlushRequired = this->csr->isInstructionCacheFlushRequired();
|
||||
bool stateCacheFlushRequired = neoDevice->getBindlessHeapsHelper() ? neoDevice->getBindlessHeapsHelper()->getStateDirtyForContext(this->csr->getOsContext().getContextId()) : false;
|
||||
@@ -164,6 +163,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
||||
return ret;
|
||||
}
|
||||
|
||||
NEO::LinearStream *streamForDispatch = parentImmediateCommandlistLinearStream ? parentImmediateCommandlistLinearStream : &child;
|
||||
|
||||
this->getGlobalFenceAndMakeItResident();
|
||||
this->getWorkPartitionAndMakeItResident();
|
||||
this->getGlobalStatelessHeapAndMakeItResident(ctx);
|
||||
@@ -176,22 +177,22 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
||||
this->makeCsrTagAllocationResident();
|
||||
|
||||
if (instructionCacheFlushRequired) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(child);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(*streamForDispatch);
|
||||
this->csr->setInstructionCacheFlushed();
|
||||
}
|
||||
|
||||
if (stateCacheFlushRequired) {
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(child, neoDevice->getRootDeviceEnvironment());
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(*streamForDispatch, neoDevice->getRootDeviceEnvironment());
|
||||
neoDevice->getBindlessHeapsHelper()->clearStateDirtyForContext(this->csr->getOsContext().getContextId());
|
||||
}
|
||||
|
||||
for (auto i = 0u; i < numCommandLists; ++i) {
|
||||
auto commandList = CommandList::fromHandle(commandListHandles[i]);
|
||||
|
||||
ctx.childGpuAddressPositionBeforeDynamicPreamble = child.getCurrentGpuAddressPosition();
|
||||
ctx.childGpuAddressPositionBeforeDynamicPreamble = (*streamForDispatch).getCurrentGpuAddressPosition();
|
||||
|
||||
this->patchCommands(*commandList, ctx);
|
||||
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
|
||||
this->programOneCmdListBatchBufferStart(commandList, *streamForDispatch, ctx);
|
||||
|
||||
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
|
||||
if (commandList->hasKernelWithAssert()) {
|
||||
@@ -202,18 +203,29 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
||||
}
|
||||
|
||||
this->migrateSharedAllocationsIfRequested(ctx.isMigrationRequested, ctx.firstCommandList);
|
||||
this->programLastCommandListReturnBbStart(child, ctx);
|
||||
this->programLastCommandListReturnBbStart(*streamForDispatch, ctx);
|
||||
this->assignCsrTaskCountToFenceIfAvailable(hFence);
|
||||
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child);
|
||||
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, *streamForDispatch);
|
||||
|
||||
auto submitResult = NEO::SubmissionStatus::failed;
|
||||
if (parentImmediateCommandlistLinearStream) {
|
||||
submitResult = NEO::SubmissionStatus::success;
|
||||
} else {
|
||||
submitResult = this->prepareAndSubmitBatchBuffer(ctx, *streamForDispatch);
|
||||
}
|
||||
|
||||
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child);
|
||||
this->updateTaskCountAndPostSync(ctx.isDispatchTaskCountPostSyncRequired);
|
||||
this->csr->makeSurfacePackNonResident(this->csr->getResidencyAllocations(), false);
|
||||
|
||||
auto completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer();
|
||||
auto completionResult = ZE_RESULT_SUCCESS;
|
||||
if (!parentImmediateCommandlistLinearStream) {
|
||||
completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer();
|
||||
}
|
||||
ze_result_t retVal = this->handleSubmissionAndCompletionResults(submitResult, completionResult);
|
||||
|
||||
this->csr->getResidencyAllocations().clear();
|
||||
if (!parentImmediateCommandlistLinearStream) {
|
||||
this->csr->getResidencyAllocations().clear();
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user