mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-06 19:32:25 +08:00
feature: Append recorded commandlist into immediate (5/N)
- add support for heapless mode

Related-To: NEO-10356
Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
This commit is contained in:
committed by Compute-Runtime-Automation
parent 63528e70a7
commit 20aa853369
@@ -121,8 +121,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
|||||||
uint32_t numCommandLists,
|
uint32_t numCommandLists,
|
||||||
ze_command_list_handle_t *commandListHandles,
|
ze_command_list_handle_t *commandListHandles,
|
||||||
ze_fence_handle_t hFence,
|
ze_fence_handle_t hFence,
|
||||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
NEO::LinearStream *parentImmediateCommandlistLinearStream);
|
||||||
ze_event_handle_t *phWaitEvents);
|
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
|
MOCKABLE_VIRTUAL ze_result_t executeCommandListsRegular(CommandListExecutionContext &ctx,
|
||||||
uint32_t numCommandLists,
|
uint32_t numCommandLists,
|
||||||
|
|||||||
@@ -117,7 +117,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
|||||||
ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
||||||
} else if (this->heaplessStateInitEnabled) {
|
} else if (this->heaplessStateInitEnabled) {
|
||||||
ctx.globalInit = false;
|
ctx.globalInit = false;
|
||||||
ret = this->executeCommandListsRegularHeapless(ctx, numCommandLists, phCommandLists, hFence, nullptr, 0, nullptr);
|
ret = this->executeCommandListsRegularHeapless(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
||||||
} else {
|
} else {
|
||||||
ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
||||||
}
|
}
|
||||||
@@ -135,13 +135,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
|||||||
uint32_t numCommandLists,
|
uint32_t numCommandLists,
|
||||||
ze_command_list_handle_t *commandListHandles,
|
ze_command_list_handle_t *commandListHandles,
|
||||||
ze_fence_handle_t hFence,
|
ze_fence_handle_t hFence,
|
||||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
NEO::LinearStream *parentImmediateCommandlistLinearStream) {
|
||||||
ze_event_handle_t *phWaitEvents) {
|
|
||||||
|
|
||||||
auto neoDevice = this->device->getNEODevice();
|
auto neoDevice = this->device->getNEODevice();
|
||||||
this->csr->initializeDeviceWithFirstSubmission(*neoDevice);
|
this->csr->initializeDeviceWithFirstSubmission(*neoDevice);
|
||||||
|
|
||||||
this->setupCmdListsAndContextParams(ctx, commandListHandles, numCommandLists, hFence, nullptr);
|
this->setupCmdListsAndContextParams(ctx, commandListHandles, numCommandLists, hFence, parentImmediateCommandlistLinearStream);
|
||||||
ctx.isDirectSubmissionEnabled = this->csr->isDirectSubmissionEnabled();
|
ctx.isDirectSubmissionEnabled = this->csr->isDirectSubmissionEnabled();
|
||||||
bool instructionCacheFlushRequired = this->csr->isInstructionCacheFlushRequired();
|
bool instructionCacheFlushRequired = this->csr->isInstructionCacheFlushRequired();
|
||||||
bool stateCacheFlushRequired = neoDevice->getBindlessHeapsHelper() ? neoDevice->getBindlessHeapsHelper()->getStateDirtyForContext(this->csr->getOsContext().getContextId()) : false;
|
bool stateCacheFlushRequired = neoDevice->getBindlessHeapsHelper() ? neoDevice->getBindlessHeapsHelper()->getStateDirtyForContext(this->csr->getOsContext().getContextId()) : false;
|
||||||
@@ -164,6 +163,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NEO::LinearStream *streamForDispatch = parentImmediateCommandlistLinearStream ? parentImmediateCommandlistLinearStream : &child;
|
||||||
|
|
||||||
this->getGlobalFenceAndMakeItResident();
|
this->getGlobalFenceAndMakeItResident();
|
||||||
this->getWorkPartitionAndMakeItResident();
|
this->getWorkPartitionAndMakeItResident();
|
||||||
this->getGlobalStatelessHeapAndMakeItResident(ctx);
|
this->getGlobalStatelessHeapAndMakeItResident(ctx);
|
||||||
@@ -176,22 +177,22 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
|||||||
this->makeCsrTagAllocationResident();
|
this->makeCsrTagAllocationResident();
|
||||||
|
|
||||||
if (instructionCacheFlushRequired) {
|
if (instructionCacheFlushRequired) {
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(child);
|
NEO::MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(*streamForDispatch);
|
||||||
this->csr->setInstructionCacheFlushed();
|
this->csr->setInstructionCacheFlushed();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stateCacheFlushRequired) {
|
if (stateCacheFlushRequired) {
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(child, neoDevice->getRootDeviceEnvironment());
|
NEO::MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(*streamForDispatch, neoDevice->getRootDeviceEnvironment());
|
||||||
neoDevice->getBindlessHeapsHelper()->clearStateDirtyForContext(this->csr->getOsContext().getContextId());
|
neoDevice->getBindlessHeapsHelper()->clearStateDirtyForContext(this->csr->getOsContext().getContextId());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto i = 0u; i < numCommandLists; ++i) {
|
for (auto i = 0u; i < numCommandLists; ++i) {
|
||||||
auto commandList = CommandList::fromHandle(commandListHandles[i]);
|
auto commandList = CommandList::fromHandle(commandListHandles[i]);
|
||||||
|
|
||||||
ctx.childGpuAddressPositionBeforeDynamicPreamble = child.getCurrentGpuAddressPosition();
|
ctx.childGpuAddressPositionBeforeDynamicPreamble = (*streamForDispatch).getCurrentGpuAddressPosition();
|
||||||
|
|
||||||
this->patchCommands(*commandList, ctx);
|
this->patchCommands(*commandList, ctx);
|
||||||
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
|
this->programOneCmdListBatchBufferStart(commandList, *streamForDispatch, ctx);
|
||||||
|
|
||||||
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
|
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
|
||||||
if (commandList->hasKernelWithAssert()) {
|
if (commandList->hasKernelWithAssert()) {
|
||||||
@@ -202,18 +203,29 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegularHeapless(
|
|||||||
}
|
}
|
||||||
|
|
||||||
this->migrateSharedAllocationsIfRequested(ctx.isMigrationRequested, ctx.firstCommandList);
|
this->migrateSharedAllocationsIfRequested(ctx.isMigrationRequested, ctx.firstCommandList);
|
||||||
this->programLastCommandListReturnBbStart(child, ctx);
|
this->programLastCommandListReturnBbStart(*streamForDispatch, ctx);
|
||||||
this->assignCsrTaskCountToFenceIfAvailable(hFence);
|
this->assignCsrTaskCountToFenceIfAvailable(hFence);
|
||||||
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child);
|
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, *streamForDispatch);
|
||||||
|
|
||||||
|
auto submitResult = NEO::SubmissionStatus::failed;
|
||||||
|
if (parentImmediateCommandlistLinearStream) {
|
||||||
|
submitResult = NEO::SubmissionStatus::success;
|
||||||
|
} else {
|
||||||
|
submitResult = this->prepareAndSubmitBatchBuffer(ctx, *streamForDispatch);
|
||||||
|
}
|
||||||
|
|
||||||
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child);
|
|
||||||
this->updateTaskCountAndPostSync(ctx.isDispatchTaskCountPostSyncRequired);
|
this->updateTaskCountAndPostSync(ctx.isDispatchTaskCountPostSyncRequired);
|
||||||
this->csr->makeSurfacePackNonResident(this->csr->getResidencyAllocations(), false);
|
this->csr->makeSurfacePackNonResident(this->csr->getResidencyAllocations(), false);
|
||||||
|
|
||||||
auto completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer();
|
auto completionResult = ZE_RESULT_SUCCESS;
|
||||||
|
if (!parentImmediateCommandlistLinearStream) {
|
||||||
|
completionResult = this->waitForCommandQueueCompletionAndCleanHeapContainer();
|
||||||
|
}
|
||||||
ze_result_t retVal = this->handleSubmissionAndCompletionResults(submitResult, completionResult);
|
ze_result_t retVal = this->handleSubmissionAndCompletionResults(submitResult, completionResult);
|
||||||
|
|
||||||
this->csr->getResidencyAllocations().clear();
|
if (!parentImmediateCommandlistLinearStream) {
|
||||||
|
this->csr->getResidencyAllocations().clear();
|
||||||
|
}
|
||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user