diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 960e56850c..b3f09d5100 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -116,7 +116,6 @@ ze_result_t CommandQueueHw::executeCommandLists( if (this->isCopyOnlyCommandQueue) { ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); } else if (this->heaplessStateInitEnabled) { - ctx.globalInit = false; ret = this->executeCommandListsRegularHeapless(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); } else { ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); @@ -176,6 +175,8 @@ ze_result_t CommandQueueHw::executeCommandListsRegularHeapless( this->makeCsrTagAllocationResident(); + this->programActivePartitionConfig(ctx.isProgramActivePartitionConfigRequired, *streamForDispatch); + if (instructionCacheFlushRequired) { NEO::MemorySynchronizationCommands::addInstructionCacheFlush(*streamForDispatch); this->csr->setInstructionCacheFlushed(); @@ -253,6 +254,7 @@ size_t CommandQueueHw::estimateStreamSizeForExecuteCommandListsRe for (uint32_t i = 0; i < numCommandLists; i++) { auto cmdList = CommandList::fromHandle(commandListHandles[i]); linearStreamSizeEstimate += estimateCommandListSecondaryStart(cmdList); + linearStreamSizeEstimate += this->estimateCommandListPrimaryStart(ctx.globalInit); ctx.spaceForResidency += estimateCommandListResidencySize(cmdList); } @@ -268,6 +270,11 @@ size_t CommandQueueHw::estimateStreamSizeForExecuteCommandListsRe linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForFullCacheFlush(); } + auto csrHw = reinterpret_cast *>(this->csr); + if (ctx.isProgramActivePartitionConfigRequired) { + linearStreamSizeEstimate += csrHw->getCmdSizeForActivePartitionConfig(); + } + return linearStreamSizeEstimate; }