fix: correctly assign state transition when same command list executed twice

A single command list object can be passed multiple times to one
command list execution call.
Not every command list instance may require a dynamic preamble, because that
depends on the state in effect before that particular command list instance.
Correctly assign each state transition to its particular command list instance.

Related-To: NEO-7828

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-04-21 20:51:02 +00:00
committed by Compute-Runtime-Automation
parent fc53bbe183
commit 09ef0201c6
6 changed files with 115 additions and 27 deletions

View File

@@ -184,8 +184,7 @@ struct CommandQueueHw : public CommandQueueImp {
NEO::StreamProperties &requiredState,
bool &propertyDirty);
inline size_t estimatePipelineSelectCmdSize();
inline void programOneCmdListPipelineSelect(CommandList *commandList,
NEO::LinearStream &commandStream,
inline void programOneCmdListPipelineSelect(NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired);
inline size_t estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrState,
@@ -195,8 +194,7 @@ struct CommandQueueHw : public CommandQueueImp {
NEO::StreamProperties &requiredState,
bool &propertyDirty);
inline void programRequiredStateComputeModeForCommandList(CommandList *commandList,
NEO::LinearStream &commandStream,
inline void programRequiredStateComputeModeForCommandList(NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired);
inline size_t estimateStateBaseAddressCmdDispatchSize(bool bindingTableBaseAddress);
@@ -223,8 +221,6 @@ struct CommandQueueHw : public CommandQueueImp {
inline void programRequiredStateBaseAddressForCommandList(CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream,
NEO::HeapAddressModel commandListHeapAddressModel,
bool indirectHeapInLocalMemory,
CommandListRequiredStateChange &cmdListRequired);
inline void updateBaseAddressState(CommandList *lastCommandList);

View File

@@ -158,8 +158,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
if (this->stateChanges.size() > 0) {
auto &firstCmdListWithStateChange = this->stateChanges[0];
// check first required state change is for the first command list
if (firstCmdListWithStateChange.commandList == ctx.firstCommandList && firstCmdListWithStateChange.flags.propertyPsDirty) {
this->programOneCmdListPipelineSelect(ctx.firstCommandList, child, firstCmdListWithStateChange);
if (firstCmdListWithStateChange.cmdListIndex == 0 && firstCmdListWithStateChange.flags.propertyPsDirty) {
this->programOneCmdListPipelineSelect(child, firstCmdListWithStateChange);
firstCmdListWithStateChange.flags.propertyPsDirty = false;
}
}
@@ -187,12 +187,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
if (this->stateChanges.size() > this->currentStateChangeIndex) {
auto &stateChange = this->stateChanges[this->currentStateChangeIndex];
if (stateChange.commandList == commandList) {
if (stateChange.cmdListIndex == i) {
DEBUG_BREAK_IF(commandList != stateChange.commandList);
this->updateOneCmdListPreemptionModeAndCtxStatePreemption(child, stateChange);
this->programOneCmdListPipelineSelect(commandList, child, stateChange);
this->programOneCmdListPipelineSelect(child, stateChange);
this->programOneCmdListFrontEndIfDirty(ctx, child, stateChange);
this->programRequiredStateComputeModeForCommandList(commandList, child, stateChange);
this->programRequiredStateBaseAddressForCommandList(ctx, child, commandList->getCmdListHeapAddressModel(), commandList->getCmdContainer().isIndirectHeapInLocalMemory(), stateChange);
this->programRequiredStateComputeModeForCommandList(child, stateChange);
this->programRequiredStateBaseAddressForCommandList(ctx, child, stateChange);
this->currentStateChangeIndex++;
}
@@ -756,7 +757,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
if (propertyScmDirty || propertyFeDirty || propertyPsDirty || propertySbaDirty || frontEndReturnPoint || propertyPreemptionDirty) {
CommandListDirtyFlags dirtyFlags = {propertyScmDirty, propertyFeDirty, propertyPsDirty, propertySbaDirty, frontEndReturnPoint, propertyPreemptionDirty};
this->stateChanges.emplace_back(stagingState, cmdList, dirtyFlags, ctx.statePreemption);
this->stateChanges.emplace_back(stagingState, cmdList, dirtyFlags, ctx.statePreemption, i);
}
}
@@ -1278,7 +1279,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelectCmdSizeForMultipleCo
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListPipelineSelect(CommandList *commandList, NEO::LinearStream &commandStream,
void CommandQueueHw<gfxCoreFamily>::programOneCmdListPipelineSelect(NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired) {
if (!this->pipelineSelectStateTracking) {
return;
@@ -1290,7 +1291,7 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListPipelineSelect(CommandList
systolic,
false,
false,
commandList->getSystolicModeSupport()};
cmdListRequired.commandList->getSystolicModeSupport()};
NEO::PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, args, device->getNEODevice()->getRootDeviceEnvironment());
csr->setPreambleSetFlag(true);
@@ -1337,8 +1338,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateScmCmdSizeForMultipleCommandLists(
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandList(CommandList *commandList,
NEO::LinearStream &commandStream,
void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandList(NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired) {
if (!this->stateComputeModeTracking) {
return;
@@ -1349,11 +1349,11 @@ void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandLis
cmdListRequired.requiredState.pipelineSelect.systolicMode.value == 1,
false,
false,
commandList->getSystolicModeSupport()};
cmdListRequired.commandList->getSystolicModeSupport()};
bool isRcs = this->getCsr()->isRcs();
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(commandStream, cmdListRequired.requiredState.stateComputeMode, pipelineSelectArgs,
false, device->getNEODevice()->getRootDeviceEnvironment(), isRcs, this->getCsr()->getDcFlushSupport(), nullptr);
false, device->getNEODevice()->getRootDeviceEnvironment(), this->csr->isRcs(),
this->csr->getDcFlushSupport(), nullptr);
this->csr->setStateComputeModeDirty(false);
}
}
@@ -1361,14 +1361,14 @@ void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandLis
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programRequiredStateBaseAddressForCommandList(CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream,
NEO::HeapAddressModel commandListHeapAddressModel,
bool indirectHeapInLocalMemory,
CommandListRequiredStateChange &cmdListRequired) {
if (!this->stateBaseAddressTracking) {
return;
}
if (cmdListRequired.flags.propertySbaDirty) {
bool indirectHeapInLocalMemory = cmdListRequired.commandList->getCmdContainer().isIndirectHeapInLocalMemory();
programStateBaseAddress(ctx.scratchGsba,
indirectHeapInLocalMemory,
commandStream,
@@ -1439,7 +1439,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSizeForGlobalSt
if (baseAddressStateDirty) {
csrState.stateBaseAddress.copyPropertiesAll(cmdListRequired.stateBaseAddress);
} else {
csrState.stateBaseAddress.copyPropertiesStatelessMocs(cmdListFinal.stateBaseAddress);
csrState.stateBaseAddress.copyPropertiesStatelessMocs(cmdListRequired.stateBaseAddress);
}
csrState.stateBaseAddress.setPropertiesSurfaceState(globalStatelessHeap->getHeapGpuBase(), globalStatelessHeap->getHeapSizeInPages());

View File

@@ -112,14 +112,17 @@ struct CommandQueueImp : public CommandQueue {
CommandListRequiredStateChange() = default;
CommandListRequiredStateChange(NEO::StreamProperties &requiredState, CommandList *commandList,
CommandListDirtyFlags flags,
NEO::PreemptionMode newMode) : requiredState(requiredState),
commandList(commandList),
flags(flags),
newMode(newMode) {}
NEO::PreemptionMode newMode,
uint32_t cmdListIndex) : requiredState(requiredState),
commandList(commandList),
flags(flags),
newMode(newMode),
cmdListIndex(cmdListIndex) {}
NEO::StreamProperties requiredState{};
CommandList *commandList = nullptr;
CommandListDirtyFlags flags;
NEO::PreemptionMode newMode = NEO::PreemptionMode::Initial;
uint32_t cmdListIndex = 0;
};
using CommandListStateChangeList = std::vector<CommandListRequiredStateChange>;