diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index f6d289555c..80eddbf79a 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -182,6 +182,7 @@ struct CommandQueueHw : public CommandQueueImp { const NEO::StreamProperties &cmdListFinal); inline size_t estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy, + bool &scmStateDirty, const NEO::StreamProperties &cmdListRequired, const NEO::StreamProperties &cmdListFinal); inline void programRequiredStateComputeModeForCommandList(CommandList *commandList, diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 990bf48ef4..3a5f119a86 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -304,7 +304,11 @@ void CommandQueueHw::programOneCmdListFrontEndIfDirty( ctx.cmdListBeginState.frontEndState = {}; if (frontEndTrackingEnabled()) { - csrState.frontEndState.copyPropertiesAll(cmdListRequired.frontEndState); + if (shouldProgramVfe) { + csrState.frontEndState.copyPropertiesAll(cmdListRequired.frontEndState); + } else { + csrState.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cmdListRequired.frontEndState); + } csrState.frontEndState.setPropertySingleSliceDispatchCcsMode(ctx.engineInstanced); shouldProgramVfe |= csrState.frontEndState.isDirty(); @@ -314,7 +318,11 @@ void CommandQueueHw::programOneCmdListFrontEndIfDirty( this->programFrontEndAndClearDirtyFlag(shouldProgramVfe, ctx, cmdStream, csrState); if (frontEndTrackingEnabled()) { - csrState.frontEndState.copyPropertiesAll(cmdListFinal.frontEndState); + if (shouldProgramVfe) { + csrState.frontEndState.copyPropertiesAll(cmdListFinal.frontEndState); + } else { + csrState.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cmdListFinal.frontEndState); + } } } @@ -381,18 +389,28 @@ size_t CommandQueueHw::estimateFrontEndCmdSizeForMultipleCommandL auto singleFrontEndCmdSize = estimateFrontEndCmdSize(); size_t estimatedSize = 0; - csrStateCopy.frontEndState.copyPropertiesAll(cmdListRequired.frontEndState); + if (isFrontEndStateDirty) { + csrStateCopy.frontEndState.copyPropertiesAll(cmdListRequired.frontEndState); + } else { + csrStateCopy.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cmdListRequired.frontEndState); + } csrStateCopy.frontEndState.setPropertySingleSliceDispatchCcsMode(engineInstanced); + if (isFrontEndStateDirty || csrStateCopy.frontEndState.isDirty()) { estimatedSize += singleFrontEndCmdSize; - isFrontEndStateDirty = false; } if (this->frontEndStateTracking) { uint32_t frontEndChanges = commandList->getReturnPointsSize(); estimatedSize += (frontEndChanges * singleFrontEndCmdSize); estimatedSize += (frontEndChanges * NEO::EncodeBatchBufferStartOrEnd::getBatchBufferStartSize()); } - csrStateCopy.frontEndState.copyPropertiesAll(cmdListFinal.frontEndState); + + if (isFrontEndStateDirty) { + csrStateCopy.frontEndState.copyPropertiesAll(cmdListFinal.frontEndState); + isFrontEndStateDirty = false; + } else { + csrStateCopy.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cmdListFinal.frontEndState); + } return estimatedSize; } @@ -662,6 +680,7 @@ size_t CommandQueueHw::estimateLinearStreamSizeComplementary( bool frontEndStateDirtyCopy = ctx.frontEndStateDirty; bool gpgpuEnabledCopy = this->csr->getPreambleSetFlag(); bool baseAdresStateDirtyCopy = ctx.gsbaStateDirty; + bool scmStateDirtyCopy = this->csr->getStateComputeModeDirty(); for (uint32_t i = 0; i < numCommandLists; i++) { auto cmdList = CommandList::fromHandle(phCommandLists[i]); auto &requiredStreamState = cmdList->getRequiredStreamState(); @@ -670,7 +689,7 @@ size_t CommandQueueHw::estimateLinearStreamSizeComplementary( linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirtyCopy, ctx.engineInstanced, cmdList, streamPropertiesCopy, requiredStreamState, finalStreamState); linearStreamSizeEstimate += estimatePipelineSelectCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState, gpgpuEnabledCopy); - linearStreamSizeEstimate += estimateScmCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState); + linearStreamSizeEstimate += estimateScmCmdSizeForMultipleCommandLists(streamPropertiesCopy, scmStateDirtyCopy, requiredStreamState, finalStreamState); linearStreamSizeEstimate += estimateStateBaseAddressCmdSizeForMultipleCommandLists(baseAdresStateDirtyCopy, cmdList->getCmdListHeapAddressModel(), streamPropertiesCopy, requiredStreamState, finalStreamState); } } @@ -1148,16 +1167,24 @@ size_t CommandQueueHw::estimatePipelineSelectCmdSizeForMultipleCo return 0; } - size_t singlePipelineSelectSize = NEO::PreambleHelper::getCmdSizeForPipelineSelect(device->getNEODevice()->getRootDeviceEnvironment()); size_t estimatedSize = 0; - csrStateCopy.pipelineSelect.copyPropertiesAll(cmdListRequired.pipelineSelect); - if (!gpgpuEnabled || csrStateCopy.pipelineSelect.isDirty()) { - estimatedSize += singlePipelineSelectSize; - gpgpuEnabled = true; + if (!gpgpuEnabled) { + csrStateCopy.pipelineSelect.copyPropertiesAll(cmdListRequired.pipelineSelect); + } else { + csrStateCopy.pipelineSelect.copyPropertiesSystolicMode(cmdListRequired.pipelineSelect); } - csrStateCopy.pipelineSelect.copyPropertiesAll(cmdListFinal.pipelineSelect); + if (!gpgpuEnabled || csrStateCopy.pipelineSelect.isDirty()) { + estimatedSize += NEO::PreambleHelper::getCmdSizeForPipelineSelect(device->getNEODevice()->getRootDeviceEnvironment()); + } + + if (!gpgpuEnabled) { + csrStateCopy.pipelineSelect.copyPropertiesAll(cmdListFinal.pipelineSelect); + gpgpuEnabled = true; + } else { + csrStateCopy.pipelineSelect.copyPropertiesSystolicMode(cmdListFinal.pipelineSelect); + } return estimatedSize; } @@ -1170,7 +1197,11 @@ void CommandQueueHw::programOneCmdListPipelineSelect(CommandList } bool preambleSet = csr->getPreambleSetFlag(); - csrState.pipelineSelect.copyPropertiesAll(cmdListRequired.pipelineSelect); + if (!preambleSet) { + csrState.pipelineSelect.copyPropertiesAll(cmdListRequired.pipelineSelect); + } else { + csrState.pipelineSelect.copyPropertiesSystolicMode(cmdListRequired.pipelineSelect); + } if (!preambleSet || csrState.pipelineSelect.isDirty()) { bool systolic = csrState.pipelineSelect.systolicMode.value == 1 ? true : false; @@ -1184,11 +1215,16 @@ void CommandQueueHw::programOneCmdListPipelineSelect(CommandList csr->setPreambleSetFlag(true); } - csrState.pipelineSelect.copyPropertiesAll(cmdListFinal.pipelineSelect); + if (!preambleSet) { + csrState.pipelineSelect.copyPropertiesAll(cmdListFinal.pipelineSelect); + } else { + csrState.pipelineSelect.copyPropertiesSystolicMode(cmdListFinal.pipelineSelect); + } } template size_t CommandQueueHw::estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy, + bool &scmStateDirty, const NEO::StreamProperties &cmdListRequired, const NEO::StreamProperties &cmdListFinal) { if (!this->stateComputeModeTracking) { @@ -1199,11 +1235,22 @@ size_t CommandQueueHw::estimateScmCmdSizeForMultipleCommandLists( bool isRcs = this->getCsr()->isRcs(); - csrStateCopy.stateComputeMode.copyPropertiesAll(cmdListRequired.stateComputeMode); + if (scmStateDirty) { + csrStateCopy.stateComputeMode.copyPropertiesAll(cmdListRequired.stateComputeMode); + } else { + csrStateCopy.stateComputeMode.copyPropertiesGrfNumberThreadArbitration(cmdListRequired.stateComputeMode); + } + if (csrStateCopy.stateComputeMode.isDirty()) { estimatedSize = NEO::EncodeComputeMode::getCmdSizeForComputeMode(device->getNEODevice()->getRootDeviceEnvironment(), false, isRcs); } - csrStateCopy.stateComputeMode.copyPropertiesAll(cmdListFinal.stateComputeMode); + + if (scmStateDirty) { + csrStateCopy.stateComputeMode.copyPropertiesAll(cmdListFinal.stateComputeMode); + scmStateDirty = false; + } else { + csrStateCopy.stateComputeMode.copyPropertiesGrfNumberThreadArbitration(cmdListFinal.stateComputeMode); + } return estimatedSize; } @@ -1218,7 +1265,12 @@ void CommandQueueHw::programRequiredStateComputeModeForCommandLis return; } - csrState.stateComputeMode.copyPropertiesAll(cmdListRequired.stateComputeMode); + bool scmCsrDirty = this->csr->getStateComputeModeDirty(); + if (scmCsrDirty) { + csrState.stateComputeMode.copyPropertiesAll(cmdListRequired.stateComputeMode); + } else { + csrState.stateComputeMode.copyPropertiesGrfNumberThreadArbitration(cmdListRequired.stateComputeMode); + } if (csrState.stateComputeMode.isDirty()) { NEO::PipelineSelectArgs pipelineSelectArgs = { @@ -1232,7 +1284,12 @@ void CommandQueueHw::programRequiredStateComputeModeForCommandLis false, device->getNEODevice()->getRootDeviceEnvironment(), isRcs, this->getCsr()->getDcFlushSupport(), nullptr); this->csr->setStateComputeModeDirty(false); } - csrState.stateComputeMode.copyPropertiesAll(cmdListFinal.stateComputeMode); + + if (scmCsrDirty) { + csrState.stateComputeMode.copyPropertiesAll(cmdListFinal.stateComputeMode); + } else { + csrState.stateComputeMode.copyPropertiesGrfNumberThreadArbitration(cmdListFinal.stateComputeMode); + } } template @@ -1263,9 +1320,8 @@ void CommandQueueHw::programRequiredStateBaseAddressForGlobalStat const NEO::StreamProperties &cmdListFinal) { auto globalStatelessHeap = this->csr->getGlobalStatelessHeap(); - csrState.stateBaseAddress.copyPropertiesAll(cmdListRequired.stateBaseAddress); - csrState.stateBaseAddress.setPropertiesBindingTableSurfaceState(NEO::StreamProperty64::initValue, NEO::StreamPropertySizeT::initValue, - globalStatelessHeap->getHeapGpuBase(), globalStatelessHeap->getHeapSizeInPages()); + csrState.stateBaseAddress.copyPropertiesStatelessMocsIndirectState(cmdListRequired.stateBaseAddress); + csrState.stateBaseAddress.setPropertiesSurfaceState(globalStatelessHeap->getHeapGpuBase(), globalStatelessHeap->getHeapSizeInPages()); if (ctx.gsbaStateDirty || csrState.stateBaseAddress.isDirty()) { programStateBaseAddress(ctx.scratchGsba, @@ -1277,7 +1333,7 @@ void CommandQueueHw::programRequiredStateBaseAddressForGlobalStat ctx.gsbaStateDirty = false; } - csrState.stateBaseAddress.copyPropertiesAll(cmdListFinal.stateBaseAddress); + csrState.stateBaseAddress.copyPropertiesStatelessMocs(cmdListFinal.stateBaseAddress); } template @@ -1356,15 +1412,14 @@ size_t CommandQueueHw::estimateStateBaseAddressCmdSizeForGlobalSt size_t estimatedSize = 0; - csrStateCopy.stateBaseAddress.copyPropertiesAll(cmdListRequired.stateBaseAddress); - csrStateCopy.stateBaseAddress.setPropertiesBindingTableSurfaceState(NEO::StreamProperty64::initValue, NEO::StreamPropertySizeT::initValue, - globalStatelessHeap->getHeapGpuBase(), globalStatelessHeap->getHeapSizeInPages()); + csrStateCopy.stateBaseAddress.copyPropertiesStatelessMocsIndirectState(cmdListRequired.stateBaseAddress); + csrStateCopy.stateBaseAddress.setPropertiesSurfaceState(globalStatelessHeap->getHeapGpuBase(), globalStatelessHeap->getHeapSizeInPages()); if (baseAddressStateDirty || csrStateCopy.stateBaseAddress.isDirty()) { bool useBtiCommand = csrStateCopy.stateBaseAddress.bindingTablePoolBaseAddress.value != NEO::StreamProperty64::initValue; estimatedSize = estimateStateBaseAddressCmdDispatchSize(useBtiCommand); baseAddressStateDirty = false; } - csrStateCopy.stateBaseAddress.copyPropertiesAll(cmdListFinal.stateBaseAddress); + csrStateCopy.stateBaseAddress.copyPropertiesStatelessMocs(cmdListFinal.stateBaseAddress); return estimatedSize; } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 2418f7014d..4025a3420a 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -435,8 +435,8 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } programHardwareContext(commandStreamCSR); - programComputeMode(commandStreamCSR, dispatchFlags, hwInfo); programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs); + programComputeMode(commandStreamCSR, dispatchFlags, hwInfo); programL3(commandStreamCSR, newL3Config); programPreamble(commandStreamCSR, device, newL3Config); programMediaSampler(commandStreamCSR, dispatchFlags); @@ -568,6 +568,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( device.getDeviceInfo().imageSupport); } setGSBAStateDirty(false); + this->streamProperties.stateBaseAddress.clearIsDirty(); } addPipeControlBeforeStateSip(commandStreamCSR, device); @@ -809,6 +810,7 @@ void CommandStreamReceiverHw::programComputeMode(LinearStream &stream stream, this->streamProperties.stateComputeMode, dispatchFlags.pipelineSelectArgs, hasSharedHandles(), this->peekRootDeviceEnvironment(), isRcs(), this->dcFlushSupport, logicalStateHelper.get()); this->setStateComputeModeDirty(false); + this->streamProperties.stateComputeMode.clearIsDirty(); } } @@ -1106,7 +1108,7 @@ inline void CommandStreamReceiverHw::programVFEState(LinearStream &cs auto isCooperative = dispatchFlags.kernelExecutionType == KernelExecutionType::Concurrent; auto disableOverdispatch = (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet); - streamProperties.frontEndState.setPropertiesAll(isCooperative, dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced()); + this->streamProperties.frontEndState.setPropertiesAll(isCooperative, dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced()); auto &gfxCoreHelper = getGfxCoreHelper(); auto engineGroupType = gfxCoreHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo); @@ -1120,6 +1122,7 @@ inline void CommandStreamReceiverHw::programVFEState(LinearStream &cs flatBatchBufferHelper->collectScratchSpacePatchInfo(getScratchPatchAddress(), commandOffset, csr); } setMediaVFEStateDirty(false); + this->streamProperties.frontEndState.clearIsDirty(); } } diff --git a/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl index 37782f8562..67b26deadd 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl @@ -61,6 +61,7 @@ void CommandStreamReceiverHw::programPipelineSelect(LinearStream &com this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired; this->lastSystolicPipelineSelectMode = pipelineSelectArgs.systolicPipelineSelectMode; this->streamProperties.pipelineSelect.setPropertiesAll(true, this->lastMediaSamplerConfig, this->lastSystolicPipelineSelectMode); + this->streamProperties.pipelineSelect.clearIsDirty(); } } diff --git a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl index 7dcc89ee18..412a451d75 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl @@ -45,6 +45,7 @@ void CommandStreamReceiverHw::programPipelineSelect(LinearStream &com this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired; this->lastSystolicPipelineSelectMode = pipelineSelectArgs.systolicPipelineSelectMode; this->streamProperties.pipelineSelect.setPropertiesAll(true, this->lastMediaSamplerConfig, this->lastSystolicPipelineSelectMode); + this->streamProperties.pipelineSelect.clearIsDirty(); } } diff --git a/shared/source/command_stream/definitions/stream_properties.inl b/shared/source/command_stream/definitions/stream_properties.inl index be92eea348..cd5b9c7527 100644 --- a/shared/source/command_stream/definitions/stream_properties.inl +++ b/shared/source/command_stream/definitions/stream_properties.inl @@ -39,9 +39,9 @@ struct StateComputeModeProperties { void copyPropertiesGrfNumberThreadArbitration(const StateComputeModeProperties &properties); bool isDirty() const; + void clearIsDirty(); protected: - void clearIsDirty(); void clearIsDirtyExtraPerContext(); void clearIsDirtyExtraPerKernel(); bool isDirtyExtra() const; @@ -85,10 +85,9 @@ struct FrontEndProperties { void copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(const FrontEndProperties &properties); bool isDirty() const; - - protected: void clearIsDirty(); + protected: FrontEndPropertiesSupport frontEndPropertiesSupport = {}; bool propertiesSupportLoaded = false; }; @@ -113,10 +112,9 @@ struct PipelineSelectProperties { void copyPropertiesSystolicMode(const PipelineSelectProperties &properties); bool isDirty() const; - - protected: void clearIsDirty(); + protected: PipelineSelectPropertiesSupport pipelineSelectPropertiesSupport = {}; bool propertiesSupportLoaded = false; }; @@ -158,10 +156,9 @@ struct StateBaseAddressProperties { void copyPropertiesStatelessMocsIndirectState(const StateBaseAddressProperties &properties); bool isDirty() const; - - protected: void clearIsDirty(); + protected: StateBaseAddressPropertiesSupport stateBaseAddressPropertiesSupport = {}; bool propertiesSupportLoaded = false; }; diff --git a/shared/test/unit_test/gen12lp/coherency_tests_gen12lp.inl b/shared/test/unit_test/gen12lp/coherency_tests_gen12lp.inl index 1f3b88dc14..2ecaf0c7d3 100644 --- a/shared/test/unit_test/gen12lp/coherency_tests_gen12lp.inl +++ b/shared/test/unit_test/gen12lp/coherency_tests_gen12lp.inl @@ -215,7 +215,7 @@ GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithoutShar csr->flushTask(stream, 0, &stream, &stream, &stream, 0, flags, *device); }; - auto findCmd = [&](bool expectToBeProgrammed, bool expectCoherent, bool expectPipeControl) { + auto findCmd = [&](bool expectToBeProgrammed, bool expectCoherent) { HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); bool foundOne = false; @@ -230,12 +230,6 @@ GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithoutShar EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; - auto pc = genCmdCast(*(++it)); - if (!expectPipeControl && !SpecialUltHelperGen12lp::isPipeControlWArequired(device->getHardwareInfo().platform.eProductFamily)) { - EXPECT_EQ(nullptr, pc); - } else { - EXPECT_NE(nullptr, pc); - } } } EXPECT_EQ(expectToBeProgrammed, foundOne); @@ -243,13 +237,13 @@ GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithoutShar flushTask(false); if (MemorySynchronizationCommands::isBarrierPriorToPipelineSelectWaRequired(device->getRootDeviceEnvironment())) { - findCmd(true, false, true); // first time + findCmd(true, false); // first time } else { - findCmd(true, false, false); // first time + findCmd(true, false); // first time } flushTask(false); - findCmd(false, false, false); // not changed + findCmd(false, false); // not changed csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc); }