From e695059152d185cb5654d74111871dfbf64c1535 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Mon, 3 Apr 2023 22:03:02 +0000 Subject: [PATCH] [perf] reduce host overhead in command list reset call There is no need to reset all fields and load support flags every reset call. Add dedicated calls that will reset values and dirty flags. Call virtual methods only once at init time. Related-To: NEO-7828 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 6 +- .../definitions/stream_properties.inl | 5 + .../command_stream/stream_properties.cpp | 48 ++++ .../source/command_stream/stream_properties.h | 6 + .../stream_properties_extra.cpp | 3 + .../stream_properties_tests_common.cpp | 210 ++++++++++++++++++ 6 files changed, 274 insertions(+), 4 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 345838f9ae..e08abacaa2 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -111,10 +111,8 @@ ze_result_t CommandListCoreFamily::reset() { commandListPreemptionMode = device->getDevicePreemptionMode(); commandListPerThreadScratchSize = 0u; commandListPerThreadPrivateScratchSize = 0u; - auto &rootDeviceEnvironment = this->device->getNEODevice()->getRootDeviceEnvironment(); - requiredStreamState = {}; - requiredStreamState.initSupport(rootDeviceEnvironment); - finalStreamState = requiredStreamState; + requiredStreamState.resetState(); + finalStreamState.resetState(); containsAnyKernel = false; containsCooperativeKernelsFlag = false; clearCommandsToPatch(); diff --git a/shared/source/command_stream/definitions/stream_properties.inl b/shared/source/command_stream/definitions/stream_properties.inl index cd5b9c7527..7f8964390b 100644 --- a/shared/source/command_stream/definitions/stream_properties.inl +++ b/shared/source/command_stream/definitions/stream_properties.inl @@ -30,6 +30,7 @@ struct StateComputeModeProperties { StreamProperty devicePreemptionMode{}; void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment); + void resetState(); void setPropertiesAll(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode); void setPropertiesGrfNumberThreadArbitration(uint32_t numGrfRequired, int32_t threadArbitrationPolicy); @@ -45,6 +46,7 @@ struct StateComputeModeProperties { void clearIsDirtyExtraPerContext(); void clearIsDirtyExtraPerKernel(); bool isDirtyExtra() const; + void resetStateExtra(); void setPropertiesExtraPerContext(); void setPropertiesExtraPerKernel(); @@ -75,6 +77,7 @@ struct FrontEndProperties { StreamProperty singleSliceDispatchCcsMode{}; void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment); + void resetState(); void setPropertiesAll(bool isCooperativeKernel, bool disableEuFusion, bool disableOverdispatch, int32_t engineInstancedDevice); void setPropertySingleSliceDispatchCcsMode(int32_t engineInstancedDevice); @@ -103,6 +106,7 @@ struct PipelineSelectProperties { StreamProperty systolicMode{}; void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment); + void resetState(); void setPropertiesAll(bool modeSelected, bool mediaSamplerDopClockGate, bool systolicMode); void setPropertiesModeSelectedMediaSamplerClockGate(bool modeSelected, bool mediaSamplerDopClockGate, bool clearDirtyState); @@ -137,6 +141,7 @@ struct StateBaseAddressProperties { StreamProperty statelessMocs{}; void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment); + void resetState(); void setPropertiesAll(bool globalAtomics, int32_t statelessMocs, int64_t bindingTablePoolBaseAddress, size_t bindingTablePoolSize, diff --git a/shared/source/command_stream/stream_properties.cpp b/shared/source/command_stream/stream_properties.cpp index f4e8de75e2..8d9379b6a3 100644 --- a/shared/source/command_stream/stream_properties.cpp +++ b/shared/source/command_stream/stream_properties.cpp @@ -132,6 +132,18 @@ void StateComputeModeProperties::initSupport(const RootDeviceEnvironment &rootDe this->propertiesSupportLoaded = true; } +void StateComputeModeProperties::resetState() { + clearIsDirty(); + + this->isCoherencyRequired.value = StreamProperty::initValue; + this->largeGrfMode.value = StreamProperty::initValue; + this->zPassAsyncComputeThreadLimit.value = StreamProperty::initValue; + this->pixelAsyncComputeThreadLimit.value = StreamProperty::initValue; + this->threadArbitrationPolicy.value = StreamProperty::initValue; + this->devicePreemptionMode.value = StreamProperty::initValue; + resetStateExtra(); +} + void StateComputeModeProperties::setPropertiesCoherencyDevicePreemption(bool requiresCoherency, PreemptionMode devicePreemptionMode, bool clearDirtyState) { DEBUG_BREAK_IF(!this->propertiesSupportLoaded); @@ -169,6 +181,15 @@ void FrontEndProperties::initSupport(const RootDeviceEnvironment &rootDeviceEnvi this->propertiesSupportLoaded = true; } +void FrontEndProperties::resetState() { + clearIsDirty(); + + this->computeDispatchAllWalkerEnable.value = StreamProperty::initValue; + this->disableEUFusion.value = StreamProperty::initValue; + this->disableOverdispatch.value = StreamProperty::initValue; + this->singleSliceDispatchCcsMode.value = StreamProperty::initValue; +} + void FrontEndProperties::setPropertiesAll(bool isCooperativeKernel, bool disableEuFusion, bool disableOverdispatch, int32_t engineInstancedDevice) { DEBUG_BREAK_IF(!this->propertiesSupportLoaded); clearIsDirty(); @@ -270,6 +291,14 @@ void PipelineSelectProperties::initSupport(const RootDeviceEnvironment &rootDevi this->propertiesSupportLoaded = true; } +void PipelineSelectProperties::resetState() { + clearIsDirty(); + + this->modeSelected.value = StreamProperty::initValue; + this->mediaSamplerDopClockGate.value = StreamProperty::initValue; + this->systolicMode.value = StreamProperty::initValue; +} + void PipelineSelectProperties::setPropertiesAll(bool modeSelected, bool mediaSamplerDopClockGate, bool systolicMode) { DEBUG_BREAK_IF(!this->propertiesSupportLoaded); clearIsDirty(); @@ -344,6 +373,25 @@ void StateBaseAddressProperties::initSupport(const RootDeviceEnvironment &rootDe this->propertiesSupportLoaded = true; } +void StateBaseAddressProperties::resetState() { + clearIsDirty(); + + this->statelessMocs.value = StreamProperty::initValue; + this->globalAtomics.value = StreamProperty::initValue; + + this->bindingTablePoolBaseAddress.value = StreamProperty::initValue; + this->bindingTablePoolSize.value = StreamProperty::initValue; + + this->surfaceStateBaseAddress.value = StreamProperty::initValue; + this->surfaceStateSize.value = StreamProperty::initValue; + + this->indirectObjectBaseAddress.value = StreamProperty::initValue; + this->indirectObjectSize.value = StreamProperty::initValue; + + this->dynamicStateBaseAddress.value = StreamProperty::initValue; + this->dynamicStateSize.value = StreamProperty::initValue; +} + void StateBaseAddressProperties::setPropertiesBindingTableSurfaceState(int64_t bindingTablePoolBaseAddress, size_t bindingTablePoolSize, int64_t surfaceStateBaseAddress, size_t surfaceStateSize) { DEBUG_BREAK_IF(!this->propertiesSupportLoaded); diff --git a/shared/source/command_stream/stream_properties.h b/shared/source/command_stream/stream_properties.h index dbe65d95fb..2af315e402 100644 --- a/shared/source/command_stream/stream_properties.h +++ b/shared/source/command_stream/stream_properties.h @@ -23,6 +23,12 @@ struct StreamProperties { pipelineSelect.initSupport(rootDeviceEnvironment); stateBaseAddress.initSupport(rootDeviceEnvironment); } + void resetState() { + stateComputeMode.resetState(); + frontEndState.resetState(); + pipelineSelect.resetState(); + stateBaseAddress.resetState(); + } }; } // namespace NEO diff --git a/shared/source/command_stream/stream_properties_extra.cpp b/shared/source/command_stream/stream_properties_extra.cpp index e907d755c5..d1cdd67aaa 100644 --- a/shared/source/command_stream/stream_properties_extra.cpp +++ b/shared/source/command_stream/stream_properties_extra.cpp @@ -25,3 +25,6 @@ void StateComputeModeProperties::clearIsDirtyExtraPerContext() { } void StateComputeModeProperties::clearIsDirtyExtraPerKernel() { } + +void StateComputeModeProperties::resetStateExtra() { +} diff --git a/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp b/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp index 831c6ac60d..65f60a88b4 100644 --- a/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp +++ b/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp @@ -381,6 +381,39 @@ TEST(StreamPropertiesTests, givenGrfNumberAndThreadArbitrationStateComputeModePr EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value); } +TEST(StreamPropertiesTests, givenSetAllStateComputeModePropertiesWhenResetingStateThenResetValuesAndDirtyKeepSupportFlagLoaded) { + MockStateComputeModeProperties scmProperties{}; + scmProperties.propertiesSupportLoaded = true; + scmProperties.scmPropertiesSupport.coherencyRequired = true; + scmProperties.scmPropertiesSupport.largeGrfMode = true; + scmProperties.scmPropertiesSupport.threadArbitrationPolicy = true; + scmProperties.scmPropertiesSupport.devicePreemptionMode = true; + + int32_t grfNumber = 128; + int32_t threadArbitration = 1; + PreemptionMode devicePreemptionMode = PreemptionMode::Initial; + bool coherency = false; + scmProperties.setPropertiesAll(coherency, static_cast(grfNumber), threadArbitration, devicePreemptionMode); + EXPECT_TRUE(scmProperties.isDirty()); + EXPECT_EQ(0, scmProperties.largeGrfMode.value); + EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value); + EXPECT_EQ(0, scmProperties.isCoherencyRequired.value); + EXPECT_EQ(static_cast(devicePreemptionMode), scmProperties.devicePreemptionMode.value); + + scmProperties.resetState(); + EXPECT_FALSE(scmProperties.isDirty()); + EXPECT_EQ(-1, scmProperties.largeGrfMode.value); + EXPECT_EQ(-1, scmProperties.threadArbitrationPolicy.value); + EXPECT_EQ(-1, scmProperties.isCoherencyRequired.value); + EXPECT_EQ(-1, scmProperties.devicePreemptionMode.value); + + EXPECT_TRUE(scmProperties.propertiesSupportLoaded); + EXPECT_TRUE(scmProperties.scmPropertiesSupport.coherencyRequired); + EXPECT_TRUE(scmProperties.scmPropertiesSupport.largeGrfMode); + EXPECT_TRUE(scmProperties.scmPropertiesSupport.threadArbitrationPolicy); + EXPECT_TRUE(scmProperties.scmPropertiesSupport.devicePreemptionMode); +} + TEST(StreamPropertiesTests, givenGrfNumberAndThreadArbitrationStateComputeModePropertiesWhenCopyingPropertyAndCheckIfDirtyThenExpectCorrectState) { MockStateComputeModeProperties scmProperties{}; scmProperties.propertiesSupportLoaded = true; @@ -622,6 +655,39 @@ TEST(StreamPropertiesTests, givenComputeDispatchAllWalkerEnableAndDisableEuFusio EXPECT_EQ(1, fePropertiesCopy.computeDispatchAllWalkerEnable.value); } +TEST(StreamPropertiesTests, givenSetAllFrontEndPropertiesWhenResetingStateThenResetValuesAndDirtyKeepSupportFlagLoaded) { + MockFrontEndProperties feProperties{}; + feProperties.propertiesSupportLoaded = true; + feProperties.frontEndPropertiesSupport.computeDispatchAllWalker = true; + feProperties.frontEndPropertiesSupport.disableEuFusion = true; + feProperties.frontEndPropertiesSupport.disableOverdispatch = true; + feProperties.frontEndPropertiesSupport.singleSliceDispatchCcsMode = true; + + bool isCooperativeKernel = false; + bool disableEuFusion = true; + bool disableOverdispatch = true; + int32_t engineInstancedDevice = 3; + feProperties.setPropertiesAll(isCooperativeKernel, disableEuFusion, disableOverdispatch, engineInstancedDevice); + EXPECT_TRUE(feProperties.isDirty()); + EXPECT_EQ(0, feProperties.computeDispatchAllWalkerEnable.value); + EXPECT_EQ(1, feProperties.disableEUFusion.value); + EXPECT_EQ(1, feProperties.disableOverdispatch.value); + EXPECT_EQ(3, feProperties.singleSliceDispatchCcsMode.value); + + feProperties.resetState(); + EXPECT_FALSE(feProperties.isDirty()); + EXPECT_EQ(-1, feProperties.computeDispatchAllWalkerEnable.value); + EXPECT_EQ(-1, feProperties.disableEUFusion.value); + EXPECT_EQ(-1, feProperties.disableOverdispatch.value); + EXPECT_EQ(-1, feProperties.singleSliceDispatchCcsMode.value); + + EXPECT_TRUE(feProperties.propertiesSupportLoaded); + EXPECT_TRUE(feProperties.frontEndPropertiesSupport.computeDispatchAllWalker); + EXPECT_TRUE(feProperties.frontEndPropertiesSupport.disableEuFusion); + EXPECT_TRUE(feProperties.frontEndPropertiesSupport.disableOverdispatch); + EXPECT_TRUE(feProperties.frontEndPropertiesSupport.singleSliceDispatchCcsMode); +} + TEST(StreamPropertiesTests, whenSettingPipelineSelectPropertiesThenCorrectValueIsSet) { MockExecutionEnvironment mockExecutionEnvironment{}; auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); @@ -670,6 +736,32 @@ TEST(StreamPropertiesTests, givenModeSelectPipelineSelectPropertyWhenSettingChan EXPECT_TRUE(pipeProperties.isDirty()); } +TEST(StreamPropertiesTests, givenSetAllPipelineSelectPropertiesWhenResetingStateThenResetValuesAndDirtyKeepSupportFlagLoaded) { + MockPipelineSelectProperties psProperties{}; + psProperties.propertiesSupportLoaded = true; + psProperties.pipelineSelectPropertiesSupport.mediaSamplerDopClockGate = true; + psProperties.pipelineSelectPropertiesSupport.systolicMode = true; + + bool modeSelected = false; + bool mediaSamplerDopClockGate = false; + bool systolicMode = true; + psProperties.setPropertiesAll(modeSelected, mediaSamplerDopClockGate, systolicMode); + EXPECT_TRUE(psProperties.isDirty()); + EXPECT_EQ(0, psProperties.modeSelected.value); + EXPECT_EQ(0, psProperties.mediaSamplerDopClockGate.value); + EXPECT_EQ(1, psProperties.systolicMode.value); + + psProperties.resetState(); + EXPECT_FALSE(psProperties.isDirty()); + EXPECT_EQ(-1, psProperties.modeSelected.value); + EXPECT_EQ(-1, psProperties.mediaSamplerDopClockGate.value); + EXPECT_EQ(-1, psProperties.systolicMode.value); + + EXPECT_TRUE(psProperties.propertiesSupportLoaded); + EXPECT_TRUE(psProperties.pipelineSelectPropertiesSupport.mediaSamplerDopClockGate); + EXPECT_TRUE(psProperties.pipelineSelectPropertiesSupport.systolicMode); +} + TEST(StreamPropertiesTests, givenSystolicModePipelineSelectPropertyWhenSettingPropertyAndCheckIfSupportedThenExpectCorrectState) { MockPipelineSelectProperties pipeProperties{}; pipeProperties.propertiesSupportLoaded = true; @@ -1343,3 +1435,121 @@ TEST(StreamPropertiesTests, givenIndirectObjectBaseAddressStateBaseAddressProper EXPECT_EQ(2, sbaProperties.indirectObjectBaseAddress.value); EXPECT_EQ(2u, sbaProperties.indirectObjectSize.value); } + +TEST(StreamPropertiesTests, givenSetAllStateBaseAddressPropertiesWhenResetingStateThenResetValuesAndDirtyKeepSupportFlagLoaded) { + MockStateBaseAddressProperties sbaProperties{}; + sbaProperties.propertiesSupportLoaded = true; + sbaProperties.stateBaseAddressPropertiesSupport.globalAtomics = true; + sbaProperties.stateBaseAddressPropertiesSupport.bindingTablePoolBaseAddress = true; + + bool globalAtomics = true; + int32_t statelessMocs = 1; + int64_t bindingTablePoolBaseAddress = 2; + size_t bindingTablePoolSize = 3; + int64_t surfaceStateBaseAddress = 4; + size_t surfaceStateSize = 5; + int64_t dynamicStateBaseAddress = 6; + size_t dynamicStateSize = 7; + int64_t indirectObjectBaseAddress = 8; + size_t indirectObjectSize = 9; + + sbaProperties.setPropertiesAll(globalAtomics, statelessMocs, + bindingTablePoolBaseAddress, bindingTablePoolSize, + surfaceStateBaseAddress, surfaceStateSize, + dynamicStateBaseAddress, dynamicStateSize, + indirectObjectBaseAddress, indirectObjectSize); + EXPECT_TRUE(sbaProperties.isDirty()); + EXPECT_EQ(1, sbaProperties.globalAtomics.value); + EXPECT_EQ(1, sbaProperties.statelessMocs.value); + EXPECT_EQ(2, sbaProperties.bindingTablePoolBaseAddress.value); + EXPECT_EQ(3u, sbaProperties.bindingTablePoolSize.value); + EXPECT_EQ(4, sbaProperties.surfaceStateBaseAddress.value); + EXPECT_EQ(5u, sbaProperties.surfaceStateSize.value); + EXPECT_EQ(6, sbaProperties.dynamicStateBaseAddress.value); + EXPECT_EQ(7u, sbaProperties.dynamicStateSize.value); + EXPECT_EQ(8, sbaProperties.indirectObjectBaseAddress.value); + EXPECT_EQ(9u, sbaProperties.indirectObjectSize.value); + + sbaProperties.resetState(); + EXPECT_FALSE(sbaProperties.isDirty()); + EXPECT_EQ(-1, sbaProperties.globalAtomics.value); + EXPECT_EQ(-1, sbaProperties.statelessMocs.value); + EXPECT_EQ(-1, sbaProperties.bindingTablePoolBaseAddress.value); + EXPECT_EQ(StreamPropertySizeT::initValue, sbaProperties.bindingTablePoolSize.value); + EXPECT_EQ(-1, sbaProperties.surfaceStateBaseAddress.value); + EXPECT_EQ(StreamPropertySizeT::initValue, sbaProperties.surfaceStateSize.value); + EXPECT_EQ(-1, sbaProperties.dynamicStateBaseAddress.value); + EXPECT_EQ(StreamPropertySizeT::initValue, sbaProperties.dynamicStateSize.value); + EXPECT_EQ(-1, sbaProperties.indirectObjectBaseAddress.value); + EXPECT_EQ(StreamPropertySizeT::initValue, sbaProperties.indirectObjectSize.value); + + EXPECT_TRUE(sbaProperties.propertiesSupportLoaded); + EXPECT_TRUE(sbaProperties.stateBaseAddressPropertiesSupport.globalAtomics); + EXPECT_TRUE(sbaProperties.stateBaseAddressPropertiesSupport.bindingTablePoolBaseAddress); +} + +TEST(StreamPropertiesTests, givenAllStreamPropertiesSetWhenAllStreamPropertiesResetStateThenAllValuesBringToInitValue) { + MockExecutionEnvironment executionEnvironment{}; + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + + StreamProperties globalStreamProperties{}; + globalStreamProperties.initSupport(rootDeviceEnvironment); + + uint32_t grfNumber = 128; + int32_t threadArbitration = 1; + globalStreamProperties.stateComputeMode.setPropertiesAll(false, grfNumber, threadArbitration, PreemptionMode::Initial); + + bool isCooperativeKernel = false; + bool disableEuFusion = true; + bool disableOverdispatch = true; + int32_t engineInstancedDevice = 3; + globalStreamProperties.frontEndState.setPropertiesAll(isCooperativeKernel, disableEuFusion, disableOverdispatch, engineInstancedDevice); + + bool modeSelected = false; + bool mediaSamplerDopClockGate = false; + bool systolicMode = true; + globalStreamProperties.pipelineSelect.setPropertiesAll(modeSelected, mediaSamplerDopClockGate, systolicMode); + + bool globalAtomics = true; + int32_t statelessMocs = 1; + int64_t bindingTablePoolBaseAddress = 2; + size_t bindingTablePoolSize = 3; + int64_t surfaceStateBaseAddress = 4; + size_t surfaceStateSize = 5; + int64_t dynamicStateBaseAddress = 6; + size_t dynamicStateSize = 7; + int64_t indirectObjectBaseAddress = 8; + size_t indirectObjectSize = 9; + globalStreamProperties.stateBaseAddress.setPropertiesAll(globalAtomics, statelessMocs, + bindingTablePoolBaseAddress, bindingTablePoolSize, + surfaceStateBaseAddress, surfaceStateSize, + dynamicStateBaseAddress, dynamicStateSize, + indirectObjectBaseAddress, indirectObjectSize); + + globalStreamProperties.resetState(); + + EXPECT_EQ(-1, globalStreamProperties.stateComputeMode.largeGrfMode.value); + EXPECT_EQ(-1, globalStreamProperties.stateComputeMode.threadArbitrationPolicy.value); + EXPECT_EQ(-1, globalStreamProperties.stateComputeMode.isCoherencyRequired.value); + EXPECT_EQ(-1, globalStreamProperties.stateComputeMode.devicePreemptionMode.value); + + EXPECT_EQ(-1, globalStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value); + EXPECT_EQ(-1, globalStreamProperties.frontEndState.disableEUFusion.value); + EXPECT_EQ(-1, globalStreamProperties.frontEndState.disableOverdispatch.value); + EXPECT_EQ(-1, globalStreamProperties.frontEndState.singleSliceDispatchCcsMode.value); + + EXPECT_EQ(-1, globalStreamProperties.pipelineSelect.modeSelected.value); + EXPECT_EQ(-1, globalStreamProperties.pipelineSelect.mediaSamplerDopClockGate.value); + EXPECT_EQ(-1, globalStreamProperties.pipelineSelect.systolicMode.value); + + EXPECT_EQ(-1, globalStreamProperties.stateBaseAddress.globalAtomics.value); + EXPECT_EQ(-1, globalStreamProperties.stateBaseAddress.statelessMocs.value); + EXPECT_EQ(-1, globalStreamProperties.stateBaseAddress.bindingTablePoolBaseAddress.value); + EXPECT_EQ(StreamPropertySizeT::initValue, globalStreamProperties.stateBaseAddress.bindingTablePoolSize.value); + EXPECT_EQ(-1, globalStreamProperties.stateBaseAddress.surfaceStateBaseAddress.value); + EXPECT_EQ(StreamPropertySizeT::initValue, globalStreamProperties.stateBaseAddress.surfaceStateSize.value); + EXPECT_EQ(-1, globalStreamProperties.stateBaseAddress.dynamicStateBaseAddress.value); + EXPECT_EQ(StreamPropertySizeT::initValue, globalStreamProperties.stateBaseAddress.dynamicStateSize.value); + EXPECT_EQ(-1, globalStreamProperties.stateBaseAddress.indirectObjectBaseAddress.value); + EXPECT_EQ(StreamPropertySizeT::initValue, globalStreamProperties.stateBaseAddress.indirectObjectSize.value); +}