From 897420236a8e105b6fb5ddb2dbce8ac5540c05b4 Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Wed, 29 Sep 2021 10:28:33 +0000 Subject: [PATCH] Add method to set systolic mode Resolves: NEO-6040 Signed-off-by: Szymon Morek szymon.morek@intel.com --- .../helpers/test_preamble_xehp_and_later.cpp | 9 ++++++ .../libult/ult_command_stream_receiver.h | 1 + .../command_encoder_xehp_and_later.inl | 14 ++------- .../command_stream_receiver_hw_base.inl | 5 +++- shared/source/helpers/preamble.h | 4 +++ .../source/helpers/preamble_bdw_and_later.inl | 13 ++++++++ .../helpers/preamble_xehp_and_later.inl | 30 +++++++++++++++---- .../source/xe_hp_core/preamble_xe_hp_core.cpp | 11 +++++++ .../xe_hp_core/xehp/test_preamble_xehp.cpp | 12 ++++++++ .../unit_test/preamble/preamble_tests.cpp | 13 ++++++++ 10 files changed, 94 insertions(+), 18 deletions(-) diff --git a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp index 2ee79b996b..885d26a2da 100644 --- a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp @@ -504,3 +504,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, RenderSurfaceStateXeHPAndLaterTests, givenSpecificP EXPECT_EQ(FamilyType::RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, rssCmd.getCoherencyType()); } + +HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleFixture, whenCallingIsSpecialPipelineSelectModeChangedThenReturnCorrectValue) { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + bool oldPipelineSelectSpecialMode = true; + bool newPipelineSelectSpecialMode = false; + + auto result = PreambleHelper::isSpecialPipelineSelectModeChanged(oldPipelineSelectSpecialMode, newPipelineSelectSpecialMode, *defaultHwInfo); + EXPECT_TRUE(result); +} diff --git a/opencl/test/unit_test/libult/ult_command_stream_receiver.h b/opencl/test/unit_test/libult/ult_command_stream_receiver.h index afb3085aeb..772e084eb6 100644 --- a/opencl/test/unit_test/libult/ult_command_stream_receiver.h +++ b/opencl/test/unit_test/libult/ult_command_stream_receiver.h @@ -88,6 +88,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::lastSentL3Config; using BaseClass::CommandStreamReceiver::lastSentThreadArbitrationPolicy; using BaseClass::CommandStreamReceiver::lastSentUseGlobalAtomics; + using BaseClass::CommandStreamReceiver::lastSpecialPipelineSelectMode; using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig; using BaseClass::CommandStreamReceiver::latestFlushedTaskCount; using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig; diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 0090ade507..46cc3e8250 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -71,7 +71,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, if (kernelDescriptor.extendedInfo) { bool specialModeRequired = kernelDescriptor.extendedInfo->specialPipelineSelectModeRequired(); - if (container.lastPipelineSelectModeRequired != specialModeRequired) { + if (PreambleHelper::isSpecialPipelineSelectModeChanged(container.lastPipelineSelectModeRequired, specialModeRequired, hwInfo)) { container.lastPipelineSelectModeRequired = specialModeRequired; EncodeComputeMode::adjustPipelineSelect(container, kernelDescriptor); } @@ -565,18 +565,10 @@ template void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) { using PIPELINE_SELECT = typename Family::PIPELINE_SELECT; auto pipelineSelectCmd = Family::cmdInitPipelineSelect; + auto isSpecialModeSelected = kernelDescriptor.extendedInfo && kernelDescriptor.extendedInfo->specialPipelineSelectModeRequired(); - if (kernelDescriptor.extendedInfo && kernelDescriptor.extendedInfo->specialPipelineSelectModeRequired()) { - pipelineSelectCmd.setSystolicModeEnable(true); - } else { - pipelineSelectCmd.setSystolicModeEnable(false); - } + PreambleHelper::appendProgramPipelineSelect(&pipelineSelectCmd, isSpecialModeSelected, container.getDevice()->getHardwareInfo()); - if (DebugManager.flags.OverrideSystolicPipelineSelect.get() != -1) { - pipelineSelectCmd.setSystolicModeEnable(DebugManager.flags.OverrideSystolicPipelineSelect.get()); - } - - pipelineSelectCmd.setMaskBits(pipelineSelectSystolicModeEnableMaskBits); pipelineSelectCmd.setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); auto buffer = container.getCommandStream()->getSpace(sizeof(pipelineSelectCmd)); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index d4847a3b37..e1c0091b9a 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -266,12 +266,15 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } auto newL3Config = PreambleHelper::getL3Config(peekHwInfo(), dispatchFlags.useSLM); + auto isSpecialPipelineSelectModeChanged = PreambleHelper::isSpecialPipelineSelectModeChanged(lastSpecialPipelineSelectMode, + dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode, + peekHwInfo()); csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast(dispatchFlags.requiresCoherency); csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); - csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode; + csrSizeRequestFlags.specialPipelineSelectModeChanged = isSpecialPipelineSelectModeChanged; if (dispatchFlags.numGrfRequired == GrfConfig::NotApplicable) { dispatchFlags.numGrfRequired = lastSentNumGrfRequired; diff --git a/shared/source/helpers/preamble.h b/shared/source/helpers/preamble.h index c28a3093c4..b0a84acae0 100644 --- a/shared/source/helpers/preamble.h +++ b/shared/source/helpers/preamble.h @@ -37,6 +37,7 @@ struct PreambleHelper { static void programPipelineSelect(LinearStream *pCommandStream, const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo); + static void appendProgramPipelineSelect(void *cmd, bool isSpecialModeSelected, const HardwareInfo &hwInfo); static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy); static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr); static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, EngineGroupType engineGroupType); @@ -58,6 +59,9 @@ struct PreambleHelper { static void programSemaphoreDelay(LinearStream *pCommandStream); static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM); static bool isL3Configurable(const HardwareInfo &hwInfo); + static bool isSystolicModeConfigurable(const HardwareInfo &hwInfo); + static bool isSpecialPipelineSelectModeChanged(bool lastSpecialPipelineSelectMode, bool newSpecialPipelineSelectMode, + const HardwareInfo &hwInfo); static size_t getAdditionalCommandsSize(const Device &device); static size_t getThreadArbitrationCommandsSize(); static std::vector getSupportedThreadArbitrationPolicies(); diff --git a/shared/source/helpers/preamble_bdw_and_later.inl b/shared/source/helpers/preamble_bdw_and_later.inl index ca55c6a9de..28c041d663 100644 --- a/shared/source/helpers/preamble_bdw_and_later.inl +++ b/shared/source/helpers/preamble_bdw_and_later.inl @@ -75,4 +75,17 @@ size_t PreambleHelper::getVFECommandsSize() { return sizeof(MEDIA_VFE_STATE) + sizeof(PIPE_CONTROL); } +template +void PreambleHelper::appendProgramPipelineSelect(void *cmd, bool isSpecialModeSelected, const HardwareInfo &hwInfo) {} + +template +bool PreambleHelper::isSystolicModeConfigurable(const HardwareInfo &hwInfo) { + return false; +} + +template +bool PreambleHelper::isSpecialPipelineSelectModeChanged(bool lastSpecialPipelineSelectMode, bool newSpecialPipelineSelectMode, + const HardwareInfo &hwInfo) { + return false; +} } // namespace NEO diff --git a/shared/source/helpers/preamble_xehp_and_later.inl b/shared/source/helpers/preamble_xehp_and_later.inl index bf15c2c336..10eda4d0e8 100644 --- a/shared/source/helpers/preamble_xehp_and_later.inl +++ b/shared/source/helpers/preamble_xehp_and_later.inl @@ -22,6 +22,28 @@ namespace NEO { +template <> +bool PreambleHelper::isSystolicModeConfigurable(const HardwareInfo &hwInfo); + +template <> +void PreambleHelper::appendProgramPipelineSelect(void *cmd, bool isSpecialModeSelected, const HardwareInfo &hwInfo) { + using PIPELINE_SELECT = typename Family::PIPELINE_SELECT; + auto command = static_cast(cmd); + auto mask = command->getMaskBits(); + + if (PreambleHelper::isSystolicModeConfigurable(hwInfo)) { + command->setSystolicModeEnable(isSpecialModeSelected); + mask |= pipelineSelectSystolicModeEnableMaskBits; + } + + if (DebugManager.flags.OverrideSystolicPipelineSelect.get() != -1) { + command->setSystolicModeEnable(DebugManager.flags.OverrideSystolicPipelineSelect.get()); + mask |= pipelineSelectSystolicModeEnableMaskBits; + } + + command->setMaskBits(mask); +} + template <> void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, const PipelineSelectArgs &pipelineSelectArgs, @@ -45,17 +67,13 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, auto pCmd = pCommandStream->getSpaceForCmd(); auto mask = pipelineSelectEnablePipelineSelectMaskBits | - pipelineSelectMediaSamplerDopClockGateMaskBits | - pipelineSelectSystolicModeEnableMaskBits; + pipelineSelectMediaSamplerDopClockGateMaskBits; cmd.setMaskBits(mask); cmd.setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); cmd.setMediaSamplerDopClockGateEnable(!pipelineSelectArgs.mediaSamplerRequired); - cmd.setSystolicModeEnable(pipelineSelectArgs.specialPipelineSelectMode); - if (DebugManager.flags.OverrideSystolicPipelineSelect.get() != -1) { - cmd.setSystolicModeEnable(DebugManager.flags.OverrideSystolicPipelineSelect.get()); - } + appendProgramPipelineSelect(&cmd, pipelineSelectArgs.specialPipelineSelectMode, hwInfo); *pCmd = cmd; diff --git a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp index 2df291c2c4..3997beb5b1 100644 --- a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp @@ -36,6 +36,17 @@ void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, c } } +template <> +bool PreambleHelper::isSpecialPipelineSelectModeChanged(bool lastSpecialPipelineSelectMode, bool newSpecialPipelineSelectMode, + const HardwareInfo &hwInfo) { + return lastSpecialPipelineSelectMode != newSpecialPipelineSelectMode; +} + +template <> +bool PreambleHelper::isSystolicModeConfigurable(const HardwareInfo &hwInfo) { + return true; +} + template struct PreambleHelper; } // namespace NEO diff --git a/shared/test/common/xe_hp_core/xehp/test_preamble_xehp.cpp b/shared/test/common/xe_hp_core/xehp/test_preamble_xehp.cpp index ae804f4aa2..c678338850 100644 --- a/shared/test/common/xe_hp_core/xehp/test_preamble_xehp.cpp +++ b/shared/test/common/xe_hp_core/xehp/test_preamble_xehp.cpp @@ -114,3 +114,15 @@ XEHPTEST_F(XeHPPreambleVfeState, WhenProgramVFEStateIsCalledThenCorrectCfeStateA EXPECT_EQ(1u, cfeCmd.getNumberOfWalkers()); EXPECT_EQ(expectedAddress, cfeCmd.getScratchSpaceBuffer()); } + +using XeHPPipelineSelect = ::testing::Test; + +XEHPTEST_F(XeHPPipelineSelect, WhenAppendProgramPipelineSelectThenCorrectValuesSet) { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + PIPELINE_SELECT cmd = FamilyType::cmdInitPipelineSelect; + PreambleHelper::appendProgramPipelineSelect(&cmd, true, *defaultHwInfo); + EXPECT_TRUE(cmd.getSystolicModeEnable()); + PreambleHelper::appendProgramPipelineSelect(&cmd, false, *defaultHwInfo); + EXPECT_FALSE(cmd.getSystolicModeEnable()); + EXPECT_EQ(pipelineSelectSystolicModeEnableMaskBits, cmd.getMaskBits()); +} diff --git a/shared/test/unit_test/preamble/preamble_tests.cpp b/shared/test/unit_test/preamble/preamble_tests.cpp index 6e6b89b32e..b849b5565c 100644 --- a/shared/test/unit_test/preamble/preamble_tests.cpp +++ b/shared/test/unit_test/preamble/preamble_tests.cpp @@ -259,6 +259,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenGetScratchSpaceAddressOffsetForVfe offset + reinterpret_cast(preambleStream.getCpuBase())); } +HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenIsSystolicModeConfigurableThenReturnFalse) { + auto result = PreambleHelper::isSystolicModeConfigurable(*defaultHwInfo); + EXPECT_FALSE(result); +} + +HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, WhenAppendProgramPipelineSelectThenNothingChanged) { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + PIPELINE_SELECT cmd = FamilyType::cmdInitPipelineSelect; + cmd.setMaskBits(pipelineSelectEnablePipelineSelectMaskBits); + PreambleHelper::appendProgramPipelineSelect(&cmd, true, *defaultHwInfo); + EXPECT_EQ(pipelineSelectEnablePipelineSelectMaskBits, cmd.getMaskBits()); +} + HWTEST_F(PreambleTest, givenSetForceSemaphoreDelayBetweenWaitsWhenProgramSemaphoreDelayThenSemaWaitPollRegisterIsProgrammed) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; DebugManagerStateRestore debugManagerStateRestore;