From d94e2bf14930d3c6e0ada34fcd0e6a6f21a9f981 Mon Sep 17 00:00:00 2001 From: Pawel Wilma Date: Thu, 26 Nov 2020 19:02:18 +0000 Subject: [PATCH] Refactoring of per-DSS backed buffer programming Signed-off-by: Pawel Wilma --- .../command_stream_receiver_hw_1_tests.cpp | 19 ------------------ .../command_stream_receiver_hw_2_tests.cpp | 2 +- .../libult/ult_command_stream_receiver.h | 2 ++ .../command_stream/command_stream_receiver.h | 1 + .../command_stream_receiver_hw.h | 2 ++ .../command_stream_receiver_hw_base.inl | 20 +++++++++++-------- shared/source/helpers/preamble.h | 4 +--- shared/source/helpers/preamble_base.inl | 14 +------------ .../unit_test/gen11/test_preamble_gen11.cpp | 2 +- .../gen12lp/test_preamble_gen12lp.cpp | 2 +- .../unit_test/gen9/skl/test_preamble_skl.cpp | 2 +- .../unit_test/preamble/preamble_tests.cpp | 6 +++--- 12 files changed, 26 insertions(+), 50 deletions(-) diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 3484bb951f..b179cf30cd 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -140,25 +140,6 @@ HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingPreambleCm EXPECT_EQ(expectedDifference, actualDifference); } -HWTEST_F(UltCommandStreamReceiverTest, givenPerDssBackBufferProgrammingEnabledWhenEstimatingPreambleCmdSizeThenResultIncludesPerDssBackBufferProgramingCommandsSize) { - DebugManagerStateRestore restore; - DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true); - - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; - - commandStreamReceiver.isPreambleSent = false; - auto preambleNotSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); - - commandStreamReceiver.isPreambleSent = true; - auto preambleSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); - - auto actualDifference = preambleNotSent - preambleSent; - auto expectedDifference = PreambleHelper::getThreadArbitrationCommandsSize() + PreambleHelper::getAdditionalCommandsSize(*pDevice) + PreambleHelper::getPerDssBackedBufferCommandsSize(pDevice->getHardwareInfo()); - - EXPECT_EQ(expectedDifference, actualDifference); -} - HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenMediaVfeStateDirtyEstimatingPreambleCmdSizeThenResultDependsVfeStateProgrammingCmdSize) { typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index cace157b95..7e70cd4f3b 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -1550,4 +1550,4 @@ TEST(BcsConstantsTests, givenBlitConstantsThenTheyHaveDesiredValues) { EXPECT_EQ(BlitterConstants::maxBlitHeight, 0x4000u); EXPECT_EQ(BlitterConstants::maxBlitSetWidth, 0x1FF80u); EXPECT_EQ(BlitterConstants::maxBlitSetHeight, 0x1FFC0u); -} +} \ No newline at end of file diff --git a/opencl/test/unit_test/libult/ult_command_stream_receiver.h b/opencl/test/unit_test/libult/ult_command_stream_receiver.h index 6381ddf69b..145f78c29e 100644 --- a/opencl/test/unit_test/libult/ult_command_stream_receiver.h +++ b/opencl/test/unit_test/libult/ult_command_stream_receiver.h @@ -41,8 +41,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::iohState; using BaseClass::isBlitterDirectSubmissionEnabled; using BaseClass::isDirectSubmissionEnabled; + using BaseClass::isPerDssBackedBufferSent; using BaseClass::perDssBackedBuffer; using BaseClass::programEnginePrologue; + using BaseClass::programPerDssBackedBuffer; using BaseClass::programPreamble; using BaseClass::programStateSip; using BaseClass::requiresInstructionCacheFlush; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index ffc0c82560..ff546ee954 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -301,6 +301,7 @@ class CommandStreamReceiver { bool isPreambleSent = false; bool isStateSipSent = false; bool isEnginePrologueSent = false; + bool isPerDssBackedBufferSent = false; bool GSBAFor32BitProgrammed = false; bool bindingTableBaseAddressRequired = false; bool mediaVfeStateDirty = true; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 2cf33798ef..72bda0375a 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -67,6 +67,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { size_t getCmdSizeForComputeMode(); size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const; size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const; + size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo); bool isComputeModeNeeded() const; bool isPipelineSelectAlreadyProgrammed() const; @@ -118,6 +119,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags); void programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags); void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags); + void programPerDssBackedBuffer(LinearStream &scr, Device &device, DispatchFlags &dispatchFlags); void programStateSip(LinearStream &cmdStream, Device &device); void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads); void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 7b9b14a273..383ffe5cb7 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -140,12 +140,6 @@ inline size_t CommandStreamReceiverHw::getRequiredCmdSizeForPreamble( if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) { size += PreambleHelper::getThreadArbitrationCommandsSize(); } - - if (DebugManager.flags.ForcePerDssBackedBufferProgramming.get()) { - if (!this->isPreambleSent) { - size += PreambleHelper::getPerDssBackedBufferCommandsSize(device.getHardwareInfo()); - } - } if (!this->isPreambleSent) { if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) { size += PreambleHelper::getSemaphoreDelayCommandSize(); @@ -339,6 +333,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( programL3(commandStreamCSR, dispatchFlags, newL3Config); programPreamble(commandStreamCSR, device, dispatchFlags, newL3Config); programMediaSampler(commandStreamCSR, dispatchFlags); + programPerDssBackedBuffer(commandStreamCSR, device, dispatchFlags); if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) { PreambleHelper::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy); @@ -761,6 +756,7 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat size += getCmdSizeForMediaSampler(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); size += getCmdSizeForPipelineSelect(); size += getCmdSizeForPreemption(dispatchFlags); + size += getCmdSizeForPerDssBackedBuffer(device.getHardwareInfo()); size += getCmdSizeForEpilogue(dispatchFlags); size += getCmdsSizeForHardwareContext(); @@ -868,8 +864,7 @@ inline void CommandStreamReceiverHw::programStateSip(LinearStream &cm template inline void CommandStreamReceiverHw::programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config) { if (!this->isPreambleSent) { - GraphicsAllocation *perDssBackedBufferToUse = dispatchFlags.usePerDssBackedBuffer ? this->perDssBackedBuffer : nullptr; - PreambleHelper::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation, perDssBackedBufferToUse); + PreambleHelper::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation); this->isPreambleSent = true; this->lastSentL3Config = newL3Config; this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy; @@ -897,6 +892,10 @@ template void CommandStreamReceiverHw::programMediaSampler(LinearStream &commandStream, DispatchFlags &dispatchFlags) { } +template +void CommandStreamReceiverHw::programPerDssBackedBuffer(LinearStream &commandStream, Device &device, DispatchFlags &dispatchFlags) { +} + template size_t CommandStreamReceiverHw::getCmdSizeForMediaSampler(bool mediaSamplerRequired) const { return 0; @@ -1188,4 +1187,9 @@ inline bool CommandStreamReceiverHw::checkDirectSubmissionSupportsEng return supported; } +template +size_t CommandStreamReceiverHw::getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo) { + return 0; +} + } // namespace NEO diff --git a/shared/source/helpers/preamble.h b/shared/source/helpers/preamble.h index b25f7ba4a7..65c04171fa 100644 --- a/shared/source/helpers/preamble.h +++ b/shared/source/helpers/preamble.h @@ -49,9 +49,8 @@ struct PreambleHelper { KernelExecutionType kernelExecutionType); static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo); static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config, - uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer); + uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr); static void programKernelDebugging(LinearStream *pCommandStream); - static void programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset); static void programSemaphoreDelay(LinearStream *pCommandStream); static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM); static bool isL3Configurable(const HardwareInfo &hwInfo); @@ -61,7 +60,6 @@ struct PreambleHelper { static size_t getKernelDebuggingCommandsSize(bool debuggingActive); static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo); static uint32_t getUrbEntryAllocationSize(); - static size_t getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo); static size_t getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo); static size_t getSemaphoreDelayCommandSize(); static uint32_t getScratchSizeValueToProgramMediaVfeState(uint32_t scratchSize); diff --git a/shared/source/helpers/preamble_base.inl b/shared/source/helpers/preamble_base.inl index 0d4f5f1383..d4e40ce86f 100644 --- a/shared/source/helpers/preamble_base.inl +++ b/shared/source/helpers/preamble_base.inl @@ -33,15 +33,6 @@ template void PreambleHelper::programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo) { } -template -void PreambleHelper::programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset) { -} - -template -size_t PreambleHelper::getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo) { - return 0; -} - template void PreambleHelper::programSemaphoreDelay(LinearStream *pCommandStream) { if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) { @@ -79,7 +70,7 @@ size_t PreambleHelper::getCmdSizeForPipelineSelect(const HardwareInfo template void PreambleHelper::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config, - uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer) { + uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr) { programL3(pCommandStream, l3Config); programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy); programPreemption(pCommandStream, device, preemptionCsr); @@ -87,9 +78,6 @@ void PreambleHelper::programPreamble(LinearStream *pCommandStream, De programKernelDebugging(pCommandStream); } programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo()); - if (perDssBackedBuffer != nullptr) { - programPerDssBackedBuffer(pCommandStream, device.getHardwareInfo(), perDssBackedBuffer); - } programSemaphoreDelay(pCommandStream); } diff --git a/shared/test/unit_test/gen11/test_preamble_gen11.cpp b/shared/test/unit_test/gen11/test_preamble_gen11.cpp index 40292ef4b8..c7d907cb42 100644 --- a/shared/test/unit_test/gen11/test_preamble_gen11.cpp +++ b/shared/test/unit_test/gen11/test_preamble_gen11.cpp @@ -124,7 +124,7 @@ GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArb MockDevice mockDevice; PreambleHelper::programPreamble(&linearStream, mockDevice, l3Config, ThreadArbitrationPolicy::RoundRobin, - nullptr, nullptr); + nullptr); parseCommands(cs); diff --git a/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp index 9961973da8..eb364b24b5 100644 --- a/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp @@ -24,7 +24,7 @@ HWTEST2_F(TglLpSlm, givenTglLpWhenPreambleIsBeingProgrammedThenThreadArbitration MockDevice mockDevice; PreambleHelper::programPreamble(&linearStream, mockDevice, l3Config, ThreadArbitrationPolicy::RoundRobin, - nullptr, nullptr); + nullptr); parseCommands(cs); diff --git a/shared/test/unit_test/gen9/skl/test_preamble_skl.cpp b/shared/test/unit_test/gen9/skl/test_preamble_skl.cpp index 589c1df3b5..aafc97fde1 100644 --- a/shared/test/unit_test/gen9/skl/test_preamble_skl.cpp +++ b/shared/test/unit_test/gen9/skl/test_preamble_skl.cpp @@ -80,7 +80,7 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio MockDevice mockDevice; PreambleHelper::programPreamble(&linearStream, mockDevice, l3Config, ThreadArbitrationPolicy::RoundRobin, - nullptr, nullptr); + nullptr); parseCommands(cs); diff --git a/shared/test/unit_test/preamble/preamble_tests.cpp b/shared/test/unit_test/preamble/preamble_tests.cpp index ba199b0f3b..4deb42a930 100644 --- a/shared/test/unit_test/preamble/preamble_tests.cpp +++ b/shared/test/unit_test/preamble/preamble_tests.cpp @@ -74,7 +74,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenMidThreadPreemptionWhenPreambleIs MockGraphicsAllocation csrSurface(reinterpret_cast(minCsrAlignment), 1024); PreambleHelper::programPreamble(&preambleStream, *mockDevice, 0U, - ThreadArbitrationPolicy::RoundRobin, &csrSurface, nullptr); + ThreadArbitrationPolicy::RoundRobin, &csrSurface); PreemptionHelper::programStateSip(preemptionStream, *mockDevice); @@ -144,7 +144,7 @@ HWTEST_F(PreambleTest, givenKernelDebuggingActiveWhenPreambleIsProgrammedThenPro LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size()); PreambleHelper::programPreamble(&preambleStream, *mockDevice, 0U, - ThreadArbitrationPolicy::RoundRobin, nullptr, nullptr); + ThreadArbitrationPolicy::RoundRobin, nullptr); HardwareParse hwParser; hwParser.parseCommands(preambleStream); @@ -158,7 +158,7 @@ HWTEST_F(PreambleTest, givenKernelDebuggingActiveWhenPreambleIsProgrammedThenPro StackVec preambleBuffer2(8192); preambleStream.replaceBuffer(&*preambleBuffer2.begin(), preambleBuffer2.size()); PreambleHelper::programPreamble(&preambleStream, *mockDevice, 0U, - ThreadArbitrationPolicy::RoundRobin, preemptionAllocation, nullptr); + ThreadArbitrationPolicy::RoundRobin, preemptionAllocation); HardwareParse hwParser2; hwParser2.parseCommands(preambleStream); cmdList = hwParser2.getCommandsList();