From d1bc7199de140e3c064498ab4299c00921759290 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Wed, 25 Mar 2020 10:04:42 +0100 Subject: [PATCH] Switch to 3D pipeline to program selected commands - part 2 Resolves: NEO-4447 Change-Id: I1dd6a9694cdf3be19aadec1cd139c466baecbcd7 Signed-off-by: Lukasz Jobczyk --- Jenkinsfile | 2 +- .../core/source/cmdqueue/cmdqueue_hw_base.inl | 7 ++- .../command_container/command_encoder.h | 6 +++ .../command_encoder_base.inl | 17 ++++++ .../gen12lp/command_encoder_gen12lp.cpp | 21 ++++++++ .../encoders/test_encode_dispatch_kernel.cpp | 9 +++- .../unit_test/encoders/test_encode_states.cpp | 7 ++- .../gen12lp/test_command_encoder_gen12lp.cpp | 52 +++++++++++++++++++ 8 files changed, 115 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0c8ebfa847..e765ad311b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,5 @@ #!groovy dependenciesRevision='3232e5d67b5c3dd2323f13bede3ab1558b5aa4b9-1401' strategy='EQUAL' -allowedCD=222 +allowedCD=221 allowedF=11 diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl index e1e416ac6b..8dd738631a 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl @@ -7,6 +7,8 @@ #pragma once +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_container/command_encoder_base.inl" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" @@ -40,6 +42,7 @@ void CommandQueueHw::programGeneralStateBaseAddress(uint64_t gsba pcCmd->setCommandStreamerStallEnable(true); auto gmmHelper = device->getNEODevice()->getGmmHelper(); + NEO::EncodeWA::encodeAdditionalPipelineSelect(*device->getNEODevice(), commandStream, true); NEO::StateBaseAddressHelper::programStateBaseAddress(commandStream, nullptr, @@ -54,6 +57,8 @@ void CommandQueueHw::programGeneralStateBaseAddress(uint64_t gsba false); gsbaInit = true; + + NEO::EncodeWA::encodeAdditionalPipelineSelect(*device->getNEODevice(), commandStream, false); } template @@ -62,7 +67,7 @@ size_t CommandQueueHw::estimateStateBaseAddressCmdSize() { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - constexpr size_t size = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL); + size_t size = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL) + NEO::EncodeWA::getAdditionalPipelineSelectSize(*device->getNEODevice()); return size; } diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index ee064fd988..2fc27f7c90 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -173,6 +173,12 @@ struct EncodeComputeMode { static void adjustPipelineSelect(CommandContainer &container, uint32_t numGrfRequired); }; +template +struct EncodeWA { + static void encodeAdditionalPipelineSelect(Device &device, LinearStream &stream, bool is3DPipeline); + static size_t getAdditionalPipelineSelectSize(Device &device); +}; + template struct EncodeSempahore { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; diff --git a/shared/source/command_container/command_encoder_base.inl b/shared/source/command_container/command_encoder_base.inl index a2e8db4914..99f4bc3f11 100644 --- a/shared/source/command_container/command_encoder_base.inl +++ b/shared/source/command_container/command_encoder_base.inl @@ -53,7 +53,10 @@ void EncodeDispatchKernel::encode(CommandContainer &container, idd.setKernelStartPointer(offset); idd.setKernelStartPointerHigh(0u); } + + EncodeWA::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), true); EncodeStates::adjustStateComputeMode(*container.getCommandStream(), container.lastSentNumGrfRequired, nullptr, false, false); + EncodeWA::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), false); auto threadsPerThreadGroup = dispatchInterface->getThreadsPerThreadGroupCount(); idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); @@ -224,6 +227,8 @@ void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &contai template void EncodeStateBaseAddress::encode(CommandContainer &container) { + EncodeWA::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), true); + auto gmmHelper = container.getDevice()->getGmmHelper(); StateBaseAddressHelper::programStateBaseAddress( @@ -238,6 +243,8 @@ void EncodeStateBaseAddress::encode(CommandContainer &container) { false, gmmHelper, false); + + EncodeWA::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), false); } template @@ -259,6 +266,7 @@ size_t EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(Device totalSize += sizeof(MEDIA_STATE_FLUSH); totalSize += issueMediaInterfaceDescriptorLoad; totalSize += EncodeStates::getAdjustStateComputeModeSize(); + totalSize += EncodeWA::getAdditionalPipelineSelectSize(*device); totalSize += EncodeIndirectParams::getCmdsSizeForIndirectParams(); totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect(); totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect(); @@ -278,4 +286,13 @@ template size_t EncodeMiFlushDW::getMiFlushDwWaSize() { return 0; } + +template +inline void EncodeWA::encodeAdditionalPipelineSelect(Device &device, LinearStream &stream, bool is3DPipeline) {} + +template +inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { + return 0; +} + } // namespace NEO diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 3ea23237f7..64e4f1535a 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -11,10 +11,21 @@ #include "shared/source/command_container/encode_compute_mode_tgllp_plus.inl" #include "shared/source/gen12lp/hw_cmds_base.h" #include "shared/source/gen12lp/reg_configs.h" +#include "shared/source/helpers/preamble.h" namespace NEO { using Family = TGLLPFamily; + +template <> +inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { + size_t size = 0; + if (device.getDefaultEngine().commandStreamReceiver->isRcs()) { + size += 2 * PreambleHelper::getCmdSizeForPipelineSelect(device.getHardwareInfo()); + } + return size; +} + template <> size_t EncodeStates::getAdjustStateComputeModeSize() { return sizeof(typename Family::STATE_COMPUTE_MODE); @@ -27,6 +38,15 @@ void EncodeComputeMode::adjustComputeMode(LinearStream &csr, uint32_t nu *reinterpret_cast(buffer) = *stateComputeMode; } +template <> +inline void EncodeWA::encodeAdditionalPipelineSelect(Device &device, LinearStream &stream, bool is3DPipeline) { + if (device.getDefaultEngine().commandStreamReceiver->isRcs()) { + PipelineSelectArgs args; + args.is3DPipelineRequired = is3DPipeline; + PreambleHelper::programPipelineSelect(&stream, args, device.getHardwareInfo()); + } +} + template struct EncodeDispatchKernel; template struct EncodeStates; template struct EncodeMath; @@ -42,4 +62,5 @@ template struct EncodeAtomic; template struct EncodeSempahore; template struct EncodeBatchBufferStartOrEnd; template struct EncodeMiFlushDW; +template struct EncodeWA; } // namespace NEO diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index e7e319db42..41feab2173 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -260,8 +260,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); - auto itorPC = find(commands.begin(), commands.end()); - ASSERT_EQ(itorPC, commands.end()); + if (HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(pDevice->getHardwareInfo())) { + auto itorPC = findAll(commands.begin(), commands.end()); + EXPECT_EQ(2u, itorPC.size()); + } else { + auto itorPC = find(commands.begin(), commands.end()); + ASSERT_EQ(itorPC, commands.end()); + } } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotChangedWhenDispatchKernelThenHeapsAreCleanAndFlushAdded) { diff --git a/shared/test/unit_test/encoders/test_encode_states.cpp b/shared/test/unit_test/encoders/test_encode_states.cpp index 4d784bdbe7..44f5882a02 100644 --- a/shared/test/unit_test/encoders/test_encode_states.cpp +++ b/shared/test/unit_test/encoders/test_encode_states.cpp @@ -107,7 +107,6 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithDirtyHeapsWhenSetStateBaseAddressCalledThenStateBaseAddressAreNotSet) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; cmdContainer->dirtyHeaps = 0; - auto baseAddres = cmdContainer->getCommandStream()->getCpuBase(); cmdContainer->setHeapDirty(NEO::HeapType::DYNAMIC_STATE); cmdContainer->setHeapDirty(NEO::HeapType::INDIRECT_OBJECT); @@ -119,7 +118,11 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithDirtyHeapsWhenSetStat auto ioh = cmdContainer->getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT); auto ssh = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE); - auto pCmd = static_cast(baseAddres); + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); + + auto itorCmd = find(commands.begin(), commands.end()); + auto pCmd = genCmdCast(*itorCmd); EXPECT_EQ(dsh->getHeapGpuBase(), pCmd->getDynamicStateBaseAddress()); EXPECT_EQ(ioh->getHeapGpuBase(), pCmd->getIndirectObjectBaseAddress()); diff --git a/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp index 1a5f20e841..4935ed8dc9 100644 --- a/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp @@ -7,6 +7,8 @@ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_container/command_encoder.h" +#include "shared/source/helpers/preamble.h" +#include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" @@ -62,3 +64,53 @@ GEN12LPTEST_F(CommandEncoderTest, givenCommandContainerWhenEncodeL3StateThenSetC EXPECT_EQ(cmd->getRegisterOffset(), 0xB134u); EXPECT_EQ(cmd->getDataDword(), 0xD0000020u); } + +struct MockOsContext : public OsContext { + using OsContext::engineType; +}; + +GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditionalPipelineSelectWAIsAppliedOnlyToRcs) { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; + + CommandContainer cmdContainer; + + bool ret = cmdContainer.initialize(pDevice); + ASSERT_TRUE(ret); + + { + EncodeStateBaseAddress::encode(cmdContainer); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed()); + auto itorLRI = find(commands.begin(), commands.end()); + EXPECT_NE(itorLRI, commands.end()); + } + + cmdContainer.reset(); + + { + static_cast(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS; + + EncodeStateBaseAddress::encode(cmdContainer); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed()); + auto itorLRI = find(commands.begin(), commands.end()); + EXPECT_EQ(itorLRI, commands.end()); + } +} + +GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEstimateCommandBufferSizeThenRcsHasAdditionalPipelineSelectWASize) { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; + + auto sizeWA = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(pDevice); + static_cast(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS; + auto size = EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(pDevice); + + auto expectedDiff = 2 * PreambleHelper::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo()); + auto diff = sizeWA - size; + + EXPECT_EQ(expectedDiff, diff); +}