From ae4425351fcdac5d027463e1e554a81c081c39d6 Mon Sep 17 00:00:00 2001 From: Daria Hinz Date: Tue, 10 Sep 2019 16:13:11 +0200 Subject: [PATCH] Preamble Helper Refactor Change-Id: Iacd05dcb6d9047fc2814895fa87d1cd9be6df446 Signed-off-by: Daria Hinz --- core/helpers/CMakeLists.txt | 1 + core/helpers/pipeline_select_args.h | 15 +++++++++++++++ runtime/command_queue/enqueue_common.h | 4 ++-- .../command_stream_receiver_hw.h | 2 +- .../command_stream_receiver_hw_base.inl | 18 ++++++------------ .../command_stream_receiver_hw_bdw_plus.inl | 6 +++--- runtime/command_stream/csr_definitions.h | 4 ++-- .../gen11/command_stream_receiver_hw_gen11.cpp | 4 ++-- runtime/gen11/preamble_gen11.cpp | 6 +++--- runtime/gen8/preamble_gen8.cpp | 2 +- runtime/gen9/preamble_gen9.cpp | 4 ++-- runtime/helpers/preamble.h | 6 +++++- runtime/helpers/preamble_base.inl | 12 ++++++++++++ runtime/helpers/task_information.cpp | 5 ++--- .../command_queue/enqueue_kernel_1_tests.cpp | 4 ++-- .../program_media_sampler_tests_gen11.cpp | 4 ++-- 16 files changed, 61 insertions(+), 36 deletions(-) create mode 100644 core/helpers/pipeline_select_args.h diff --git a/core/helpers/CMakeLists.txt b/core/helpers/CMakeLists.txt index 893651ba8f..21461bfff1 100644 --- a/core/helpers/CMakeLists.txt +++ b/core/helpers/CMakeLists.txt @@ -12,6 +12,7 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/debug_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h ${CMAKE_CURRENT_SOURCE_DIR}/non_copyable_or_moveable.h + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_args.h ${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h ${CMAKE_CURRENT_SOURCE_DIR}/register_offsets.h ${CMAKE_CURRENT_SOURCE_DIR}/string.h diff --git a/core/helpers/pipeline_select_args.h b/core/helpers/pipeline_select_args.h new file mode 100644 index 0000000000..c06b4f81b4 --- /dev/null +++ b/core/helpers/pipeline_select_args.h @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include + +namespace NEO { +struct PipelineSelectArgs { + bool specialPipelineSelectMode = false; + bool mediaSamplerRequired = false; +}; +} // namespace NEO diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 628e0f2c4c..9ed76ba6fb 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -676,7 +676,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( dispatchFlags.useSLM = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.GSBA32BitRequired = commandType == CL_COMMAND_NDRANGE_KERNEL; - dispatchFlags.mediaSamplerRequired = mediaSamplerRequired; + dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; dispatchFlags.requiresCoherency = requiresCoherency; dispatchFlags.lowPriority = (QueuePriority::LOW == priority); dispatchFlags.throttle = getThrottle(); @@ -689,7 +689,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver()); } dispatchFlags.numGrfRequired = numGrfRequired; - dispatchFlags.specialPipelineSelectMode = specialPipelineSelectMode; + dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode; dispatchFlags.multiEngineQueue = this->multiEngineQueue; DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady); diff --git a/runtime/command_stream/command_stream_receiver_hw.h b/runtime/command_stream/command_stream_receiver_hw.h index 6992280d94..476802c771 100644 --- a/runtime/command_stream/command_stream_receiver_hw.h +++ b/runtime/command_stream/command_stream_receiver_hw.h @@ -78,7 +78,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags); void programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config); void programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config); - void programPipelineSelect(LinearStream &csr, DispatchFlags &dispatchFlags); + void programPipelineSelect(LinearStream &csr, PipelineSelectArgs &pipelineSelectArgs); void programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags); void programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags); void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags); diff --git a/runtime/command_stream/command_stream_receiver_hw_base.inl b/runtime/command_stream/command_stream_receiver_hw_base.inl index 2efbaa1617..998a608cf0 100644 --- a/runtime/command_stream/command_stream_receiver_hw_base.inl +++ b/runtime/command_stream/command_stream_receiver_hw_base.inl @@ -215,9 +215,9 @@ CompletionStamp CommandStreamReceiverHw::flushTask( csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast(dispatchFlags.requiresCoherency); csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; - csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.mediaSamplerRequired); + csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired; - csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.specialPipelineSelectMode; + csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode; auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations(); bool stateBaseAddressDirty = false; @@ -264,7 +264,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( programPreemption(commandStreamCSR, dispatchFlags); programComputeMode(commandStreamCSR, dispatchFlags); programL3(commandStreamCSR, dispatchFlags, newL3Config); - programPipelineSelect(commandStreamCSR, dispatchFlags); + programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs); programPreamble(commandStreamCSR, device, dispatchFlags, newL3Config); programMediaSampler(commandStreamCSR, dispatchFlags); @@ -607,7 +607,7 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat size += getCmdSizeForL3Config(); size += getCmdSizeForComputeMode(); - size += getCmdSizeForMediaSampler(dispatchFlags.mediaSamplerRequired); + size += getCmdSizeForMediaSampler(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); size += getCmdSizeForPipelineSelect(); size += getCmdSizeForPreemption(dispatchFlags); size += getCmdSizeForEpilogue(dispatchFlags); @@ -635,18 +635,12 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat template inline size_t CommandStreamReceiverHw::getCmdSizeForPipelineSelect() const { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT; - size_t size = 0; + size_t size = 0; if (csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.specialPipelineSelectModeChanged || !isPreambleSent) { - - size += sizeof(PIPELINE_SELECT); - if (HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(peekHwInfo())) { - size += sizeof(PIPE_CONTROL); - } + size += PreambleHelper::getCmdSizeForPipelineSelect(peekHwInfo()); } return size; } diff --git a/runtime/command_stream/command_stream_receiver_hw_bdw_plus.inl b/runtime/command_stream/command_stream_receiver_hw_bdw_plus.inl index 7300e53274..1fa9dcd14b 100644 --- a/runtime/command_stream/command_stream_receiver_hw_bdw_plus.inl +++ b/runtime/command_stream/command_stream_receiver_hw_bdw_plus.inl @@ -46,10 +46,10 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForL3Config() const } template -void CommandStreamReceiverHw::programPipelineSelect(LinearStream &commandStream, DispatchFlags &dispatchFlags) { +void CommandStreamReceiverHw::programPipelineSelect(LinearStream &commandStream, PipelineSelectArgs &pipelineSelectArgs) { if (csrSizeRequestFlags.mediaSamplerConfigChanged || !isPreambleSent) { - PreambleHelper::programPipelineSelect(&commandStream, dispatchFlags, peekHwInfo()); - this->lastMediaSamplerConfig = dispatchFlags.mediaSamplerRequired; + PreambleHelper::programPipelineSelect(&commandStream, pipelineSelectArgs, peekHwInfo()); + this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired; } } diff --git a/runtime/command_stream/csr_definitions.h b/runtime/command_stream/csr_definitions.h index c06096295a..5ef3d38950 100644 --- a/runtime/command_stream/csr_definitions.h +++ b/runtime/command_stream/csr_definitions.h @@ -6,6 +6,7 @@ */ #pragma once +#include "core/helpers/pipeline_select_args.h" #include "core/memory_manager/memory_constants.h" #include "runtime/helpers/csr_deps.h" #include "runtime/helpers/hw_info.h" @@ -38,6 +39,7 @@ constexpr uint32_t l3AndL1On = 2u; struct DispatchFlags { CsrDependencies csrDependencies; + PipelineSelectArgs pipelineSelectArgs; FlushStampTrackingObj *flushStampReference = nullptr; QueueThrottle throttle = QueueThrottle::MEDIUM; PreemptionMode preemptionMode = PreemptionMode::Disabled; @@ -48,12 +50,10 @@ struct DispatchFlags { bool useSLM = false; bool guardCommandBufferWithPipeControl = false; bool GSBA32BitRequired = false; - bool mediaSamplerRequired = false; bool requiresCoherency = false; bool lowPriority = false; bool implicitFlush = false; bool outOfOrderExecutionAllowed = false; - bool specialPipelineSelectMode = false; bool multiEngineQueue = false; bool epilogueRequired = false; }; diff --git a/runtime/gen11/command_stream_receiver_hw_gen11.cpp b/runtime/gen11/command_stream_receiver_hw_gen11.cpp index a65504f0dd..b537828c74 100644 --- a/runtime/gen11/command_stream_receiver_hw_gen11.cpp +++ b/runtime/gen11/command_stream_receiver_hw_gen11.cpp @@ -37,7 +37,7 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, using PWR_CLK_STATE_REGISTER = Family::PWR_CLK_STATE_REGISTER; if (peekHwInfo().platform.eProductFamily == IGFX_ICELAKE_LP) { - if (dispatchFlags.mediaSamplerRequired) { + if (dispatchFlags.pipelineSelectArgs.mediaSamplerRequired) { if (!lastVmeSubslicesConfig) { auto pc = addPipeControlCmd(stream); pc->setDcFlushEnable(true); @@ -108,7 +108,7 @@ template <> bool CommandStreamReceiverHw::detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) const { bool flag = DebugManager.flags.ForceCsrReprogramming.get(); if (peekHwInfo().platform.eProductFamily == IGFX_ICELAKE_LP) { - if (!dispatchFlags.mediaSamplerRequired) { + if (!dispatchFlags.pipelineSelectArgs.mediaSamplerRequired) { if (lastVmeSubslicesConfig) { flag = true; } diff --git a/runtime/gen11/preamble_gen11.cpp b/runtime/gen11/preamble_gen11.cpp index eae2dc5fdd..9f64717e6b 100644 --- a/runtime/gen11/preamble_gen11.cpp +++ b/runtime/gen11/preamble_gen11.cpp @@ -29,7 +29,7 @@ uint32_t PreambleHelper::getL3Config(const HardwareInfo &hwInfo, bool template <> void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, - const DispatchFlags &dispatchFlags, + const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; @@ -43,8 +43,8 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandStre pCmd->setMaskBits(mask); pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); - pCmd->setMediaSamplerDopClockGateEnable(!dispatchFlags.mediaSamplerRequired); - pCmd->setMediaSamplerPowerClockGateDisable(dispatchFlags.mediaSamplerRequired); + pCmd->setMediaSamplerDopClockGateEnable(!pipelineSelectArgs.mediaSamplerRequired); + pCmd->setMediaSamplerPowerClockGateDisable(pipelineSelectArgs.mediaSamplerRequired); } template <> diff --git a/runtime/gen8/preamble_gen8.cpp b/runtime/gen8/preamble_gen8.cpp index ed89e51e9f..180a17e2f7 100644 --- a/runtime/gen8/preamble_gen8.cpp +++ b/runtime/gen8/preamble_gen8.cpp @@ -38,7 +38,7 @@ bool PreambleHelper::isL3Configurable(const HardwareInfo &hwInfo) { template <> void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, - const DispatchFlags &dispatchFlags, + const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo) { typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT; diff --git a/runtime/gen9/preamble_gen9.cpp b/runtime/gen9/preamble_gen9.cpp index 4cfd423181..1bd8e0ee82 100644 --- a/runtime/gen9/preamble_gen9.cpp +++ b/runtime/gen9/preamble_gen9.cpp @@ -34,7 +34,7 @@ bool PreambleHelper::isL3Configurable(const HardwareInfo &hwInfo) { template <> void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, - const DispatchFlags &dispatchFlags, + const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; @@ -45,7 +45,7 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandStre auto mask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; pCmd->setMaskBits(mask); pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); - pCmd->setMediaSamplerDopClockGateEnable(!dispatchFlags.mediaSamplerRequired); + pCmd->setMediaSamplerDopClockGateEnable(!pipelineSelectArgs.mediaSamplerRequired); } template <> diff --git a/runtime/helpers/preamble.h b/runtime/helpers/preamble.h index 5ef1dbefc2..2c8b200de9 100644 --- a/runtime/helpers/preamble.h +++ b/runtime/helpers/preamble.h @@ -20,6 +20,7 @@ class Device; struct DispatchFlags; class GraphicsAllocation; class LinearStream; +struct PipelineSelectArgs; template struct PreambleHelper { @@ -28,7 +29,7 @@ struct PreambleHelper { static void programL3(LinearStream *pCommandStream, uint32_t l3Config); static void programPipelineSelect(LinearStream *pCommandStream, - const DispatchFlags &dispatchFlags, + const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo); static uint32_t getDefaultThreadArbitrationPolicy(); static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy); @@ -51,7 +52,10 @@ struct PreambleHelper { static size_t getKernelDebuggingCommandsSize(bool debuggingActive); static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo); static uint32_t getUrbEntryAllocationSize(); + static size_t getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo); + + static size_t getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo); }; template diff --git a/runtime/helpers/preamble_base.inl b/runtime/helpers/preamble_base.inl index 67c7206c60..2937383286 100644 --- a/runtime/helpers/preamble_base.inl +++ b/runtime/helpers/preamble_base.inl @@ -9,6 +9,7 @@ #include "core/helpers/aligned_memory.h" #include "runtime/command_stream/preemption.h" #include "runtime/device/device.h" +#include "runtime/helpers/hardware_commands_helper.h" #include "runtime/helpers/preamble.h" #include "runtime/kernel/kernel.h" @@ -53,6 +54,17 @@ size_t PreambleHelper::getAdditionalCommandsSize(const Device &device return totalSize; } +template +size_t PreambleHelper::getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo) { + size_t size = 0; + using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT; + size += sizeof(PIPELINE_SELECT); + if (HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) { + size += sizeof(PIPE_CONTROL); + } + return size; +} + template void PreambleHelper::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config, uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer) { diff --git a/runtime/helpers/task_information.cpp b/runtime/helpers/task_information.cpp index ff6a488638..74f644784c 100644 --- a/runtime/helpers/task_information.cpp +++ b/runtime/helpers/task_information.cpp @@ -184,14 +184,13 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate dispatchFlags.lowPriority = commandQueue.getPriority() == QueuePriority::LOW; dispatchFlags.throttle = commandQueue.getThrottle(); dispatchFlags.preemptionMode = preemptionMode; - dispatchFlags.mediaSamplerRequired = kernel->isVmeKernel(); + dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = kernel->isVmeKernel(); dispatchFlags.multiEngineQueue = commandQueue.isMultiEngineQueue(); dispatchFlags.numGrfRequired = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired; if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { dispatchFlags.csrDependencies.fillFromEventsRequest(eventsRequest, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); } - dispatchFlags.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode(); - + dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode(); if (anyUncacheableArgs) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff; } else if (!kernel->areStatelessWritesUsed()) { diff --git a/unit_tests/command_queue/enqueue_kernel_1_tests.cpp b/unit_tests/command_queue/enqueue_kernel_1_tests.cpp index 086bb30d2f..a6afe76dcf 100644 --- a/unit_tests/command_queue/enqueue_kernel_1_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_1_tests.cpp @@ -1074,7 +1074,7 @@ HWTEST_F(EnqueueKernelTest, givenVMEKernelWhenEnqueueKernelThenDispatchFlagsHave size_t gws[3] = {1, 0, 0}; mockKernel.kernelInfo.isVmeWorkload = true; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - EXPECT_TRUE(mockCsr->passedDispatchFlags.mediaSamplerRequired); + EXPECT_TRUE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); } HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsDoesntHaveMediaSamplerRequired) { @@ -1086,5 +1086,5 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD size_t gws[3] = {1, 0, 0}; mockKernel.kernelInfo.isVmeWorkload = false; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - EXPECT_FALSE(mockCsr->passedDispatchFlags.mediaSamplerRequired); + EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); } diff --git a/unit_tests/gen11/program_media_sampler_tests_gen11.cpp b/unit_tests/gen11/program_media_sampler_tests_gen11.cpp index 4a207e568a..cb7a31330b 100644 --- a/unit_tests/gen11/program_media_sampler_tests_gen11.cpp +++ b/unit_tests/gen11/program_media_sampler_tests_gen11.cpp @@ -30,7 +30,7 @@ struct Gen11MediaSamplerProgramingTest : public ::testing::Test { void overrideMediaRequest(bool lastVmeConfig, bool mediaSamplerRequired) { csr->overrideLastVmeSubliceConfig(lastVmeConfig); - flags.mediaSamplerRequired = mediaSamplerRequired; + flags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; } void SetUp() override { @@ -46,7 +46,7 @@ struct Gen11MediaSamplerProgramingTest : public ::testing::Test { } size_t getCmdSize() { - return csr->getCmdSizeForMediaSampler(flags.mediaSamplerRequired); + return csr->getCmdSizeForMediaSampler(flags.pipelineSelectArgs.mediaSamplerRequired); } myCsr *csr = nullptr;