From 1292c3d5338d749ddb6d8139ffa3fc651af01fe8 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 20 Feb 2018 08:11:24 +0100 Subject: [PATCH] Improve thread arbitration policy programming Change-Id: Ibd764352e14d1a5112034b1c5a1fc6d6d67ebac0 --- Jenkinsfile | 2 +- runtime/command_queue/enqueue_common.h | 2 +- .../command_stream/command_stream_receiver.h | 4 +- .../command_stream_receiver_hw.h | 2 +- .../command_stream_receiver_hw.inl | 14 ++++-- .../thread_arbitration_policy.h | 10 ++--- runtime/gen8/preamble.cpp | 10 ----- runtime/gen9/preamble.cpp | 27 ++++++++++- runtime/gen9/reg_configs.h | 12 ++++- runtime/helpers/preamble.h | 7 ++- runtime/helpers/preamble.inl | 29 +++--------- runtime/kernel/kernel.h | 6 ++- runtime/os_interface/DebugVariables.def | 1 + .../command_stream_receiver_hw_tests.cpp | 45 ++++++++++--------- unit_tests/gen8/test_preamble.cpp | 19 ++------ unit_tests/gen9/enqueue_kernel.cpp | 17 ++++--- unit_tests/gen9/enqueue_media_kernel.cpp | 2 +- unit_tests/gen9/skl/test_preamble_skl.cpp | 6 ++- .../libult/ult_command_stream_receiver.h | 3 +- unit_tests/preamble/preamble_tests.cpp | 10 ++--- unit_tests/test_files/igdrcl.config | 3 +- 21 files changed, 127 insertions(+), 104 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3cfdca313b..e8db9ce8e6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,4 +2,4 @@ neoDependenciesRev='735095-769' strategy='EQUAL' allowedF=42 -allowedCD=339 +allowedCD=340 diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 0031045c1e..35a9842472 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -532,7 +532,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT); } - commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy()); + commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy()); DispatchFlags dispatchFlags; dispatchFlags.blocking = blocking; diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index 26b2b8f5dc..329ba1b95e 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -153,8 +153,8 @@ class CommandStreamReceiver { LinearStream commandStream; - uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; - uint32_t lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbitrationPolicyNotPresent; + uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; + uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; GraphicsAllocation *scratchAllocation = nullptr; GraphicsAllocation *preemptionCsrAllocation = nullptr; diff --git a/runtime/command_stream/command_stream_receiver_hw.h b/runtime/command_stream/command_stream_receiver_hw.h index 693a82da0a..7d9a710945 100644 --- a/runtime/command_stream/command_stream_receiver_hw.h +++ b/runtime/command_stream/command_stream_receiver_hw.h @@ -41,7 +41,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { return new CommandStreamReceiverHw(hwInfoIn); } - CommandStreamReceiverHw(const HardwareInfo &hwInfoIn) : hwInfo(hwInfoIn) {} + CommandStreamReceiverHw(const HardwareInfo &hwInfoIn); FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) override; diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index 7c4e6a1a2e..a2c3e1cc3a 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -37,6 +37,11 @@ namespace OCLRT { +template +CommandStreamReceiverHw::CommandStreamReceiverHw(const HardwareInfo &hwInfoIn) : hwInfo(hwInfoIn) { + requiredThreadArbitrationPolicy = PreambleHelper::getDefaultThreadArbitrationPolicy(); +} + template FlushStamp CommandStreamReceiverHw::flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) { return flushStamp->peekStamp(); @@ -171,6 +176,9 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (DebugManager.flags.ForceSLML3Config.get()) { dispatchFlags.useSLM = true; } + if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { + requestThreadArbitrationPolicy(static_cast(DebugManager.flags.OverrideThreadArbitrationPolicy.get())); + } auto newL3Config = PreambleHelper::getL3Config(peekHwInfo(), dispatchFlags.useSLM); @@ -208,9 +216,9 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } } - if (this->lastSentThreadAribtrationPolicy != this->requiredThreadArbitrationPolicy) { + if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) { PreambleHelper::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy); - this->lastSentThreadAribtrationPolicy = this->requiredThreadArbitrationPolicy; + this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy; } stateBaseAddressDirty |= ((GSBAFor32BitProgrammed ^ dispatchFlags.GSBA32BitRequired) && force32BitAllocations); @@ -581,7 +589,7 @@ inline void CommandStreamReceiverHw::programPreamble(LinearStream &cs PreambleHelper::programPreamble(&csr, *memoryManager->device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionCsrAllocation); this->isPreambleSent = true; this->lastSentL3Config = newL3Config; - this->lastSentThreadAribtrationPolicy = this->requiredThreadArbitrationPolicy; + this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy; } } diff --git a/runtime/command_stream/thread_arbitration_policy.h b/runtime/command_stream/thread_arbitration_policy.h index 9e7db5383a..36e5ad3cab 100644 --- a/runtime/command_stream/thread_arbitration_policy.h +++ b/runtime/command_stream/thread_arbitration_policy.h @@ -23,8 +23,8 @@ #include namespace OCLRT { namespace ThreadArbitrationPolicy { -const uint32_t threadArbirtrationPolicyRoundRobin = 0x100u; -const uint32_t threadArbitrationPolicyAgeBased = 0x0u; -const uint32_t threadArbitrationPolicyNotPresent = 0xffffffffu; -} -} \ No newline at end of file +const uint32_t AgeBased = 0x0u; +const uint32_t RoundRobin = 0x1u; +const uint32_t NotPresent = 0xffffffffu; +} // namespace ThreadArbitrationPolicy +} // namespace OCLRT diff --git a/runtime/gen8/preamble.cpp b/runtime/gen8/preamble.cpp index 5fe0482bbd..2fb19b18ea 100644 --- a/runtime/gen8/preamble.cpp +++ b/runtime/gen8/preamble.cpp @@ -29,15 +29,6 @@ void PreambleHelper::setupPipeControlInFrontOfCommand(void *pCmd, con ((BDWFamily::PIPE_CONTROL *)pCmd)->setDcFlushEnable(true); } -template <> -void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, uint32_t threadArbitrationPolicy) { -} - -template <> -uint32_t PreambleHelper::getAdditionalCommandsSize(const Device &device) { - return 0; -} - template <> uint32_t PreambleHelper::getL3Config(const HardwareInfo &hwInfo, bool useSLM) { uint32_t l3Config = 0; @@ -61,6 +52,5 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandStre pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); } -// Explicitly instantiate PreambleHelper for BDW device family template struct PreambleHelper; } // namespace OCLRT diff --git a/runtime/gen9/preamble.cpp b/runtime/gen9/preamble.cpp index ee643da518..83a7e5efb7 100644 --- a/runtime/gen9/preamble.cpp +++ b/runtime/gen9/preamble.cpp @@ -64,5 +64,30 @@ void PreambleHelper::setupPipeControlInFrontOfCommand(void *pCmd, con } } -template struct PreambleHelper; +template <> +uint32_t PreambleHelper::getDefaultThreadArbitrationPolicy() { + return ThreadArbitrationPolicy::RoundRobin; } + +template <> +void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) { + UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent); + + auto pipeControl = pCommandStream->getSpaceForCmd(); + *pipeControl = PIPE_CONTROL::sInit(); + pipeControl->setCommandStreamerStallEnable(true); + + auto pCmd = pCommandStream->getSpaceForCmd(); + *pCmd = MI_LOAD_REGISTER_IMM::sInit(); + + pCmd->setRegisterOffset(DebugControlReg2::address); + pCmd->setDataDword(DebugControlReg2::getRegData(requiredThreadArbitrationPolicy)); +} + +template <> +size_t PreambleHelper::getAdditionalCommandsSize(const Device &device) { + return PreemptionHelper::getRequiredPreambleSize(device) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL); +} + +template struct PreambleHelper; +} // namespace OCLRT diff --git a/runtime/gen9/reg_configs.h b/runtime/gen9/reg_configs.h index 091a36f5d4..0b6339510c 100644 --- a/runtime/gen9/reg_configs.h +++ b/runtime/gen9/reg_configs.h @@ -22,6 +22,8 @@ #pragma once #include "runtime/helpers/preamble.h" +#include "runtime/command_stream/thread_arbitration_policy.h" + namespace OCLRT { struct SKLFamily; template <> @@ -40,4 +42,12 @@ struct L3CNTLREGConfig { static const uint32_t valueForSLM = 0x60000121u; static const uint32_t valueForNoSLM = 0x80000140u; }; -} + +namespace DebugControlReg2 { +constexpr uint32_t address = 0xE404; +constexpr uint32_t getRegData(const uint32_t &policy) { + return policy == ThreadArbitrationPolicy::RoundRobin ? 0x100 : 0x0; +}; +} // namespace DebugControlReg2 + +} // namespace OCLRT diff --git a/runtime/helpers/preamble.h b/runtime/helpers/preamble.h index 6aee7ff9da..9fa2812b65 100644 --- a/runtime/helpers/preamble.h +++ b/runtime/helpers/preamble.h @@ -36,10 +36,14 @@ class LinearStream; template struct PreambleHelper { + using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + static constexpr size_t getScratchSpaceOffsetFor64bit() { return 4096; } static void programL3(LinearStream *pCommandStream, uint32_t l3Config); static void programPipelineSelect(LinearStream *pCommandStream, bool mediaSamplerRequired); + static uint32_t getDefaultThreadArbitrationPolicy(); static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy); static void programPreemption(LinearStream *pCommandStream, const Device &device, GraphicsAllocation *preemptionCsr); static void setupPipeControlInFrontOfCommand(void *pCmd, const HardwareInfo *hwInfo, bool isVfeCommand); @@ -47,7 +51,7 @@ struct PreambleHelper { static void programPreamble(LinearStream *pCommandStream, const Device &device, uint32_t l3Config, uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr); static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM); - static uint32_t getAdditionalCommandsSize(const Device &device); + static size_t getAdditionalCommandsSize(const Device &device); static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo); static uint32_t getUrbEntryAllocationSize(); }; @@ -73,4 +77,5 @@ template struct L3CNTLRegisterOffset { static const uint32_t registerOffset; }; + } // namespace OCLRT diff --git a/runtime/helpers/preamble.inl b/runtime/helpers/preamble.inl index a2650c5578..d68ac91e68 100644 --- a/runtime/helpers/preamble.inl +++ b/runtime/helpers/preamble.inl @@ -34,21 +34,11 @@ namespace OCLRT { template void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) { - typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; - typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; +} - // Add a PIPE_CONTROL w/ CS_stall - auto pPipeControl = (PIPE_CONTROL *)pCommandStream->getSpace(sizeof(PIPE_CONTROL)); - *pPipeControl = PIPE_CONTROL::sInit(); - pPipeControl->setCommandStreamerStallEnable(true); - setupPipeControlInFrontOfCommand(pPipeControl, nullptr, false); - - auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)); - *pCmd = MI_LOAD_REGISTER_IMM::sInit(); - - pCmd->setRegisterOffset(0xE404); - auto data = requiredThreadArbitrationPolicy; - pCmd->setDataDword(data); +template +uint32_t PreambleHelper::getDefaultThreadArbitrationPolicy() { + return 0; } template @@ -56,17 +46,12 @@ void PreambleHelper::programGenSpecificPreambleWorkArounds(LinearStre } template -uint32_t PreambleHelper::getAdditionalCommandsSize(const Device &device) { - typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; - typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; - size_t requiredSize = sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL); - requiredSize += PreemptionHelper::getRequiredPreambleSize(device); - return static_cast(requiredSize); +size_t PreambleHelper::getAdditionalCommandsSize(const Device &device) { + return 0; } template void PreambleHelper::programVFEState(LinearStream *pCommandStream, const HardwareInfo &hwInfo, int scratchSize, uint64_t scratchAddress) { - typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; typedef typename GfxFamily::MEDIA_VFE_STATE MEDIA_VFE_STATE; // Add a PIPE_CONTROL w/ CS_stall @@ -90,7 +75,6 @@ void PreambleHelper::programVFEState(LinearStream *pCommandStream, co template void PreambleHelper::programL3(LinearStream *pCommandStream, uint32_t l3Config) { - typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)); *pCmd = MI_LOAD_REGISTER_IMM::sInit(); @@ -116,5 +100,4 @@ template uint32_t PreambleHelper::getUrbEntryAllocationSize() { return 0x782; } - } // namespace OCLRT diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index 36e33bbd3e..90ac7cbd00 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -25,6 +25,7 @@ #include "runtime/command_stream/thread_arbitration_policy.h" #include "runtime/device_queue/device_queue.h" #include "runtime/helpers/base_object.h" +#include "runtime/helpers/preamble.h" #include "runtime/program/program.h" #include "runtime/program/kernel_info.h" #include "runtime/os_interface/debug_settings_manager.h" @@ -337,11 +338,12 @@ class Kernel : public BaseObject<_cl_kernel> { const bool isParentKernel; const bool isSchedulerKernel; + template uint32_t getThreadArbitrationPolicy() { if (kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) { - return ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; + return PreambleHelper::getDefaultThreadArbitrationPolicy(); } else { - return ThreadArbitrationPolicy::threadArbitrationPolicyAgeBased; + return ThreadArbitrationPolicy::AgeBased; } } bool checkIfIsParentKernelAndBlocksUsesPrintf() { diff --git a/runtime/os_interface/DebugVariables.def b/runtime/os_interface/DebugVariables.def index f7d99c83b1..6aa2a79142 100644 --- a/runtime/os_interface/DebugVariables.def +++ b/runtime/os_interface/DebugVariables.def @@ -81,3 +81,4 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API v DECLARE_DEBUG_VARIABLE(int32_t, ForcePreemptionMode, -1, "Keep this variable in sync with PreemptionMode enum. -1 - devices default mode, 1 - disable, 2 - midBatch, 3 - threadGroup, 4 - midThread") DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0: ENGINE_RCS") DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger") +DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont override) or any valid config (0: Age Based, 1: Round Robin)") diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index e1b50d1b4d..08370888f0 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -131,10 +131,9 @@ struct UltCommandStreamReceiverTest bool requiresCoherency = false, bool lowPriority = false) { - DispatchFlags dispatchFlags; - dispatchFlags.blocking = block; - dispatchFlags.requiresCoherency = requiresCoherency; - dispatchFlags.lowPriority = lowPriority; + flushTaskFlags.blocking = block; + flushTaskFlags.requiresCoherency = requiresCoherency; + flushTaskFlags.lowPriority = lowPriority; return commandStreamReceiver.flushTask( commandStream, @@ -144,7 +143,7 @@ struct UltCommandStreamReceiverTest ioh, ssh, taskLevel, - dispatchFlags); + flushTaskFlags); } template @@ -174,7 +173,7 @@ struct UltCommandStreamReceiverTest configureCSRHeapStatesToNonDirty(); commandStreamReceiver.taskLevel = taskLevel; - commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; + commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; commandStreamReceiver.lastSentCoherencyRequest = 0; commandStreamReceiver.lastMediaSamplerConfig = 0; } @@ -184,6 +183,7 @@ struct UltCommandStreamReceiverTest return reinterpret_cast &>(pDevice->getCommandStreamReceiver()); } + DispatchFlags flushTaskFlags = {}; uint32_t taskLevel = 42; LinearStream commandStream; LinearStream dsh; @@ -254,6 +254,19 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldSeeCommandsOnFirstFlush) { EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); } +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode) { + DebugManagerStateRestore restore; + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; + commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; + + DebugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); + + flushTask(commandStreamReceiver); + + EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, commandStreamReceiver.lastSentThreadArbitrationPolicy); +} + HWTEST_F(CommandStreamReceiverFlushTaskTests, taskCountShouldBeUpdated) { auto &commandStreamReceiver = pDevice->getCommandStreamReceiver(); flushTask(commandStreamReceiver); @@ -832,23 +845,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPr commandStreamReceiver.lastSentL3Config = l3Config; auto &csrCS = commandStreamReceiver.getCS(); - size_t sizeNeeded = getSizeRequiredPreambleCS(MockDevice(commandStreamReceiver.hwInfo)) + - sizeof(STATE_BASE_ADDRESS) + - sizeof(PIPE_CONTROL) + - sizeof(PIPELINE_SELECT) + - commandStreamReceiver.getRequiredPipeControlSize() + - sizeof(MI_BATCH_BUFFER_START); + size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSizeAligned(flushTaskFlags); - sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize); - - DispatchFlags flags; - csrCS.getSpace(csrCS.getAvailableSpace() - commandStreamReceiver.getRequiredCmdStreamSizeAligned(flags)); + csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeeded); auto expectedBase = csrCS.getBase(); // This case handles when we have *just* enough space auto expectedUsed = csrCS.getUsed() + sizeNeeded; - flushTask(commandStreamReceiver); + flushTask(commandStreamReceiver, flushTaskFlags.blocking, 0, flushTaskFlags.requiresCoherency, flushTaskFlags.lowPriority); // Verify that we didn't grab a new CS buffer EXPECT_EQ(expectedUsed, csrCS.getUsed()); @@ -1465,7 +1470,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousNOSL HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) { MockCsrHw commandStreamReceiver(*platformDevices[0]); - EXPECT_EQ(ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, commandStreamReceiver.peekThreadArbitrationPolicy()); + EXPECT_EQ(PreambleHelper::getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) { @@ -1484,7 +1489,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML // Mark Pramble as sent, override L3Config to SLM config commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = L3Config; - commandStreamReceiver->lastSentThreadAribtrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy(); + commandStreamReceiver->lastSentThreadArbitrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy(); ((MockKernel *)kernel)->setTotalSLMSize(1024); @@ -1963,7 +1968,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAnd commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.lastMediaSamplerConfig = 0; - commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; + commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeeded = 2 * sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MEDIA_VFE_STATE) + diff --git a/unit_tests/gen8/test_preamble.cpp b/unit_tests/gen8/test_preamble.cpp index 2aedf4a671..a5bb0090ba 100644 --- a/unit_tests/gen8/test_preamble.cpp +++ b/unit_tests/gen8/test_preamble.cpp @@ -67,27 +67,16 @@ BDWTEST_F(Gen8L3Config, checkSLM) { } typedef PreambleFixture ThreadArbitrationGen8; -BDWTEST_F(ThreadArbitrationGen8, givenPreambleWhenItIsProgrammedThenThreadArbitrationIsNotPresent) { +BDWTEST_F(ThreadArbitrationGen8, givenPolicyWhenThreadArbitrationProgrammedThenDoNothing) { typedef BDWFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; LinearStream &cs = linearStream; - uint32_t l3Config = PreambleHelper::getL3Config(**platformDevices, true); - PreambleHelper::programPreamble(&linearStream, MockDevice(**platformDevices), l3Config, - ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, - nullptr); + PreambleHelper::programThreadArbitration(&cs, ThreadArbitrationPolicy::RoundRobin); - parseCommands(cs); - - auto itorLRI = reverse_find(cmdList.rbegin(), cmdList.rend()); - ASSERT_NE(cmdList.rend(), itorLRI); - - //we expect l3 programming here - const auto &lri = *reinterpret_cast(*itorLRI); - auto RegisterOffset = L3CNTLRegisterOffset::registerOffset; - EXPECT_EQ(RegisterOffset, lri.getRegisterOffset()); - EXPECT_EQ(1u, lri.getDataDword() & 1); + EXPECT_EQ(0u, cs.getUsed()); EXPECT_EQ(0u, PreambleHelper::getAdditionalCommandsSize(MockDevice(**platformDevices))); + EXPECT_EQ(0u, PreambleHelper::getDefaultThreadArbitrationPolicy()); } typedef PreambleFixture Gen8UrbEntryAllocationSize; diff --git a/unit_tests/gen9/enqueue_kernel.cpp b/unit_tests/gen9/enqueue_kernel.cpp index b5e3599f3e..433d6be4c3 100644 --- a/unit_tests/gen9/enqueue_kernel.cpp +++ b/unit_tests/gen9/enqueue_kernel.cpp @@ -21,6 +21,7 @@ */ #include "runtime/command_queue/command_queue_hw.h" +#include "runtime/gen9/reg_configs.h" #include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/fixtures/memory_management_fixture.h" #include "unit_tests/helpers/hw_parse.h" @@ -30,8 +31,6 @@ namespace OCLRT { -constexpr uint32_t gen9ThreadArbiterPolicyRegOffset = 0xE404; - using Gen9EnqueueTest = Test; GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenRoundRobinPolicyIsProgrammed) { MockContext mc; @@ -44,10 +43,10 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKe HardwareParse hwParser; hwParser.parseCommands(cmdQ); - auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset); + auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address); ASSERT_NE(nullptr, cmd); - EXPECT_EQ(ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, cmd->getDataDword()); - EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset)); + EXPECT_EQ(DebugControlReg2::getRegData(PreambleHelper::getDefaultThreadArbitrationPolicy()), cmd->getDataDword()); + EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address)); } GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) { @@ -61,9 +60,9 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhe HardwareParse hwParser; hwParser.parseCommands(cmdQ); - auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset); + auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address); ASSERT_NE(nullptr, cmd); - EXPECT_EQ(ThreadArbitrationPolicy::threadArbitrationPolicyAgeBased, cmd->getDataDword()); - EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), gen9ThreadArbiterPolicyRegOffset)); -} + EXPECT_EQ(DebugControlReg2::getRegData(ThreadArbitrationPolicy::AgeBased), cmd->getDataDword()); + EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address)); } +} // namespace OCLRT diff --git a/unit_tests/gen9/enqueue_media_kernel.cpp b/unit_tests/gen9/enqueue_media_kernel.cpp index e356af4b2a..d6996506e5 100644 --- a/unit_tests/gen9/enqueue_media_kernel.cpp +++ b/unit_tests/gen9/enqueue_media_kernel.cpp @@ -21,7 +21,7 @@ */ #include "unit_tests/fixtures/media_kernel_fixture.h" -#include "runtime/helpers/preamble.inl" +#include "runtime/helpers/preamble.h" #include "test.h" using namespace OCLRT; diff --git a/unit_tests/gen9/skl/test_preamble_skl.cpp b/unit_tests/gen9/skl/test_preamble_skl.cpp index 08def96b0b..2449ba6100 100644 --- a/unit_tests/gen9/skl/test_preamble_skl.cpp +++ b/unit_tests/gen9/skl/test_preamble_skl.cpp @@ -80,7 +80,7 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio LinearStream &cs = linearStream; uint32_t l3Config = PreambleHelper::getL3Config(**platformDevices, true); PreambleHelper::programPreamble(&linearStream, MockDevice(**platformDevices), l3Config, - ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, + ThreadArbitrationPolicy::RoundRobin, nullptr); parseCommands(cs); @@ -99,6 +99,10 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio PreambleHelper::getAdditionalCommandsSize(MockDevice(*platformDevices[0]))); } +SKLTEST_F(ThreadArbitration, defaultArbitrationPolicy) { + EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, PreambleHelper::getDefaultThreadArbitrationPolicy()); +} + GEN9TEST_F(PreambleVfeState, WaOff) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; testWaTable.waSendMIFLUSHBeforeVFE = 0; diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h index 691610c6ad..5df85d4902 100644 --- a/unit_tests/libult/ult_command_stream_receiver.h +++ b/unit_tests/libult/ult_command_stream_receiver.h @@ -47,7 +47,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using BaseClass::CommandStreamReceiver::lastPreemptionMode; using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest; using BaseClass::CommandStreamReceiver::lastSentL3Config; - using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy; + using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy; + using BaseClass::CommandStreamReceiver::lastSentThreadArbitrationPolicy; using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig; using BaseClass::CommandStreamReceiver::latestFlushedTaskCount; using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig; diff --git a/unit_tests/preamble/preamble_tests.cpp b/unit_tests/preamble/preamble_tests.cpp index 340df4df17..6a9bf8817d 100644 --- a/unit_tests/preamble/preamble_tests.cpp +++ b/unit_tests/preamble/preamble_tests.cpp @@ -39,12 +39,12 @@ HWTEST_F(PreambleTest, PreemptionIsTakenIntoAccountWhenProgrammingPreamble) { auto mockDevice = std::unique_ptr(MockDevice::create(nullptr)); mockDevice->setPreemptionMode(PreemptionMode::MidThread); - uint32_t cmdSizePreambleMidThread = PreambleHelper::getAdditionalCommandsSize(*mockDevice); - uint32_t cmdSizePreemptionMidThread = static_cast(PreemptionHelper::getRequiredPreambleSize(*mockDevice)); + auto cmdSizePreambleMidThread = PreambleHelper::getAdditionalCommandsSize(*mockDevice); + auto cmdSizePreemptionMidThread = PreemptionHelper::getRequiredPreambleSize(*mockDevice); mockDevice->setPreemptionMode(PreemptionMode::Disabled); - uint32_t cmdSizePreambleDisabled = PreambleHelper::getAdditionalCommandsSize(*mockDevice); - uint32_t cmdSizePreemptionDisabled = static_cast(PreemptionHelper::getRequiredPreambleSize(*mockDevice)); + auto cmdSizePreambleDisabled = PreambleHelper::getAdditionalCommandsSize(*mockDevice); + auto cmdSizePreemptionDisabled = PreemptionHelper::getRequiredPreambleSize(*mockDevice); EXPECT_LE(cmdSizePreemptionMidThread, cmdSizePreambleMidThread); EXPECT_LE(cmdSizePreemptionDisabled, cmdSizePreambleDisabled); @@ -64,7 +64,7 @@ HWTEST_F(PreambleTest, PreemptionIsTakenIntoAccountWhenProgrammingPreamble) { MockGraphicsAllocation csrSurface(reinterpret_cast(minCsrAlignment), 1024); PreambleHelper::programPreamble(&preambleStream, *mockDevice, 0U, - ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin, &csrSurface); + ThreadArbitrationPolicy::RoundRobin, &csrSurface); PreemptionHelper::programPreamble(preemptionStream, *mockDevice, &csrSurface); diff --git a/unit_tests/test_files/igdrcl.config b/unit_tests/test_files/igdrcl.config index 9289d30399..f85ae862c2 100644 --- a/unit_tests/test_files/igdrcl.config +++ b/unit_tests/test_files/igdrcl.config @@ -53,4 +53,5 @@ TrackParentEvents = false PrintLWSSizes = false DisableAUBBufferDump = false DisableAUBImageDump = false -UseNoRingFlushesKmdMode = false \ No newline at end of file +UseNoRingFlushesKmdMode = false +OverrideThreadArbitrationPolicy = -1