From 012b8bd73ccd537669a10e8b13c7b3a0919e85a5 Mon Sep 17 00:00:00 2001 From: "Hoppe, Mateusz" Date: Thu, 8 Feb 2018 13:41:02 +0100 Subject: [PATCH] Adding initial PreemptionMode::Initial - account for initial setting (when set mode was equal to initial(Disabled)) estimate size in cmdStreamCS, program MMIO Change-Id: Ice218ae986583c8f3bab4f4f6979e38f03e30d7e --- runtime/command_stream/command_stream_receiver.h | 2 +- runtime/command_stream/preemption.cpp | 2 +- runtime/helpers/hw_info.h | 3 ++- runtime/os_interface/DebugVariables.def | 4 ++-- unit_tests/command_queue/command_queue_hw_tests.cpp | 2 +- .../command_stream/command_stream_receiver_hw_tests.cpp | 7 +++++++ unit_tests/device/device_caps_tests.cpp | 2 +- unit_tests/main.cpp | 2 +- unit_tests/os_interface/windows/wddm_tests.cpp | 8 ++++---- unit_tests/preemption/preemption_tests.cpp | 2 +- unit_tests/test_files/igdrcl.config | 2 +- 11 files changed, 22 insertions(+), 14 deletions(-) diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index ea57dd8880..c209824926 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -143,7 +143,7 @@ class CommandStreamReceiver { uint32_t lastSentL3Config = 0; int8_t lastSentCoherencyRequest = -1; int8_t lastMediaSamplerConfig = -1; - PreemptionMode lastPreemptionMode = PreemptionMode::Disabled; + PreemptionMode lastPreemptionMode = PreemptionMode::Initial; uint32_t latestSentStatelessMocsConfig; LinearStream commandStream; diff --git a/runtime/command_stream/preemption.cpp b/runtime/command_stream/preemption.cpp index ee8922d7aa..fb6c1f5297 100644 --- a/runtime/command_stream/preemption.cpp +++ b/runtime/command_stream/preemption.cpp @@ -138,7 +138,7 @@ bool PreemptionHelper::isValidInstructionHeapForMidThreadPreemption(const Linear } PreemptionMode PreemptionHelper::getDefaultPreemptionMode(const HardwareInfo &hwInfo) { - return DebugManager.flags.ForcePreemptionMode.get() == 0 + return DebugManager.flags.ForcePreemptionMode.get() == -1 ? hwInfo.capabilityTable.defaultPreemptionMode : static_cast(DebugManager.flags.ForcePreemptionMode.get()); } diff --git a/runtime/helpers/hw_info.h b/runtime/helpers/hw_info.h index d2f80dc7bd..9d8a115fa7 100644 --- a/runtime/helpers/hw_info.h +++ b/runtime/helpers/hw_info.h @@ -31,10 +31,11 @@ namespace OCLRT { enum class PreemptionMode : uint32_t { // Keep in sync with ForcePreemptionMode debug variable + Initial = 0, Disabled = 1, MidBatch, ThreadGroup, - MidThread + MidThread, }; struct WhitelistedRegisters { diff --git a/runtime/os_interface/DebugVariables.def b/runtime/os_interface/DebugVariables.def index 3ce30241a4..143017d632 100644 --- a/runtime/os_interface/DebugVariables.def +++ b/runtime/os_interface/DebugVariables.def @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -77,6 +77,6 @@ DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib c DECLARE_DEBUG_VARIABLE(int32_t, CsrDispatchMode, 0, "Chooses DispatchMode for Csr") /*DRIVER TOGGLES*/ DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version") -DECLARE_DEBUG_VARIABLE(int32_t, ForcePreemptionMode, 0, "Keep this variable in sync with PreemptionMode enum. 0 - dont force, 1 - disable, 2 - midBatch, 3 - threadGroup, 4 - midThread") +DECLARE_DEBUG_VARIABLE(int32_t, ForcePreemptionMode, -1, "Keep this variable in sync with PreemptionMode enum. -1 - devices default mode, 1 - disable, 2 - midBatch, 3 - threadGroup, 4 - midThread") DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0: ENGINE_RCS") DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger") diff --git a/unit_tests/command_queue/command_queue_hw_tests.cpp b/unit_tests/command_queue/command_queue_hw_tests.cpp index db4db90826..b3d28fcb6c 100644 --- a/unit_tests/command_queue/command_queue_hw_tests.cpp +++ b/unit_tests/command_queue/command_queue_hw_tests.cpp @@ -386,7 +386,7 @@ HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUsedHeapsWhenBloc bool oldMemsetAllocationsFlag = MemoryManagement::memsetNewAllocations; MemoryManagement::memsetNewAllocations = true; - DebugManager.flags.ForcePreemptionMode.set(0); // allow default preemption mode + DebugManager.flags.ForcePreemptionMode.set(-1); // allow default preemption mode auto deviceWithDefaultPreemptionMode = std::unique_ptr(DeviceHelper<>::create(nullptr)); this->pDevice->setPreemptionMode(deviceWithDefaultPreemptionMode->getPreemptionMode()); this->pDevice->getCommandStreamReceiver().setPreemptionCsrAllocation(deviceWithDefaultPreemptionMode->getPreemptionAllocation()); diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index cfde591e7c..f941dfbac3 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -166,6 +166,7 @@ struct UltCommandStreamReceiverTest auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.overrideMediaVFEStateDirty(false); commandStreamReceiver.latestSentStatelessMocsConfig = CacheSettings::l3CacheOn; commandStreamReceiver.lastSentL3Config = L3Config; @@ -373,6 +374,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushNotRequiredTh waTable = const_cast(pDevice->getWaTable()); commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired); configureCSRtoNonDirtyState(); commandStreamReceiver.taskLevel = taskLevel; @@ -761,9 +763,11 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPr size_t sizeNeededForPreamble = getSizeRequiredPreambleCS(MockDevice(commandStreamReceiver.hwInfo)); size_t sizeNeededForStateBaseAddress = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL); size_t sizeNeededForPipeControl = commandStreamReceiver.getRequiredPipeControlSize(); + size_t sizeNeededForPreemption = PreemptionHelper::getRequiredCmdStreamSize(pDevice->getPreemptionMode(), commandStreamReceiver.lastPreemptionMode); size_t sizeNeeded = sizeNeededForPreamble + sizeNeededForStateBaseAddress + sizeNeededForPipeControl + + sizeNeededForPreemption + sizeof(MI_BATCH_BUFFER_END); sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize); @@ -794,9 +798,11 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPr size_t sizeNeededForPreamble = getSizeRequiredPreambleCS(MockDevice(commandStreamReceiver.hwInfo)); size_t sizeNeededForStateBaseAddress = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL); size_t sizeNeededForPipeControl = commandStreamReceiver.getRequiredPipeControlSize(); + size_t sizeNeededForPreemption = PreemptionHelper::getRequiredCmdStreamSize(pDevice->getPreemptionMode(), commandStreamReceiver.lastPreemptionMode); size_t sizeNeeded = sizeNeededForPreamble + sizeNeededForStateBaseAddress + sizeNeededForPipeControl + + sizeNeededForPreemption + sizeof(MI_BATCH_BUFFER_END); sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize); @@ -1958,6 +1964,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAnd // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.lastMediaSamplerConfig = 0; commandStreamReceiver.lastSentThreadAribtrationPolicy = ThreadArbitrationPolicy::threadArbirtrationPolicyRoundRobin; diff --git a/unit_tests/device/device_caps_tests.cpp b/unit_tests/device/device_caps_tests.cpp index 81c09de6b6..e0ca7bde8b 100644 --- a/unit_tests/device/device_caps_tests.cpp +++ b/unit_tests/device/device_caps_tests.cpp @@ -213,7 +213,7 @@ TEST(DeviceGetCapsSimple, givenDeviceWhenEUCountIsZeroThenmaxWgsIsDefault) { TEST(Device_GetCaps, givenDontForcePreemptionModeDebugVariableWhenCreateDeviceThenSetDefaultHwPreemptionMode) { DebugManagerStateRestore dbgRestorer; { - DebugManager.flags.ForcePreemptionMode.set(0); + DebugManager.flags.ForcePreemptionMode.set(-1); auto device = std::unique_ptr(DeviceHelper<>::create(platformDevices[0])); EXPECT_TRUE(device->getHardwareInfo().capabilityTable.defaultPreemptionMode == device->getPreemptionMode()); diff --git a/unit_tests/main.cpp b/unit_tests/main.cpp index 91c0a33446..03e3765bc0 100644 --- a/unit_tests/main.cpp +++ b/unit_tests/main.cpp @@ -159,7 +159,7 @@ std::string getRunPath(char *argv0) { } extern int preemptionModeFromDebugManager; -int preemptionModeFromDebugManager = -1; +int preemptionModeFromDebugManager = -2; int main(int argc, char **argv) { int retVal = 0; diff --git a/unit_tests/os_interface/windows/wddm_tests.cpp b/unit_tests/os_interface/windows/wddm_tests.cpp index 7f218581e1..ec352d5bc3 100644 --- a/unit_tests/os_interface/windows/wddm_tests.cpp +++ b/unit_tests/os_interface/windows/wddm_tests.cpp @@ -577,7 +577,7 @@ HWTEST_F(WddmTest, dontCallCreateContextBeforeConfigureDeviceAddressSpace) { } HWTEST_F(WddmPreemptionTests, givenDevicePreemptionEnabledDebugFlagDontForceWhenPreemptionRegKeySetThenSetGpuTimeoutFlagOn) { - DebugManager.flags.ForcePreemptionMode.set(0); // dont force + DebugManager.flags.ForcePreemptionMode.set(-1); // dont force hwInfoTest.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; unsigned int expectedVal = 1u; createAndInitWddm(1u); @@ -585,7 +585,7 @@ HWTEST_F(WddmPreemptionTests, givenDevicePreemptionEnabledDebugFlagDontForceWhen } HWTEST_F(WddmPreemptionTests, givenDevicePreemptionDisabledDebugFlagDontForceWhenPreemptionRegKeySetThenSetGpuTimeoutFlagOff) { - DebugManager.flags.ForcePreemptionMode.set(0); // dont force + DebugManager.flags.ForcePreemptionMode.set(-1); // dont force hwInfoTest.capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled; unsigned int expectedVal = 0u; createAndInitWddm(1u); @@ -593,7 +593,7 @@ HWTEST_F(WddmPreemptionTests, givenDevicePreemptionDisabledDebugFlagDontForceWhe } HWTEST_F(WddmPreemptionTests, givenDevicePreemptionEnabledDebugFlagDontForceWhenPreemptionRegKeyNotSetThenSetGpuTimeoutFlagOff) { - DebugManager.flags.ForcePreemptionMode.set(0); // dont force + DebugManager.flags.ForcePreemptionMode.set(-1); // dont force hwInfoTest.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; unsigned int expectedVal = 0u; createAndInitWddm(0u); @@ -601,7 +601,7 @@ HWTEST_F(WddmPreemptionTests, givenDevicePreemptionEnabledDebugFlagDontForceWhen } HWTEST_F(WddmPreemptionTests, givenDevicePreemptionDisabledDebugFlagDontForceWhenPreemptionRegKeyNotSetThenSetGpuTimeoutFlagOff) { - DebugManager.flags.ForcePreemptionMode.set(0); // dont force + DebugManager.flags.ForcePreemptionMode.set(-1); // dont force hwInfoTest.capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled; unsigned int expectedVal = 0u; createAndInitWddm(0u); diff --git a/unit_tests/preemption/preemption_tests.cpp b/unit_tests/preemption/preemption_tests.cpp index 604c88a9bd..1aa8617f02 100644 --- a/unit_tests/preemption/preemption_tests.cpp +++ b/unit_tests/preemption/preemption_tests.cpp @@ -310,7 +310,7 @@ TEST_F(DevicePreemptionTests, setDefaultDisabledPreemptionNoMidBatchSupport) { } TEST(PreemptionTest, defaultMode) { - EXPECT_EQ(0, preemptionModeFromDebugManager); + EXPECT_EQ(-1, preemptionModeFromDebugManager); } TEST(PreemptionTest, whenPreemptionModeIsNotMidThreadThenInstructionHeapSipKernelReservedSizeIsEmpty) { diff --git a/unit_tests/test_files/igdrcl.config b/unit_tests/test_files/igdrcl.config index 2e007b5e77..d964edcf04 100644 --- a/unit_tests/test_files/igdrcl.config +++ b/unit_tests/test_files/igdrcl.config @@ -30,7 +30,7 @@ PrintEMDebugInformation = 0 SchedulerSimulationReturnInstance = 0 DisableConcurrentBlockExecution = 0 ResidencyDebugEnable = 0 -ForcePreemptionMode = 0 +ForcePreemptionMode = -1 EnableStatelessToStatefulBufferOffsetOpt = 0 TbxPort = 4321 TbxServer = 127.0.0.1