From 1638554a49db89d15e64e4a13c08dd72a0ce2b21 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Fri, 30 Jul 2021 09:56:58 +0000 Subject: [PATCH] Add direct submission termination mechanism Signed-off-by: Lukasz Jobczyk --- .../command_stream_receiver_tests.cpp | 32 ++++++ .../execution_environment_tests.cpp | 28 ++++- .../test/unit_test/test_files/igdrcl.config | 2 + .../command_stream/command_stream_receiver.h | 2 + .../command_stream_receiver_hw.h | 6 +- .../command_stream_receiver_hw_base.inl | 30 ++++-- .../debug_settings/debug_variables_base.inl | 2 + .../source/direct_submission/CMakeLists.txt | 4 +- .../direct_submission_controller.cpp | 88 +++++++++++++++ .../direct_submission_controller.h | 44 ++++++++ .../direct_submission_hw.inl | 29 ++--- .../linux/drm_direct_submission.inl | 7 +- .../execution_environment.cpp | 15 +++ .../execution_environment.h | 4 + .../common/mocks/mock_execution_environment.h | 2 + .../direct_submission/CMakeLists.txt | 2 + .../direct_submission_controller_mock.h | 21 ++++ .../direct_submission_controller_tests.cpp | 100 ++++++++++++++++++ .../direct_submission_tests.cpp | 68 +++++++++--- 19 files changed, 444 insertions(+), 42 deletions(-) create mode 100644 shared/source/direct_submission/direct_submission_controller.cpp create mode 100644 shared/source/direct_submission/direct_submission_controller.h create mode 100644 shared/test/unit_test/direct_submission/direct_submission_controller_mock.h create mode 100644 shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp index bf0ec187e8..93154ad6c1 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -28,6 +28,7 @@ #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/matchers.h" #include "shared/test/common/test_macros/test_checks_shared.h" +#include "shared/test/unit_test/direct_submission/direct_submission_controller_mock.h" #include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h" #include "opencl/source/mem_obj/buffer.h" @@ -393,6 +394,37 @@ struct InitDirectSubmissionFixture { using InitDirectSubmissionTest = Test; +HWTEST_F(InitDirectSubmissionTest, givenDirectSubmissionControllerEnabledWhenInitDirectSubmissionThenCsrIsRegistered) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableDirectSubmissionController.set(1); + + auto csr = std::make_unique>(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); + std::unique_ptr osContext(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, + EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, + PreemptionMode::ThreadGroup, device->getDeviceBitfield()))); + + auto controller = static_cast(device->executionEnvironment->getDirectSubmissionController()); + controller->keepControlling.store(false); + EXPECT_EQ(controller->directSubmissions.size(), 0u); + + osContext->ensureContextInitialized(); + osContext->setDefaultContext(true); + auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); + hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true; + hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].submitOnInit = false; + + bool ret = csr->initDirectSubmission(*device, *osContext.get()); + EXPECT_TRUE(ret); + EXPECT_TRUE(csr->isDirectSubmissionEnabled()); + EXPECT_FALSE(csr->isBlitterDirectSubmissionEnabled()); + + EXPECT_EQ(controller->directSubmissions.size(), 1u); + EXPECT_TRUE(controller->directSubmissions.find(csr.get()) != controller->directSubmissions.end()); + + csr.reset(); + EXPECT_EQ(controller->directSubmissions.size(), 0u); +} + HWTEST_F(InitDirectSubmissionTest, whenDirectSubmissionEnabledOnRcsThenExpectFeatureAvailable) { auto csr = std::make_unique>(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); std::unique_ptr osContext(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, diff --git a/opencl/test/unit_test/execution_environment/execution_environment_tests.cpp b/opencl/test/unit_test/execution_environment/execution_environment_tests.cpp index f5b23aee8c..147622eaca 100644 --- a/opencl/test/unit_test/execution_environment/execution_environment_tests.cpp +++ b/opencl/test/unit_test/execution_environment/execution_environment_tests.cpp @@ -11,6 +11,7 @@ #include "shared/source/command_stream/preemption.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" +#include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" @@ -159,6 +160,24 @@ TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerI EXPECT_EQ(enableLocalMemory, executionEnvironment->memoryManager->isLocalMemorySupported(device->getRootDeviceIndex())); } +TEST(ExecutionEnvironment, givenEnableDirectSubmissionControllerSetWhenGetDirectSubmissionControllerThenNotNull) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableDirectSubmissionController.set(1); + + auto controller = platform()->peekExecutionEnvironment()->getDirectSubmissionController(); + + EXPECT_NE(controller, nullptr); +} + +TEST(ExecutionEnvironment, givenEnableDirectSubmissionControllerSetZeroWhenGetDirectSubmissionControllerThenNull) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableDirectSubmissionController.set(0); + + auto controller = platform()->peekExecutionEnvironment()->getDirectSubmissionController(); + + EXPECT_EQ(controller, nullptr); +} + TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerIsCalledThenItIsInitalized) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); @@ -167,6 +186,7 @@ TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerI static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr) + sizeof(std::vector) + sizeof(std::unique_ptr) + + sizeof(std::unique_ptr) + sizeof(bool) + (is64bit ? 23 : 15), "New members detected in ExecutionEnvironment, please ensure that destruction sequence of objects is correct"); @@ -174,9 +194,12 @@ static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr { + struct MemoryMangerMock : public DestructorCounted { MemoryMangerMock(uint32_t &destructorId, ExecutionEnvironment &executionEnvironment) : DestructorCounted(destructorId, executionEnvironment) {} }; + struct DirectSubmissionControllerMock : public DestructorCounted { + DirectSubmissionControllerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} + }; struct GmmHelperMock : public DestructorCounted { GmmHelperMock(uint32_t &destructorId, const HardwareInfo *hwInfo) : DestructorCounted(destructorId, nullptr, hwInfo) {} }; @@ -212,9 +235,10 @@ TEST(ExecutionEnvironment, givenExecutionEnvironmentWithVariousMembersWhenItIsDe executionEnvironment->rootDeviceEnvironments[0]->builtins = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->compilerInterface = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->debugger = std::make_unique(destructorId); + executionEnvironment->directSubmissionController = std::make_unique(destructorId); executionEnvironment.reset(nullptr); - EXPECT_EQ(8u, destructorId); + EXPECT_EQ(9u, destructorId); } TEST(ExecutionEnvironment, givenMultipleRootDevicesWhenTheyAreCreatedThenReuseMemoryManager) { diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index c8679eb12b..540de980a6 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -98,6 +98,8 @@ DirectSubmissionNewResourceTlbFlush = -1 DirectSubmissionDisableCacheFlush = -1 DirectSubmissionDisableMonitorFence = -1 USMEvictAfterMigration = 1 +EnableDirectSubmissionController = -1 +DirectSubmissionControllerTimeout = -1 UseVmBind = -1 PassBoundBOToExec = -1 EnableNullHardware = 0 diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index ded1b44b45..4c29581acb 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -241,6 +241,8 @@ class CommandStreamReceiver { return false; } + virtual void stopDirectSubmission() {} + bool isStaticWorkPartitioningEnabled() const { return staticWorkPartitioningEnabled; } diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index f5d9d40b62..5d2e2bda38 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -53,7 +53,9 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { static void addBatchBufferEnd(LinearStream &commandStream, void **patchLocation); void programEndingCmd(LinearStream &commandStream, Device &device, void **patchLocation, bool directSubmissionEnabled); void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary); + static void alignToCacheLine(LinearStream &commandStream); + static void emitNoop(LinearStream &commandStream, size_t bytesToUpdate); size_t getRequiredStateBaseAddressSize(const Device &device) const; size_t getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device); @@ -119,6 +121,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { return blitterDirectSubmission.get() != nullptr; } + void stopDirectSubmission() override; + virtual bool isKmdWaitModeActive() { return true; } bool initDirectSubmission(Device &device, OsContext &osContext) override; @@ -157,8 +161,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { uint64_t getScratchPatchAddress(); void createScratchSpaceController(); - static void emitNoop(LinearStream &commandStream, size_t bytesToUpdate); - bool detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) const; bool checkPlatformSupportsNewResourceImplicitFlush() const; bool checkPlatformSupportsGpuIdleImplicitFlush() const; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 0ce4bed8b6..b16ff1218b 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -13,6 +13,7 @@ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" +#include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/page_table_mngr.h" @@ -38,7 +39,12 @@ namespace NEO { template -CommandStreamReceiverHw::~CommandStreamReceiverHw() = default; +CommandStreamReceiverHw::~CommandStreamReceiverHw() { + auto directSubmissionController = executionEnvironment.getDirectSubmissionController(); + if (directSubmissionController) { + directSubmissionController->unregisterDirectSubmission(this); + } +} template CommandStreamReceiverHw::CommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, @@ -1390,6 +1396,15 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForPrologue() const return 0u; } +template +inline void CommandStreamReceiverHw::stopDirectSubmission() { + if (EngineHelpers::isBcs(this->osContext->getEngineType())) { + this->blitterDirectSubmission->stopRingBuffer(); + } else { + this->directSubmission->stopRingBuffer(); + } +} + template inline bool CommandStreamReceiverHw::initDirectSubmission(Device &device, OsContext &osContext) { bool ret = true; @@ -1398,16 +1413,19 @@ inline bool CommandStreamReceiverHw::initDirectSubmission(Device &dev auto startDirect = osContext.isDirectSubmissionAvailable(device.getHardwareInfo(), submitOnInit); if (startDirect) { - if (EngineHelpers::isBcs(osContext.getEngineType())) { - if (!this->isBlitterDirectSubmissionEnabled()) { + if (!this->isBlitterDirectSubmissionEnabled() && !this->isDirectSubmissionEnabled()) { + if (EngineHelpers::isBcs(osContext.getEngineType())) { blitterDirectSubmission = DirectSubmissionHw>::create(device, osContext); ret = blitterDirectSubmission->initialize(submitOnInit); - } - } else { - if (!this->isDirectSubmissionEnabled()) { + + } else { directSubmission = DirectSubmissionHw>::create(device, osContext); ret = directSubmission->initialize(submitOnInit); } + auto directSubmissionController = executionEnvironment.getDirectSubmissionController(); + if (directSubmissionController) { + directSubmissionController->registerDirectSubmission(this); + } } osContext.setDirectSubmissionActive(); } diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index f31c2bfac5..732891472e 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -235,6 +235,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCacheFlush, -1, "-1: driv DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionNewResourceTlbFlush, -1, "-1: driver default - flush when new resource is bound, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableMonitorFence, -1, "Disable dispatching monitor fence commands") DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, true, "Evict USM allocation after implicit migration to GPU") +DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmissionController, -1, "Enable direct submission terminating after given timeout, -1: default, 0: disabled, 1: enabled") +DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerTimeout, -1, "Set direct submission controller timeout, -1: default 5 ms, >=0: timeout in ms") /*FEATURE FLAGS*/ DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension") diff --git a/shared/source/direct_submission/CMakeLists.txt b/shared/source/direct_submission/CMakeLists.txt index ff1139e0f9..f86079e8db 100644 --- a/shared/source/direct_submission/CMakeLists.txt +++ b/shared/source/direct_submission/CMakeLists.txt @@ -1,11 +1,13 @@ # -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_DIRECT_SUBMISSION ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_hw_diagnostic_mode.cpp diff --git a/shared/source/direct_submission/direct_submission_controller.cpp b/shared/source/direct_submission/direct_submission_controller.cpp new file mode 100644 index 0000000000..5ece67c4ac --- /dev/null +++ b/shared/source/direct_submission/direct_submission_controller.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2019-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/direct_submission/direct_submission_controller.h" + +#include "shared/source/command_stream/command_stream_receiver.h" + +#include + +namespace NEO { + +DirectSubmissionController::DirectSubmissionController() { + timeout = 5; + + if (DebugManager.flags.DirectSubmissionControllerTimeout.get() != -1) { + timeout = DebugManager.flags.DirectSubmissionControllerTimeout.get(); + } + + directSubmissionControllingThread = std::thread(&DirectSubmissionController::controlDirectSubmissionsState, this); +}; + +DirectSubmissionController::~DirectSubmissionController() { + keepControlling.store(false); + if (directSubmissionControllingThread.joinable()) { + directSubmissionControllingThread.join(); + } +} + +void DirectSubmissionController::registerDirectSubmission(CommandStreamReceiver *csr) { + std::lock_guard lock(directSubmissionsMutex); + directSubmissions.insert(std::make_pair(csr, DirectSubmissionState{})); +} + +void DirectSubmissionController::unregisterDirectSubmission(CommandStreamReceiver *csr) { + std::lock_guard lock(directSubmissionsMutex); + directSubmissions.erase(csr); +} + +void DirectSubmissionController::controlDirectSubmissionsState() { + while (true) { + + auto start = std::chrono::steady_clock::now(); + int diff = 0u; + do { + if (!keepControlling.load()) { + return; + } + + auto timestamp = std::chrono::steady_clock::now(); + diff = static_cast(std::chrono::duration_cast(timestamp - start).count()); + } while (diff <= timeout); + + this->checkNewSubmissions(); + } +} + +void DirectSubmissionController::checkNewSubmissions() { + std::lock_guard lock(this->directSubmissionsMutex); + + for (auto &directSubmission : this->directSubmissions) { + auto csr = directSubmission.first; + auto &state = directSubmission.second; + + auto taskCount = csr->peekTaskCount(); + if (taskCount <= *csr->getTagAddress()) { + if (taskCount == state.taskCount) { + if (state.isStopped) { + continue; + } else { + auto lock = csr->obtainUniqueOwnership(); + csr->stopDirectSubmission(); + state.isStopped = true; + } + } else { + state.isStopped = false; + state.taskCount = taskCount; + } + } else { + state.isStopped = false; + } + } +} + +} // namespace NEO \ No newline at end of file diff --git a/shared/source/direct_submission/direct_submission_controller.h b/shared/source/direct_submission/direct_submission_controller.h new file mode 100644 index 0000000000..c660a78487 --- /dev/null +++ b/shared/source/direct_submission/direct_submission_controller.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2019-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include +#include +#include +#include + +namespace NEO { +class MemoryManager; +class CommandStreamReceiver; + +class DirectSubmissionController { + public: + DirectSubmissionController(); + virtual ~DirectSubmissionController(); + + void registerDirectSubmission(CommandStreamReceiver *csr); + void unregisterDirectSubmission(CommandStreamReceiver *csr); + + protected: + struct DirectSubmissionState { + bool isStopped = false; + uint32_t taskCount = 0u; + }; + + void controlDirectSubmissionsState(); + void checkNewSubmissions(); + + std::unordered_map directSubmissions; + std::mutex directSubmissionsMutex; + + std::thread directSubmissionControllingThread; + std::atomic_bool keepControlling = true; + + int timeout = 5; +}; +} // namespace NEO \ No newline at end of file diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 8eda0fe9da..6a92a02948 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" @@ -181,11 +182,18 @@ bool DirectSubmissionHw::stopRingBuffer() { Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, *hwInfo, false); } Dispatcher::dispatchStopCommandBuffer(ringCommandStream); + + auto bytesToPad = Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer(); + CommandStreamReceiverHw::emitNoop(ringCommandStream, bytesToPad); + CommandStreamReceiverHw::alignToCacheLine(ringCommandStream); + cpuCachelineFlush(flushPtr, getSizeEnd()); semaphoreData->QueueWorkCount = currentQueueWorkCount; cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize); + this->ringStart = false; + return true; } @@ -242,7 +250,9 @@ inline size_t DirectSubmissionHw::getSizeSwitchRingBuffer template inline size_t DirectSubmissionHw::getSizeEnd() { size_t size = Dispatcher::getSizeStopCommandBuffer() + - Dispatcher::getSizeCacheFlush(*hwInfo); + Dispatcher::getSizeCacheFlush(*hwInfo) + + (Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) + + MemoryConstants::cacheLineSize; if (disableMonitorFence) { size += Dispatcher::getSizeMonitorFence(*hwInfo); } @@ -332,6 +342,8 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe //for now workloads requiring cache coherency are not supported UNRECOVERABLE_IF(batchBuffer.requiresCoherency); + this->startRingBuffer(); + size_t dispatchSize = getSizeDispatch(); size_t cycleSize = getSizeSwitchRingBufferSection(); size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd(); @@ -348,24 +360,15 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe void *currentPosition = dispatchWorkloadSection(batchBuffer); - if (ringStart) { - cpuCachelineFlush(currentPosition, dispatchSize); - handleResidency(); - } + cpuCachelineFlush(currentPosition, dispatchSize); + handleResidency(); //unblock GPU semaphoreData->QueueWorkCount = currentQueueWorkCount; cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize); currentQueueWorkCount++; DirectSubmissionDiagnostics::diagnosticModeOneSubmit(diagnostic.get()); - //when ring buffer is not started at init or being restarted - if (!ringStart) { - size_t submitSize = dispatchSize; - if (buffersSwitched) { - submitSize = cycleSize; - } - ringStart = submit(startGpuVa, submitSize); - } + uint64_t flushValue = updateTagValue(); flushStamp.setStamp(flushValue); diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index 9f9371183a..1066390ecb 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -40,8 +40,6 @@ inline DrmDirectSubmission::~DrmDirectSubmission() { this->currentTagData.tagValue++; } this->wait(static_cast(this->currentTagData.tagValue)); - auto bb = static_cast(this->ringBuffer)->getBO(); - bb->wait(-1); } this->deallocateResources(); } @@ -66,12 +64,15 @@ bool DrmDirectSubmission::submit(uint64_t gpuAddress, siz this->handleResidency(); + auto currentBase = this->ringCommandStream.getGraphicsAllocation()->getGpuAddress(); + auto offset = ptrDiff(gpuAddress, currentBase); + bool ret = false; uint32_t drmContextId = 0u; for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) { if (osContextLinux->getDeviceBitfield().test(drmIterator)) { ret |= !!bb->exec(static_cast(size), - 0, + offset, execFlags, false, &this->osContext, diff --git a/shared/source/execution_environment/execution_environment.cpp b/shared/source/execution_environment/execution_environment.cpp index 915b826942..a20055f73e 100644 --- a/shared/source/execution_environment/execution_environment.cpp +++ b/shared/source/execution_environment/execution_environment.cpp @@ -9,6 +9,7 @@ #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" +#include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/affinity_mask.h" #include "shared/source/helpers/hw_helper.h" @@ -80,6 +81,20 @@ void ExecutionEnvironment::calculateMaxOsContextCount() { } } +DirectSubmissionController *ExecutionEnvironment::getDirectSubmissionController() { + auto initializeDirectSubmissionController = false; + + if (DebugManager.flags.EnableDirectSubmissionController.get() != -1) { + initializeDirectSubmissionController = DebugManager.flags.EnableDirectSubmissionController.get(); + } + + if (initializeDirectSubmissionController && this->directSubmissionController == nullptr) { + this->directSubmissionController = std::make_unique(); + } + + return directSubmissionController.get(); +} + void ExecutionEnvironment::prepareRootDeviceEnvironments(uint32_t numRootDevices) { if (rootDeviceEnvironments.size() < numRootDevices) { rootDeviceEnvironments.resize(numRootDevices); diff --git a/shared/source/execution_environment/execution_environment.h b/shared/source/execution_environment/execution_environment.h index 069419f578..4c2658b8d6 100644 --- a/shared/source/execution_environment/execution_environment.h +++ b/shared/source/execution_environment/execution_environment.h @@ -11,6 +11,7 @@ #include namespace NEO { +class DirectSubmissionController; class MemoryManager; struct OsEnvironment; struct RootDeviceEnvironment; @@ -30,12 +31,15 @@ class ExecutionEnvironment : public ReferenceTrackedObject debuggingEnabled = true; } bool isDebuggingEnabled() { return debuggingEnabled; } + DirectSubmissionController *getDirectSubmissionController(); std::unique_ptr memoryManager; std::unique_ptr osEnvironment; std::vector> rootDeviceEnvironments; protected: + std::unique_ptr directSubmissionController; + bool debuggingEnabled = false; }; } // namespace NEO diff --git a/shared/test/common/mocks/mock_execution_environment.h b/shared/test/common/mocks/mock_execution_environment.h index 10c9bb6992..5dd97df3aa 100644 --- a/shared/test/common/mocks/mock_execution_environment.h +++ b/shared/test/common/mocks/mock_execution_environment.h @@ -36,6 +36,8 @@ struct MockRootDeviceEnvironment : public RootDeviceEnvironment { }; struct MockExecutionEnvironment : ExecutionEnvironment { + using ExecutionEnvironment::directSubmissionController; + ~MockExecutionEnvironment() override = default; MockExecutionEnvironment() : MockExecutionEnvironment(defaultHwInfo.get()) {} MockExecutionEnvironment(const HardwareInfo *hwInfo) : MockExecutionEnvironment(hwInfo, true, 1u) { diff --git a/shared/test/unit_test/direct_submission/CMakeLists.txt b/shared/test/unit_test/direct_submission/CMakeLists.txt index 118bdf22e7..4417c74679 100644 --- a/shared/test/unit_test/direct_submission/CMakeLists.txt +++ b/shared/test/unit_test/direct_submission/CMakeLists.txt @@ -6,6 +6,8 @@ target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller_mock.h + ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_controller_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_tests.cpp ) diff --git a/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h b/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h new file mode 100644 index 0000000000..287ea6834f --- /dev/null +++ b/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2019-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/direct_submission/direct_submission_controller.h" + +namespace NEO { +struct DirectSubmissionControllerMock : public DirectSubmissionController { + using DirectSubmissionController::checkNewSubmissions; + using DirectSubmissionController::directSubmissionControllingThread; + using DirectSubmissionController::directSubmissions; + using DirectSubmissionController::directSubmissionsMutex; + using DirectSubmissionController::keepControlling; + using DirectSubmissionController::timeout; +}; +} // namespace NEO \ No newline at end of file diff --git a/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp b/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp new file mode 100644 index 0000000000..8a36ffa40a --- /dev/null +++ b/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2019-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_command_stream_receiver.h" +#include "shared/test/common/mocks/mock_execution_environment.h" +#include "shared/test/unit_test/direct_submission/direct_submission_controller_mock.h" + +#include "test.h" + +namespace NEO { + +TEST(DirectSubmissionControllerTests, givenDirectSubmissionControllerTimeoutWhenCreateObjectThenTimeoutIsEqualWithDebugFlag) { + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionControllerTimeout.set(14); + + DirectSubmissionControllerMock controller; + + EXPECT_EQ(controller.timeout, 14); +} + +TEST(DirectSubmissionControllerTests, givenDirectSubmissionControllerWhenRegisterDirectSubmissionWorksThenItIsMonitoringItsState) { + MockExecutionEnvironment executionEnvironment; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.initializeMemoryManager(); + + DeviceBitfield deviceBitfield(1); + MockCommandStreamReceiver csr(executionEnvironment, 0, deviceBitfield); + csr.initializeTagAllocation(); + *csr.tagAddress = 0u; + csr.taskCount.store(5u); + + DirectSubmissionControllerMock controller; + controller.keepControlling.store(false); + controller.directSubmissionControllingThread.join(); + controller.registerDirectSubmission(&csr); + + controller.checkNewSubmissions(); + EXPECT_FALSE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 0u); + + *csr.tagAddress = 5u; + controller.checkNewSubmissions(); + EXPECT_FALSE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 5u); + + csr.taskCount.store(6u); + controller.checkNewSubmissions(); + EXPECT_FALSE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 5u); + + *csr.tagAddress = 6u; + controller.checkNewSubmissions(); + EXPECT_FALSE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 6u); + + controller.checkNewSubmissions(); + EXPECT_TRUE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 6u); + + controller.checkNewSubmissions(); + EXPECT_TRUE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 6u); + + csr.taskCount.store(8u); + controller.checkNewSubmissions(); + EXPECT_FALSE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 6u); + + controller.unregisterDirectSubmission(&csr); +} + +TEST(DirectSubmissionControllerTests, givenDirectSubmissionControllerWhenTimeoutThenDirectSubmissionsAreChecked) { + MockExecutionEnvironment executionEnvironment; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.initializeMemoryManager(); + + DeviceBitfield deviceBitfield(1); + MockCommandStreamReceiver csr(executionEnvironment, 0, deviceBitfield); + csr.initializeTagAllocation(); + *csr.tagAddress = 9u; + csr.taskCount.store(9u); + + DirectSubmissionControllerMock controller; + controller.registerDirectSubmission(&csr); + + std::this_thread::sleep_for(std::chrono::milliseconds(10 * controller.timeout)); + + EXPECT_TRUE(controller.directSubmissionControllingThread.joinable()); + EXPECT_TRUE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 9u); + + controller.unregisterDirectSubmission(&csr); +} + +} // namespace NEO \ No newline at end of file diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests.cpp index 18ed6d3a2e..86a5cb6c3e 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests.cpp @@ -58,6 +58,42 @@ HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCac EXPECT_EQ(expectedPtrVal, CpuIntrinsicsTests::lastClFlushedPtr); } +HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenStopThenRingIsNotStarted) { + MockDirectSubmissionHw> directSubmission(*pDevice, + *osContext.get()); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.directSubmission.reset(&directSubmission); + + bool ret = directSubmission.initialize(true); + EXPECT_TRUE(ret); + EXPECT_TRUE(directSubmission.ringStart); + + csr.stopDirectSubmission(); + EXPECT_FALSE(directSubmission.ringStart); + + csr.directSubmission.release(); +} + +HWTEST_F(DirectSubmissionTest, givenBlitterDirectSubmissionWhenStopThenRingIsNotStarted) { + MockDirectSubmissionHw> directSubmission(*pDevice, + *osContext.get()); + auto &csr = pDevice->getUltCommandStreamReceiver(); + std::unique_ptr osContext(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, + EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, + PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield()))); + csr.blitterDirectSubmission.reset(&directSubmission); + csr.setupContext(*osContext.get()); + + bool ret = directSubmission.initialize(true); + EXPECT_TRUE(ret); + EXPECT_TRUE(directSubmission.ringStart); + + csr.stopDirectSubmission(); + EXPECT_FALSE(directSubmission.ringStart); + + csr.blitterDirectSubmission.release(); +} + HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice, *osContext.get()); @@ -221,7 +257,8 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStopWhenStopRingIsCalledThen directSubmission.stopRingBuffer(); size_t expectedDispatchSize = alreadyDispatchedSize + directSubmission.getSizeEnd(); - EXPECT_EQ(expectedDispatchSize, directSubmission.ringCommandStream.getUsed()); + EXPECT_LE(directSubmission.ringCommandStream.getUsed(), expectedDispatchSize); + EXPECT_GE(directSubmission.ringCommandStream.getUsed() + MemoryConstants::cacheLineSize, expectedDispatchSize); EXPECT_EQ(oldQueueCount + 1, directSubmission.semaphoreData->QueueWorkCount); } @@ -252,7 +289,8 @@ HWTEST_F(DirectSubmissionTest, directSubmission.tagAddressSetValue = 0xBEEF00000ull; directSubmission.stopRingBuffer(); size_t expectedDispatchSize = disabledSizeEnd; - EXPECT_EQ(expectedDispatchSize, directSubmission.ringCommandStream.getUsed()); + EXPECT_LE(directSubmission.ringCommandStream.getUsed(), expectedDispatchSize); + EXPECT_GE(directSubmission.ringCommandStream.getUsed() + MemoryConstants::cacheLineSize, expectedDispatchSize); HardwareParse hwParse; hwParse.parsePipeControl = true; @@ -623,7 +661,9 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetEndSizeThenExpectCorr *osContext.get()); size_t expectedSize = Dispatcher::getSizeStopCommandBuffer() + - Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo); + Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) + + (Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) + + MemoryConstants::cacheLineSize; size_t actualSize = directSubmission.getSizeEnd(); EXPECT_EQ(expectedSize, actualSize); } @@ -741,15 +781,15 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, ret = directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); EXPECT_TRUE(ret); EXPECT_EQ(oldRingAllocation, directSubmission.ringCommandStream.getGraphicsAllocation()); - EXPECT_EQ(1u, directSubmission.semaphoreData->QueueWorkCount); - EXPECT_EQ(2u, directSubmission.currentQueueWorkCount); + EXPECT_EQ(2u, directSubmission.semaphoreData->QueueWorkCount); + EXPECT_EQ(3u, directSubmission.currentQueueWorkCount); EXPECT_EQ(1u, directSubmission.submitCount); - size_t submitSize = directSubmission.getSizeDispatch(); + size_t submitSize = directSubmission.getSizeSemaphoreSection(); EXPECT_EQ(submitSize, directSubmission.submitSize); EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress); - EXPECT_EQ(1u, directSubmission.handleResidencyCount); + EXPECT_EQ(2u, directSubmission.handleResidencyCount); - EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed()); + EXPECT_EQ(directSubmission.getSizeDispatch() + directSubmission.getSizeSemaphoreSection(), directSubmission.ringCommandStream.getUsed()); EXPECT_TRUE(directSubmission.ringStart); } @@ -806,20 +846,18 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, GraphicsAllocation *oldRingAllocation = directSubmission.ringCommandStream.getGraphicsAllocation(); directSubmission.ringCommandStream.getSpace(directSubmission.ringCommandStream.getAvailableSpace() - directSubmission.getSizeSwitchRingBufferSection()); - uint64_t submitGpuVa = oldRingAllocation->getGpuAddress() + directSubmission.ringCommandStream.getUsed(); ret = directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); EXPECT_TRUE(ret); EXPECT_NE(oldRingAllocation, directSubmission.ringCommandStream.getGraphicsAllocation()); - EXPECT_EQ(1u, directSubmission.semaphoreData->QueueWorkCount); - EXPECT_EQ(2u, directSubmission.currentQueueWorkCount); + EXPECT_EQ(2u, directSubmission.semaphoreData->QueueWorkCount); + EXPECT_EQ(3u, directSubmission.currentQueueWorkCount); EXPECT_EQ(1u, directSubmission.submitCount); - size_t submitSize = directSubmission.getSizeSwitchRingBufferSection(); + size_t submitSize = directSubmission.getSizeSemaphoreSection(); EXPECT_EQ(submitSize, directSubmission.submitSize); - EXPECT_EQ(submitGpuVa, directSubmission.submitGpuAddress); - EXPECT_EQ(1u, directSubmission.handleResidencyCount); + EXPECT_EQ(2u, directSubmission.handleResidencyCount); - EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed()); + EXPECT_EQ(directSubmission.getSizeDispatch() + directSubmission.getSizeSemaphoreSection(), directSubmission.ringCommandStream.getUsed()); EXPECT_TRUE(directSubmission.ringStart); }