diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index c8f7dfa675..c0926ebc41 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -57,8 +57,7 @@ void CommandList::removeHostPtrAllocations() { if (restartDirectSubmission) { const auto &engines = memoryManager->getRegisteredEngines(device->getRootDeviceIndex()); for (const auto &engine : engines) { - auto lock = engine.commandStreamReceiver->obtainUniqueOwnership(); - engine.commandStreamReceiver->stopDirectSubmission(false); + engine.commandStreamReceiver->stopDirectSubmission(false, true); } } diff --git a/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp b/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp index bf9d4b779c..5dfbc61fb6 100644 --- a/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp +++ b/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -207,7 +207,7 @@ void GlArbSyncEvent::unblockEventBy(Event &event, TaskCountType taskLevel, int32 return; } - event.getCommandQueue()->getGpgpuCommandStreamReceiver().stopDirectSubmission(true); + event.getCommandQueue()->getGpgpuCommandStreamReceiver().stopDirectSubmission(true, true); ctx->getSharing()->glArbSyncObjectSignal(event.getCommandQueue()->getGpgpuCommandStreamReceiver().getOsContext(), *glSyncInfo); ctx->getSharing()->glArbSyncObjectWaitServer(*osInterface, *glSyncInfo); } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 22f24a85b4..1b9ff5275b 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ 
b/shared/source/command_stream/command_stream_receiver.h @@ -356,7 +356,7 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { MOCKABLE_VIRTUAL void startControllingDirectSubmissions(); - bool isAnyDirectSubmissionEnabled() const { + MOCKABLE_VIRTUAL bool isAnyDirectSubmissionEnabled() const { return this->isDirectSubmissionEnabled() || isBlitterDirectSubmissionEnabled(); } @@ -382,7 +382,7 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { return false; } - virtual void stopDirectSubmission(bool blocking) {} + virtual void stopDirectSubmission(bool blocking, bool needsLock) {} virtual QueueThrottle getLastDirectSubmissionThrottle() = 0; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 418dd64402..0b696e23cd 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -159,7 +159,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { bool directSubmissionRelaxedOrderingEnabled() const override; uint32_t getDirectSubmissionRelaxedOrderingQueueDepth() const override; - void stopDirectSubmission(bool blocking) override; + void stopDirectSubmission(bool blocking, bool needsLock) override; QueueThrottle getLastDirectSubmissionThrottle() override; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 7d7fe167e3..773578710f 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1375,8 +1375,12 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForPrologue() const } template -inline void CommandStreamReceiverHw::stopDirectSubmission(bool blocking) { +inline void CommandStreamReceiverHw::stopDirectSubmission(bool blocking, bool needsLock) { if 
(this->isAnyDirectSubmissionEnabled()) { + std::unique_lock lock; + if (needsLock) { + lock = obtainUniqueOwnership(); + } if (EngineHelpers::isBcs(this->osContext->getEngineType())) { this->blitterDirectSubmission->stopRingBuffer(blocking); } else { diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 4caefe41ee..7d5440edaf 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -1069,8 +1069,7 @@ void Device::stopDirectSubmissionAndWaitForCompletion() { for (auto &engine : allEngines) { auto csr = engine.commandStreamReceiver; if (csr->isAnyDirectSubmissionEnabled()) { - auto lock = csr->obtainUniqueOwnership(); - csr->stopDirectSubmission(true); + csr->stopDirectSubmission(true, true); } } } @@ -1272,8 +1271,7 @@ void Device::stopDirectSubmissionForCopyEngine() { } auto regularBcs = regularBcsEngine->commandStreamReceiver; if (regularBcs->isAnyDirectSubmissionEnabled()) { - auto lock = regularBcs->obtainUniqueOwnership(); - regularBcs->stopDirectSubmission(false); + regularBcs->stopDirectSubmission(false, true); } } diff --git a/shared/source/direct_submission/direct_submission_controller.cpp b/shared/source/direct_submission/direct_submission_controller.cpp index 968bba1dba..f4071f5470 100644 --- a/shared/source/direct_submission/direct_submission_controller.cpp +++ b/shared/source/direct_submission/direct_submission_controller.cpp @@ -162,7 +162,7 @@ void DirectSubmissionController::checkNewSubmissions() { } auto lock = csr->obtainUniqueOwnership(); if (!isCsrIdleDetectionEnabled || isDirectSubmissionIdle(csr, lock)) { - csr->stopDirectSubmission(false); + csr->stopDirectSubmission(false, false); state.isStopped = true; shouldRecalculateTimeout = true; this->lowestThrottleSubmitted = QueueThrottle::HIGH; diff --git a/shared/source/os_interface/linux/drm_buffer_object.cpp b/shared/source/os_interface/linux/drm_buffer_object.cpp index 707feb1000..8e5a4b0785 100644 --- 
a/shared/source/os_interface/linux/drm_buffer_object.cpp +++ b/shared/source/os_interface/linux/drm_buffer_object.cpp @@ -352,8 +352,7 @@ int BufferObject::validateHostPtr(BufferObject *const boToPin[], size_t numberOf const auto &engines = this->drm->getRootDeviceEnvironment().executionEnvironment.memoryManager->getRegisteredEngines(osContext->getRootDeviceIndex()); for (const auto &engine : engines) { if (engine.osContext->isDirectSubmissionLightActive()) { - auto lock = engine.commandStreamReceiver->obtainUniqueOwnership(); - engine.commandStreamReceiver->stopDirectSubmission(false); + engine.commandStreamReceiver->stopDirectSubmission(false, true); } } } diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index 5b676f3371..0a7c871026 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -875,8 +875,7 @@ int Drm::waitHandle(uint32_t waitHandle, int64_t timeout) { for (const auto &engines : mulitEngines) { for (const auto &engine : engines) { if (engine.osContext->isDirectSubmissionLightActive()) { - auto lock = engine.commandStreamReceiver->obtainUniqueOwnership(); - engine.commandStreamReceiver->stopDirectSubmission(false); + engine.commandStreamReceiver->stopDirectSubmission(false, true); } } } diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index c69c0852a5..6798ee82b2 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -743,8 +743,9 @@ void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation if (engine.commandStreamReceiver->pageTableManager.get()) { std::unique_lock lock; if (engine.commandStreamReceiver->isAnyDirectSubmissionEnabled()) { + /* Keep CSR ownership until updateAuxTable() below returns; needsLock=false because the lock is already held here. */ lock = engine.commandStreamReceiver->obtainUniqueOwnership(); - 
engine.commandStreamReceiver->stopDirectSubmission(true); + engine.commandStreamReceiver->stopDirectSubmission(true, false); } [[maybe_unused]] auto status = engine.commandStreamReceiver->pageTableManager->updateAuxTable(input->getGpuAddress(), defaultGmm, false); DEBUG_BREAK_IF(!status); diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 8fecd5fc1b..263d02e3db 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -17,6 +17,7 @@ #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" +#include "shared/test/common/test_macros/mock_method_macros.h" #include #include @@ -524,11 +525,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { return *flushReturnValue; } - void stopDirectSubmission(bool blocking) override { + void stopDirectSubmission(bool blocking, bool needsLock) override { stopDirectSubmissionCalled = true; stopDirectSubmissionCalledBlocking = blocking; if (this->callBaseStopDirectSubmission) { - BaseClass::stopDirectSubmission(blocking); + BaseClass::stopDirectSubmission(blocking, needsLock); } } @@ -563,7 +564,12 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { flushHandlerCalled++; return BaseClass::flushHandler(batchBuffer, allocationsForResidency); } - + bool isAnyDirectSubmissionEnabled() const override { + if (isAnyDirectSubmissionEnabledCallBase) { + return BaseClass::isAnyDirectSubmissionEnabled(); + } + return isAnyDirectSubmissionEnabledResult; + } std::vector aubCommentMessages; BatchBuffer latestFlushedBatchBuffer = {}; @@ -597,6 +603,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { uint32_t initializeDeviceWithFirstSubmissionCalled = 0; uint32_t drainPagingFenceQueueCalled = 0; uint32_t flushHandlerCalled = 0; + 
uint32_t obtainUniqueOwnershipCalledTimes = 0; mutable uint32_t checkGpuHangDetectedCalled = 0; int ensureCommandBufferAllocationCalled = 0; DispatchFlags recordedDispatchFlags; @@ -645,6 +652,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { bool stopDirectSubmissionCalledBlocking = false; bool registeredDcFlushForDcFlushMitigation = false; bool isUserFenceWaitSupported = false; + bool isAnyDirectSubmissionEnabledCallBase = true; + bool isAnyDirectSubmissionEnabledResult = true; }; } // namespace NEO diff --git a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h index 0c6d3fe293..d0efaffe45 100644 --- a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h +++ b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h @@ -129,7 +129,7 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiverdrm = proxyDrm; } - void stopDirectSubmission(bool blocking) override { + void stopDirectSubmission(bool blocking, bool needsLock) override { stopDirectSubmissionCalled = true; } diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index c9817203e7..aad5add46a 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -202,7 +202,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { programStallingCommandsForBarrierCalled = true; } - void stopDirectSubmission(bool blocking) override { + void stopDirectSubmission(bool blocking, bool needsLock) override { this->blockingStopDirectSubmissionCalled = blocking; stopDirectSubmissionCalledTimes++; } diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 033e1fab95..a896dbb0fa 100644 --- 
a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -903,6 +903,37 @@ HWTEST_F(CommandStreamReceiverTest, whenClearColorAllocationIsCreatedThenItIsDes EXPECT_EQ(nullptr, csr.clearColorAllocation); } +HWTEST_F(CommandStreamReceiverTest, givenCsrWhenUllsEnabledAndStopDirectSubmissionCalledThenObtainOwnershipIsCalled) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + auto ownershipCalledBefore = csr.recursiveLockCounter.load(); + csr.isAnyDirectSubmissionEnabledResult = true; + csr.isAnyDirectSubmissionEnabledCallBase = false; + auto directSubmission = new MockDirectSubmissionHw>(csr); + csr.directSubmission.reset(directSubmission); + csr.stopDirectSubmission(false, true); + EXPECT_EQ(csr.recursiveLockCounter, ownershipCalledBefore + 1u); +} + +HWTEST_F(CommandStreamReceiverTest, givenCsrWhenUllsEnabledAndStopDirectSubmissionCalledWithLockNotNeededThenObtainOwnershipIsNotCalled) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + auto ownershipCalledBefore = csr.recursiveLockCounter.load(); + csr.isAnyDirectSubmissionEnabledResult = true; + csr.isAnyDirectSubmissionEnabledCallBase = false; + auto directSubmission = new MockDirectSubmissionHw>(csr); + csr.directSubmission.reset(directSubmission); + csr.stopDirectSubmission(false, false); + EXPECT_EQ(csr.recursiveLockCounter, ownershipCalledBefore); +} + +HWTEST_F(CommandStreamReceiverTest, givenCsrWhenUllsDisabledAndStopDirectSubmissionCalledThenObtainOwnershipIsNotCalled) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + auto ownershipCalledBefore = csr.recursiveLockCounter.load(); + csr.isAnyDirectSubmissionEnabledResult = false; + csr.isAnyDirectSubmissionEnabledCallBase = false; + csr.stopDirectSubmission(false, true); + EXPECT_EQ(csr.recursiveLockCounter, ownershipCalledBefore); +} + HWTEST_F(CommandStreamReceiverTest, givenNoDirectSubmissionWhenCheckTaskCountFromWaitEnabledThenReturnsFalse) { 
auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_FALSE(csr.isUpdateTagFromWaitEnabled()); diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp index 3bf205b6c3..b04582bc45 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp @@ -74,7 +74,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionDisabledWhenStopThenRingIsNo EXPECT_TRUE(ret); EXPECT_TRUE(directSubmission.ringStart); - csr.stopDirectSubmission(false); + csr.stopDirectSubmission(false, false); EXPECT_TRUE(directSubmission.ringStart); csr.directSubmission.release(); @@ -91,7 +91,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenStopThenRingIsNotStarted EXPECT_TRUE(ret); EXPECT_TRUE(directSubmission.ringStart); - csr.stopDirectSubmission(false); + csr.stopDirectSubmission(false, false); EXPECT_FALSE(directSubmission.ringStart); csr.directSubmission.release(); @@ -112,7 +112,7 @@ HWTEST_F(DirectSubmissionTest, givenBlitterDirectSubmissionWhenStopThenRingIsNot EXPECT_TRUE(ret); EXPECT_TRUE(directSubmission.ringStart); - csr.stopDirectSubmission(false); + csr.stopDirectSubmission(false, false); EXPECT_FALSE(directSubmission.ringStart); csr.blitterDirectSubmission.release();