diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index f4f19d58d6..0d49c9f3b4 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -970,6 +970,9 @@ bool CommandStreamReceiver::createPreemptionAllocation() { std::unique_lock CommandStreamReceiver::obtainUniqueOwnership() { return std::unique_lock(this->ownershipMutex); } +std::unique_lock CommandStreamReceiver::tryObtainUniqueOwnership() { + return std::unique_lock(this->ownershipMutex, std::try_to_lock); +} std::unique_lock CommandStreamReceiver::obtainHostPtrSurfaceCreationLock() { return std::unique_lock(this->hostPtrSurfaceCreationMutex); } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 0a8f625adb..25eeb759e6 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -266,6 +266,7 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { MOCKABLE_VIRTUAL bool createPreemptionAllocation(); MOCKABLE_VIRTUAL bool createPerDssBackedBuffer(Device &device); [[nodiscard]] MOCKABLE_VIRTUAL std::unique_lock obtainUniqueOwnership(); + [[nodiscard]] MOCKABLE_VIRTUAL std::unique_lock tryObtainUniqueOwnership(); bool peekTimestampPacketWriteEnabled() const { return timestampPacketWriteEnabled; } diff --git a/shared/source/direct_submission/direct_submission_controller.cpp b/shared/source/direct_submission/direct_submission_controller.cpp index 7e4130ac84..9253138924 100644 --- a/shared/source/direct_submission/direct_submission_controller.cpp +++ b/shared/source/direct_submission/direct_submission_controller.cpp @@ -136,7 +136,10 @@ void DirectSubmissionController::checkNewSubmissions() { if (!isBcs && csr->getProductHelper().checkBcsForDirectSubmissionStop()) { isCopyEngineIdle = isCopyEngineOnDeviceIdle(csr->getRootDeviceIndex(), bcsTaskCount); } - auto lock = csr->obtainUniqueOwnership(); + auto lock = csr->tryObtainUniqueOwnership(); + if (!lock.owns_lock()) { + continue; // Skip this CSR if contended - avoid deadlock + } if (!isCsrIdleDetectionEnabled || (isDirectSubmissionIdle(csr, lock) && isCopyEngineIdle)) { csr->stopDirectSubmission(false, false); state.isStopped = true; @@ -206,7 +209,11 @@ bool DirectSubmissionController::isDirectSubmissionIdle(CommandStreamReceiver *c auto otherKey = ContextGroupKey{otherCsr->getRootDeviceIndex(), otherCsr->getContextGroupId()}; if (otherKey == myKey) { - auto otherLock = otherCsr->obtainUniqueOwnership(); + auto otherLock = otherCsr->tryObtainUniqueOwnership(); + if (!otherLock.owns_lock()) { + allOthersIdle = false; + break; // Treat contended CSR as active - avoid deadlock + } if (!checkCSRIdle(otherCsr, otherLock)) { allOthersIdle = false; break; // Early exit for performance diff --git a/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp b/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp index ca71c40004..b234aacb30 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp @@ -459,6 +459,75 @@ TEST_F(DirectSubmissionIdleDetectionTests, givenDebugFlagSetWhenTaskCountNotUpda EXPECT_EQ(0u, csr->flushTagUpdateCalledTimes); } +TEST_F(DirectSubmissionIdleDetectionTests, givenSimulatedCsrLockContentionWhenCheckNewSubmissionsCalledThenTryLockSkipsCsr) { + // Setup: Make CSR appear idle (should normally be stopped) + csr->taskCount.store(10u); + csr->setLatestFlushedTaskCount(10u); + csr->isBusyReturnValue = false; + + // First, verify normal operation works + controller->checkNewSubmissions(); + EXPECT_TRUE(controller->directSubmissions[csr.get()].isStopped); + EXPECT_EQ(1u, csr->stopDirectSubmissionCalledTimes); + + // Reset for contention simulation + controller->directSubmissions[csr.get()].isStopped = false; + csr->stopDirectSubmissionCalledTimes = 0; + + // Create a contention-simulating CSR that overrides tryObtainUniqueOwnership + struct ContentionSimulatingCsr : public TagUpdateMockCommandStreamReceiver { + using TagUpdateMockCommandStreamReceiver::TagUpdateMockCommandStreamReceiver; + + bool simulateContention = false; + + std::unique_lock tryObtainUniqueOwnership() override { + if (simulateContention) { + return std::unique_lock(); + } + return TagUpdateMockCommandStreamReceiver::tryObtainUniqueOwnership(); + } + }; + + // Replace the existing CSR with our contention-simulating one + controller->unregisterDirectSubmission(csr.get()); + + DeviceBitfield deviceBitfield(1); + auto contentionCsr = std::make_unique(executionEnvironment, 0, deviceBitfield); + auto newOsContext = std::unique_ptr(OsContext::create(nullptr, 0, 1, + EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular}, + PreemptionMode::ThreadGroup, deviceBitfield))); + contentionCsr->setupContext(*newOsContext); + + contentionCsr->taskCount.store(10u); + contentionCsr->setLatestFlushedTaskCount(10u); + contentionCsr->isBusyReturnValue = false; + + controller->registerDirectSubmission(contentionCsr.get()); + controller->checkNewSubmissions(); + controller->directSubmissions[contentionCsr.get()].isStopped = false; + contentionCsr->stopDirectSubmissionCalledTimes = 0; + + // Enable contention simulation - this will make tryObtainUniqueOwnership() fail + contentionCsr->simulateContention = true; + + controller->checkNewSubmissions(); + + // Verify CSR was NOT stopped due to simulated contended lock (conservative behavior) + EXPECT_FALSE(controller->directSubmissions[contentionCsr.get()].isStopped); + EXPECT_EQ(0u, contentionCsr->stopDirectSubmissionCalledTimes); + + // Disable contention simulation and verify normal operation resumes + contentionCsr->simulateContention = false; + + controller->checkNewSubmissions(); + + // Now it should be stopped since no contention exists + EXPECT_TRUE(controller->directSubmissions[contentionCsr.get()].isStopped); + EXPECT_EQ(1u, contentionCsr->stopDirectSubmissionCalledTimes); + + controller->unregisterDirectSubmission(contentionCsr.get()); +} + struct DirectSubmissionCheckForCopyEngineIdleTests : public ::testing::Test { void SetUp() override { controller = std::make_unique();