mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Revert "fix: Fix DirectSubmissionController deadlock with try_lock pattern"
Related-To: NEO-13325
This reverts commit 40ce5eb55e.
Signed-off-by: Slawomir Milczarek <slawomir.milczarek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
286f973d0c
commit
393c4d0985
@@ -136,10 +136,7 @@ void DirectSubmissionController::checkNewSubmissions() {
|
||||
if (!isBcs && csr->getProductHelper().checkBcsForDirectSubmissionStop()) {
|
||||
isCopyEngineIdle = isCopyEngineOnDeviceIdle(csr->getRootDeviceIndex(), bcsTaskCount);
|
||||
}
|
||||
auto lock = csr->tryObtainUniqueOwnership();
|
||||
if (!lock.owns_lock()) {
|
||||
continue; // Skip this CSR if contended - avoid deadlock
|
||||
}
|
||||
auto lock = csr->obtainUniqueOwnership();
|
||||
if (!isCsrIdleDetectionEnabled || (isDirectSubmissionIdle(csr, lock) && isCopyEngineIdle)) {
|
||||
csr->stopDirectSubmission(false, false);
|
||||
state.isStopped = true;
|
||||
@@ -209,11 +206,7 @@ bool DirectSubmissionController::isDirectSubmissionIdle(CommandStreamReceiver *c
|
||||
|
||||
auto otherKey = ContextGroupKey{otherCsr->getRootDeviceIndex(), otherCsr->getContextGroupId()};
|
||||
if (otherKey == myKey) {
|
||||
auto otherLock = otherCsr->tryObtainUniqueOwnership();
|
||||
if (!otherLock.owns_lock()) {
|
||||
allOthersIdle = false;
|
||||
break; // Treat contended CSR as active - avoid deadlock
|
||||
}
|
||||
auto otherLock = otherCsr->obtainUniqueOwnership();
|
||||
if (!checkCSRIdle(otherCsr, otherLock)) {
|
||||
allOthersIdle = false;
|
||||
break; // Early exit for performance
|
||||
|
||||
@@ -459,75 +459,6 @@ TEST_F(DirectSubmissionIdleDetectionTests, givenDebugFlagSetWhenTaskCountNotUpda
|
||||
EXPECT_EQ(0u, csr->flushTagUpdateCalledTimes);
|
||||
}
|
||||
|
||||
TEST_F(DirectSubmissionIdleDetectionTests, givenSimulatedCsrLockContentionWhenCheckNewSubmissionsCalledThenTryLockSkipsCsr) {
|
||||
// Setup: Make CSR appear idle (should normally be stopped)
|
||||
csr->taskCount.store(10u);
|
||||
csr->setLatestFlushedTaskCount(10u);
|
||||
csr->isBusyReturnValue = false;
|
||||
|
||||
// First, verify normal operation works
|
||||
controller->checkNewSubmissions();
|
||||
EXPECT_TRUE(controller->directSubmissions[csr.get()].isStopped);
|
||||
EXPECT_EQ(1u, csr->stopDirectSubmissionCalledTimes);
|
||||
|
||||
// Reset for contention simulation
|
||||
controller->directSubmissions[csr.get()].isStopped = false;
|
||||
csr->stopDirectSubmissionCalledTimes = 0;
|
||||
|
||||
// Create a contention-simulating CSR that overrides tryObtainUniqueOwnership
|
||||
struct ContentionSimulatingCsr : public TagUpdateMockCommandStreamReceiver {
|
||||
using TagUpdateMockCommandStreamReceiver::TagUpdateMockCommandStreamReceiver;
|
||||
|
||||
bool simulateContention = false;
|
||||
|
||||
std::unique_lock<CommandStreamReceiver::MutexType> tryObtainUniqueOwnership() override {
|
||||
if (simulateContention) {
|
||||
return std::unique_lock<CommandStreamReceiver::MutexType>();
|
||||
}
|
||||
return TagUpdateMockCommandStreamReceiver::tryObtainUniqueOwnership();
|
||||
}
|
||||
};
|
||||
|
||||
// Replace the existing CSR with our contention-simulating one
|
||||
controller->unregisterDirectSubmission(csr.get());
|
||||
|
||||
DeviceBitfield deviceBitfield(1);
|
||||
auto contentionCsr = std::make_unique<ContentionSimulatingCsr>(executionEnvironment, 0, deviceBitfield);
|
||||
auto newOsContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0, 1,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular},
|
||||
PreemptionMode::ThreadGroup, deviceBitfield)));
|
||||
contentionCsr->setupContext(*newOsContext);
|
||||
|
||||
contentionCsr->taskCount.store(10u);
|
||||
contentionCsr->setLatestFlushedTaskCount(10u);
|
||||
contentionCsr->isBusyReturnValue = false;
|
||||
|
||||
controller->registerDirectSubmission(contentionCsr.get());
|
||||
controller->checkNewSubmissions();
|
||||
controller->directSubmissions[contentionCsr.get()].isStopped = false;
|
||||
contentionCsr->stopDirectSubmissionCalledTimes = 0;
|
||||
|
||||
// Enable contention simulation - this will make tryObtainUniqueOwnership() fail
|
||||
contentionCsr->simulateContention = true;
|
||||
|
||||
controller->checkNewSubmissions();
|
||||
|
||||
// Verify CSR was NOT stopped due to simulated contended lock (conservative behavior)
|
||||
EXPECT_FALSE(controller->directSubmissions[contentionCsr.get()].isStopped);
|
||||
EXPECT_EQ(0u, contentionCsr->stopDirectSubmissionCalledTimes);
|
||||
|
||||
// Disable contention simulation and verify normal operation resumes
|
||||
contentionCsr->simulateContention = false;
|
||||
|
||||
controller->checkNewSubmissions();
|
||||
|
||||
// Now it should be stopped since no contention exists
|
||||
EXPECT_TRUE(controller->directSubmissions[contentionCsr.get()].isStopped);
|
||||
EXPECT_EQ(1u, contentionCsr->stopDirectSubmissionCalledTimes);
|
||||
|
||||
controller->unregisterDirectSubmission(contentionCsr.get());
|
||||
}
|
||||
|
||||
TEST_F(DirectSubmissionIdleDetectionTests, givenTryLockContentionThenIsCopyEngineOnDeviceIdleReturnsFalse) {
|
||||
struct ContentionSimulatingCsr : public TagUpdateMockCommandStreamReceiver {
|
||||
using TagUpdateMockCommandStreamReceiver::TagUpdateMockCommandStreamReceiver;
|
||||
|
||||
Reference in New Issue
Block a user