From 244dd9b0b44c02ad5cbc50a4c99236c2bc993911 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Sat, 11 Oct 2025 03:04:39 +0200 Subject: [PATCH] Revert "fix: use condition variables instead of busy waits in worker threads" This reverts commit db0b4a616ced0d599033b3991dca93e4f6116d29. Signed-off-by: Compute-Runtime-Validation --- .../direct_submission_controller_tests_mt.cpp | 96 +++++++++---------- .../unified_memory_reuse_cleaner_tests_mt.cpp | 48 ++-------- .../command_stream_receiver.cpp | 7 ++ .../command_stream/command_stream_receiver.h | 2 + .../command_stream_receiver_hw_base.inl | 4 +- .../direct_submission_controller.cpp | 61 ++++++------ .../direct_submission_controller.h | 10 +- .../direct_submission_hw.inl | 4 - .../direct_submission_controller_linux.cpp | 2 +- .../direct_submission_controller_windows.cpp | 2 +- .../memory_manager/unified_memory_manager.cpp | 13 +-- .../memory_manager/unified_memory_manager.h | 2 - .../unified_memory_reuse_cleaner.cpp | 29 +++--- .../unified_memory_reuse_cleaner.h | 13 +-- .../mocks/mock_usm_memory_reuse_cleaner.h | 16 +--- .../command_stream_receiver_tests.cpp | 8 +- .../direct_submission_controller_mock.h | 15 +-- .../windows/device_command_stream_tests.cpp | 6 ++ 18 files changed, 137 insertions(+), 201 deletions(-) diff --git a/opencl/test/unit_test/mt_tests/direct_submission/direct_submission_controller_tests_mt.cpp b/opencl/test/unit_test/mt_tests/direct_submission/direct_submission_controller_tests_mt.cpp index b99775466c..ecb86b92fb 100644 --- a/opencl/test/unit_test/mt_tests/direct_submission/direct_submission_controller_tests_mt.cpp +++ b/opencl/test/unit_test/mt_tests/direct_submission/direct_submission_controller_tests_mt.cpp @@ -16,7 +16,7 @@ namespace NEO { -TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenNewSubmissionThenDirectSubmissionsAreChecked) { +TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenTimeoutThenDirectSubmissionsAreChecked) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); @@ -28,57 +28,54 @@ TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenNewSu EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular}, PreemptionMode::ThreadGroup, deviceBitfield))); csr.setupContext(*osContext.get()); + csr.initializeTagAllocation(); + *csr.tagAddress = 9u; + csr.taskCount.store(9u); DirectSubmissionControllerMock controller; executionEnvironment.directSubmissionController.reset(&controller); controller.timeoutElapsedReturnValue.store(TimeoutElapsedMode::fullyElapsed); - controller.registerDirectSubmission(&csr); controller.startThread(); - // Nothing to do, we are deep sleeping on condition var - while (!controller.inDeepSleep.load()) { + csr.startControllingDirectSubmissions(); + controller.registerDirectSubmission(&csr); + + while (controller.directSubmissions[&csr].taskCount != 9u) { std::this_thread::yield(); } - - EXPECT_TRUE(controller.inDeepSleep.load()); - EXPECT_FALSE(controller.handlePagingFenceRequestsCalled.load()); - EXPECT_FALSE(controller.sleepCalled.load()); - EXPECT_FALSE(controller.checkNewSubmissionCalled.load()); - - // Wake up controller with new submission, work should be done and wait again - csr.taskCount = 10; - controller.notifyNewSubmission(); - - // We need to sleep here to give worker thread a chance to wake up, we can not check inDeepSleep immediately here - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - while (!controller.inDeepSleep.load()) { + while (!controller.directSubmissions[&csr].isStopped) { std::this_thread::yield(); } - - // Work is done, verify results - EXPECT_TRUE(controller.inDeepSleep.load()); - EXPECT_TRUE(controller.handlePagingFenceRequestsCalled.load()); - EXPECT_TRUE(controller.sleepCalled.load()); - EXPECT_TRUE(controller.checkNewSubmissionCalled.load()); - EXPECT_NE(controller.directSubmissionControllingThread.get(), nullptr); - EXPECT_TRUE(controller.directSubmissions[&csr].isStopped.load()); - EXPECT_EQ(10u, controller.directSubmissions[&csr].taskCount.load()); - EXPECT_EQ(10u, csr.peekTaskCount()); - + { + std::lock_guard lock(controller.directSubmissionsMutex); + EXPECT_NE(controller.directSubmissionControllingThread.get(), nullptr); + EXPECT_TRUE(controller.directSubmissions[&csr].isStopped); + EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 9u); + } controller.stopThread(); controller.unregisterDirectSubmission(&csr); executionEnvironment.directSubmissionController.release(); } -TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenShuttingDownThenNoHang) { +TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWithStartedControllingWhenShuttingDownThenNoHang) { + DirectSubmissionControllerMock controller; + controller.startThread(); + EXPECT_NE(controller.directSubmissionControllingThread.get(), nullptr); + controller.startControlling(); + + while (!controller.sleepCalled) { + std::this_thread::yield(); + } + controller.stopThread(); +} + +TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWithNotStartedControllingWhenShuttingDownThenNoHang) { DirectSubmissionControllerMock controller; controller.startThread(); EXPECT_NE(controller.directSubmissionControllingThread.get(), nullptr); - while (!controller.inDeepSleep.load()) { + while (!controller.sleepCalled) { std::this_thread::yield(); } - - EXPECT_TRUE(controller.inDeepSleep.load()); controller.stopThread(); } @@ -94,32 +91,29 @@ TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenEnque DirectSubmissionControllerMock controller; controller.sleepCalled.store(false); controller.startThread(); - - // Nothing to do, deep sleep - while (!controller.inDeepSleep.load()) { + while (!controller.sleepCalled) { std::this_thread::yield(); } - - EXPECT_TRUE(controller.inDeepSleep.load()); - EXPECT_FALSE(controller.handlePagingFenceRequestsCalled.load()); - EXPECT_FALSE(controller.sleepCalled.load()); - EXPECT_FALSE(controller.checkNewSubmissionCalled.load()); - - // Wake up controller with paging fence, work should be done and wait again - controller.inDeepSleep.store(false); - EXPECT_FALSE(controller.inDeepSleep.load()); + EXPECT_EQ(0u, csr.pagingFenceValueToUnblock); controller.enqueueWaitForPagingFence(&csr, 10u); - while (!controller.inDeepSleep.load()) { + // Wait until csr is not updated + while (csr.pagingFenceValueToUnblock == 0u) { std::this_thread::yield(); } - - EXPECT_TRUE(controller.inDeepSleep.load()); - EXPECT_TRUE(controller.sleepCalled.load()); - EXPECT_TRUE(controller.handlePagingFenceRequestsCalled.load()); - EXPECT_TRUE(controller.checkNewSubmissionCalled.load()); EXPECT_EQ(10u, csr.pagingFenceValueToUnblock); + + // Verify that controller is able to handle requests during controlling + controller.startControlling(); + + controller.enqueueWaitForPagingFence(&csr, 20u); + + while (csr.pagingFenceValueToUnblock == 10u) { + std::this_thread::yield(); + } + EXPECT_EQ(20u, csr.pagingFenceValueToUnblock); + controller.stopThread(); } -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/opencl/test/unit_test/mt_tests/memory_manager/unified_memory_reuse_cleaner_tests_mt.cpp b/opencl/test/unit_test/mt_tests/memory_manager/unified_memory_reuse_cleaner_tests_mt.cpp index bd355886fe..03c6fdcac5 100644 --- a/opencl/test/unit_test/mt_tests/memory_manager/unified_memory_reuse_cleaner_tests_mt.cpp +++ b/opencl/test/unit_test/mt_tests/memory_manager/unified_memory_reuse_cleaner_tests_mt.cpp @@ -5,16 +5,12 @@ * */ -#include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h" #include "shared/test/common/test_macros/test.h" namespace NEO { -TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenCachesAreEmptyThenWorkerThreadIsWaitingOnConditionVar) { - MockMemoryManager mockMemoryManager; - mockMemoryManager.executionEnvironment.unifiedMemoryReuseCleaner.reset(new MockUnifiedMemoryReuseCleaner(false)); - MockUnifiedMemoryReuseCleaner &cleaner = *static_cast(mockMemoryManager.executionEnvironment.unifiedMemoryReuseCleaner.get()); - +TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenSleepExpiredThenTrimOldInCachesIsCalled) { + MockUnifiedMemoryReuseCleaner cleaner(false); cleaner.callBaseStartThread = true; cleaner.callBaseTrimOldInCaches = false; EXPECT_EQ(nullptr, cleaner.unifiedMemoryReuseCleanerThread); @@ -26,49 +22,23 @@ TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenCachesA cleaner.startThread(); EXPECT_EQ(cleanerThread, cleaner.unifiedMemoryReuseCleanerThread.get()); } + EXPECT_FALSE(cleaner.runCleaning.load()); EXPECT_TRUE(cleaner.keepCleaning.load()); - // Nothing to do, sleeping on condition var - while (!cleaner.waitOnConditionVar.load()) { - std::this_thread::yield(); - } - EXPECT_TRUE(cleaner.waitOnConditionVar.load()); - EXPECT_TRUE(cleaner.isEmpty()); EXPECT_FALSE(cleaner.trimOldInCachesCalled); + cleaner.registerSvmAllocationCache(nullptr); + EXPECT_TRUE(cleaner.runCleaning.load()); - auto svmAllocCache = std::make_unique(); - - constexpr size_t svmAllocSize = 1024; - mockMemoryManager.usmReuseInfo.init(svmAllocSize, svmAllocSize); - svmAllocCache->memoryManager = &mockMemoryManager; - cleaner.registerSvmAllocationCache(svmAllocCache.get()); - - // Caches are empty, ensure cleaner thread is still waiting on condition var - while (!cleaner.waitOnConditionVar.load()) { + while (false == cleaner.trimOldInCachesCalled) { std::this_thread::yield(); } - EXPECT_TRUE(cleaner.waitOnConditionVar.load()); - EXPECT_TRUE(cleaner.isEmpty()); - EXPECT_FALSE(cleaner.trimOldInCachesCalled); - - // Wake cleaner thread to proceed some data - cleaner.waitOnConditionVar.store(false); - EXPECT_FALSE(cleaner.waitOnConditionVar.load()); - SvmAllocationData allocData{0}; - svmAllocCache->insert(svmAllocSize, nullptr, &allocData, false); - while (!cleaner.waitOnConditionVar.load()) { - std::this_thread::yield(); - } - EXPECT_TRUE(cleaner.waitOnConditionVar.load()); - EXPECT_TRUE(cleaner.isEmpty()); - EXPECT_TRUE(cleaner.trimOldInCachesCalled); - cleaner.stopThread(); EXPECT_EQ(nullptr, cleaner.unifiedMemoryReuseCleanerThread); + EXPECT_FALSE(cleaner.runCleaning.load()); EXPECT_FALSE(cleaner.keepCleaning.load()); } -TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenShuttingDownThenNoHang) { +TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWithNotStartedCleaningWhenShuttingDownThenNoHang) { MockUnifiedMemoryReuseCleaner cleaner(false); cleaner.callBaseStartThread = true; cleaner.callBaseTrimOldInCaches = false; @@ -79,4 +49,4 @@ TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenShuttin cleaner.stopThread(); } -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index d7f7eb68a1..f21008aa7c 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -653,6 +653,13 @@ void CommandStreamReceiver::downloadAllocation(GraphicsAllocation &gfxAllocation } } +void CommandStreamReceiver::startControllingDirectSubmissions() { + auto controller = this->executionEnvironment.directSubmissionController.get(); + if (controller) { + controller->startControlling(); + } +} + bool CommandStreamReceiver::enqueueWaitForPagingFence(uint64_t pagingFenceValue) { auto controller = this->executionEnvironment.directSubmissionController.get(); if (this->isAnyDirectSubmissionEnabled() && controller) { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index fd8013e0a9..95073c5266 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -348,6 +348,8 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { uint32_t getRootDeviceIndex() const { return rootDeviceIndex; } + MOCKABLE_VIRTUAL void startControllingDirectSubmissions(); + MOCKABLE_VIRTUAL bool isAnyDirectSubmissionEnabled() const { return this->isDirectSubmissionEnabled() || isBlitterDirectSubmissionEnabled(); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 14428446af..83a6d387e3 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1302,7 +1302,7 @@ SubmissionStatus CommandStreamReceiverHw::flushSmallTask(LinearStream this->latestSentTaskCount = taskCount + 1; auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations()); if (submissionStatus == SubmissionStatus::success) { - ++taskCount; + taskCount++; } return submissionStatus; } @@ -1465,6 +1465,7 @@ inline bool CommandStreamReceiverHw::initDirectSubmission() { if (directSubmissionController) { directSubmissionController->registerDirectSubmission(this); } + this->startControllingDirectSubmissions(); if (this->isUpdateTagFromWaitEnabled()) { this->overrideDispatchPolicy(DispatchMode::immediateDispatch); } @@ -1477,7 +1478,6 @@ inline bool CommandStreamReceiverHw::initDirectSubmission() { } } } - return ret; } diff --git a/shared/source/direct_submission/direct_submission_controller.cpp b/shared/source/direct_submission/direct_submission_controller.cpp index bccaac1250..9d6fbec34a 100644 --- a/shared/source/direct_submission/direct_submission_controller.cpp +++ b/shared/source/direct_submission/direct_submission_controller.cpp @@ -62,49 +62,49 @@ void DirectSubmissionController::startThread() { } void DirectSubmissionController::stopThread() { + runControlling.store(false); keepControlling.store(false); - { - std::lock_guard lock(condVarMutex); - condVar.notify_one(); - } if (directSubmissionControllingThread) { directSubmissionControllingThread->join(); directSubmissionControllingThread.reset(); } } +void DirectSubmissionController::startControlling() { + this->runControlling.store(true); +} + void *DirectSubmissionController::controlDirectSubmissionsState(void *self) { auto controller = reinterpret_cast(self); - controller->timeSinceLastCheck = controller->getCpuTimestamp(); - controller->lastHangCheckTime = std::chrono::high_resolution_clock::now(); - - while (controller->keepControlling.load()) { + while (!controller->runControlling.load()) { + if (!controller->keepControlling.load()) { + return nullptr; + } std::unique_lock lock(controller->condVarMutex); - controller->wait(lock); - controller->handlePagingFenceRequests(lock, true); - if (controller->sleep(lock)) { // Paging Fence Request - controller->handlePagingFenceRequests(lock, true); - } else { // Timeout - lock.unlock(); - controller->checkNewSubmissions(); + controller->handlePagingFenceRequests(lock, false); + + auto isControllerNotified = controller->sleep(lock); + if (isControllerNotified) { + controller->handlePagingFenceRequests(lock, false); } } - return nullptr; -} + controller->timeSinceLastCheck = controller->getCpuTimestamp(); + controller->lastHangCheckTime = std::chrono::high_resolution_clock::now(); + while (true) { + if (!controller->keepControlling.load()) { + return nullptr; + } + std::unique_lock lock(controller->condVarMutex); + controller->handlePagingFenceRequests(lock, true); -void DirectSubmissionController::wait(std::unique_lock &lock) { - inDeepSleep = true; - condVar.wait(lock, [&]() { return !keepControlling.load() || !pagingFenceRequests.empty() || activeSubmissionsCount > 0; }); - inDeepSleep = false; -} - -void DirectSubmissionController::notifyNewSubmission() { - ++activeSubmissionsCount; - if (inDeepSleep) { - std::lock_guard lock(condVarMutex); - condVar.notify_one(); + auto isControllerNotified = controller->sleep(lock); + if (isControllerNotified) { + controller->handlePagingFenceRequests(lock, true); + } + lock.unlock(); + controller->checkNewSubmissions(); } } @@ -141,7 +141,6 @@ void DirectSubmissionController::checkNewSubmissions() { csr->stopDirectSubmission(false, false); state.isStopped = true; shouldRecalculateTimeout = true; - --activeSubmissionsCount; } state.taskCount = csr->peekTaskCount(); } else { @@ -279,13 +278,13 @@ void DirectSubmissionController::recalculateTimeout() { } void DirectSubmissionController::enqueueWaitForPagingFence(CommandStreamReceiver *csr, uint64_t pagingFenceValue) { - std::lock_guard lock(condVarMutex); + std::lock_guard lock(this->condVarMutex); pagingFenceRequests.push({csr, pagingFenceValue}); condVar.notify_one(); } void DirectSubmissionController::drainPagingFenceQueue() { - std::lock_guard lock(condVarMutex); + std::lock_guard lock(this->condVarMutex); while (!pagingFenceRequests.empty()) { auto request = pagingFenceRequests.front(); diff --git a/shared/source/direct_submission/direct_submission_controller.h b/shared/source/direct_submission/direct_submission_controller.h index 30e53bed9c..3448b5572c 100644 --- a/shared/source/direct_submission/direct_submission_controller.h +++ b/shared/source/direct_submission/direct_submission_controller.h @@ -61,13 +61,13 @@ class DirectSubmissionController { void unregisterDirectSubmission(CommandStreamReceiver *csr); void startThread(); + void startControlling(); void stopThread(); static bool isSupported(); void enqueueWaitForPagingFence(CommandStreamReceiver *csr, uint64_t pagingFenceValue); void drainPagingFenceQueue(); - void notifyNewSubmission(); protected: struct DirectSubmissionState { @@ -95,11 +95,10 @@ class DirectSubmissionController { }; static void *controlDirectSubmissionsState(void *self); - MOCKABLE_VIRTUAL void checkNewSubmissions(); + void checkNewSubmissions(); bool isDirectSubmissionIdle(CommandStreamReceiver *csr, std::unique_lock &csrLock); bool isCopyEngineOnDeviceIdle(uint32_t rootDeviceIndex, std::optional &bcsTaskCount); MOCKABLE_VIRTUAL bool sleep(std::unique_lock &lock); - MOCKABLE_VIRTUAL void wait(std::unique_lock &lock); MOCKABLE_VIRTUAL SteadyClock::time_point getCpuTimestamp(); MOCKABLE_VIRTUAL void overrideDirectSubmissionTimeouts(const ProductHelper &productHelper); @@ -108,7 +107,7 @@ class DirectSubmissionController { void updateLastSubmittedThrottle(QueueThrottle throttle); size_t getTimeoutParamsMapKey(QueueThrottle throttle, bool acLineStatus); - MOCKABLE_VIRTUAL void handlePagingFenceRequests(std::unique_lock &lock, bool checkForNewSubmissions); + void handlePagingFenceRequests(std::unique_lock &lock, bool checkForNewSubmissions); MOCKABLE_VIRTUAL TimeoutElapsedMode timeoutElapsed(); std::chrono::microseconds getSleepValue() const { return std::chrono::microseconds(this->timeout / this->bcsTimeoutDivisor); } @@ -119,8 +118,7 @@ class DirectSubmissionController { std::unique_ptr directSubmissionControllingThread; std::atomic_bool keepControlling = true; - std::atomic_bool inDeepSleep = false; - std::atomic_uint activeSubmissionsCount = 0; + std::atomic_bool runControlling = false; SteadyClock::time_point timeSinceLastCheck{}; SteadyClock::time_point lastTerminateCpuTimestamp{}; diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index b58ccccdcd..e65517f17d 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -10,7 +10,6 @@ #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" -#include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/direct_submission/relaxed_ordering_helper.h" #include "shared/source/execution_environment/execution_environment.h" @@ -589,9 +588,6 @@ template bool DirectSubmissionHw::submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait, const ResidencyContainer *allocationsForResidency) { if (needStart) { this->ringStart = this->submit(gpuAddress, size, allocationsForResidency); - if (auto controller = rootDeviceEnvironment.executionEnvironment.directSubmissionController.get()) { - controller->notifyNewSubmission(); - } return this->ringStart; } else { if (needWait) { diff --git a/shared/source/direct_submission/linux/direct_submission_controller_linux.cpp b/shared/source/direct_submission/linux/direct_submission_controller_linux.cpp index 57d46b7687..679b5d14a6 100644 --- a/shared/source/direct_submission/linux/direct_submission_controller_linux.cpp +++ b/shared/source/direct_submission/linux/direct_submission_controller_linux.cpp @@ -17,4 +17,4 @@ bool DirectSubmissionController::sleep(std::unique_lock &lock) { void DirectSubmissionController::overrideDirectSubmissionTimeouts(const ProductHelper &productHelper) { } -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/source/direct_submission/windows/direct_submission_controller_windows.cpp b/shared/source/direct_submission/windows/direct_submission_controller_windows.cpp index 4ca7aa9979..b6f675ebd0 100644 --- a/shared/source/direct_submission/windows/direct_submission_controller_windows.cpp +++ b/shared/source/direct_submission/windows/direct_submission_controller_windows.cpp @@ -28,4 +28,4 @@ void DirectSubmissionController::overrideDirectSubmissionTimeouts(const ProductH this->maxTimeout = std::chrono::microseconds(maxTimeoutUs); } -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 6653b13663..783ae62bdd 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -67,7 +67,7 @@ bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAll return false; } - std::unique_lock lock(this->mtx); + std::lock_guard lock(this->mtx); if (svmData->device ? svmData->device->shouldLimitAllocationsReuse() : memoryManager->shouldLimitAllocationsReuse()) { return false; } @@ -99,11 +99,8 @@ bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAll } svmData->isSavedForReuse = true; allocations.emplace(std::lower_bound(allocations.begin(), allocations.end(), size), size, ptr, svmData, waitForCompletion); - empty = false; - if (auto usmReuseCleaner = this->memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner.get()) { - lock.unlock(); - usmReuseCleaner->startThread(); - usmReuseCleaner->notifySvmAllocationsCacheUpdate(); + if (memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner) { + memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner->startThread(); } } if (enablePerformanceLogging) { @@ -113,7 +110,6 @@ bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAll .operationType = CacheOperationType::insert, .isSuccess = isSuccess}); } - return isSuccess; } @@ -185,7 +181,6 @@ void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemory svmAllocsManager->reinsertToAllocsForIndirectAccess(*allocationIter->svmData); } allocations.erase(allocationIter); - empty = allocations.empty(); return allocationPtr; } } @@ -220,7 +215,6 @@ void SVMAllocsManager::SvmAllocationCache::trim() { svmAllocsManager->freeSVMAllocImpl(cachedAllocationInfo.allocation, FreePolicyType::blocking, cachedAllocationInfo.svmData); } this->allocations.clear(); - empty = true; } void SVMAllocsManager::SvmAllocationCache::cleanup() { @@ -305,7 +299,6 @@ void SVMAllocsManager::SvmAllocationCache::trimOldAllocs(std::chrono::high_resol if (trimAll) { std::erase_if(allocations, SvmCacheAllocationInfo::isMarkedForDelete); } - empty = allocations.empty(); } SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) { diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 957f0ac5b7..47bee3b5f9 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -221,7 +221,6 @@ class SVMAllocsManager { static bool allocUtilizationAllows(size_t requestedSize, size_t reuseCandidateSize); static bool alignmentAllows(void *ptr, size_t alignment); bool isInUse(SvmCacheAllocationInfo &cacheAllocInfo); - bool isEmpty() { return empty; }; void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties); void trim(); void trimOldAllocs(std::chrono::high_resolution_clock::time_point trimTimePoint, bool trimAll); @@ -235,7 +234,6 @@ class SVMAllocsManager { MemoryManager *memoryManager = nullptr; bool enablePerformanceLogging = false; bool requireUpdatingAllocsForIndirectAccess = false; - std::atomic_bool empty = true; }; enum class FreePolicyType : uint32_t { diff --git a/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp b/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp index fa1e660a87..8d05c2610e 100644 --- a/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp +++ b/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp @@ -24,10 +24,7 @@ UnifiedMemoryReuseCleaner::~UnifiedMemoryReuseCleaner() { void UnifiedMemoryReuseCleaner::stopThread() { keepCleaning.store(false); - { - std::lock_guard lock(condVarMutex); - condVar.notify_one(); - } + runCleaning.store(false); if (unifiedMemoryReuseCleanerThread) { unifiedMemoryReuseCleanerThread->join(); unifiedMemoryReuseCleanerThread.reset(); @@ -36,24 +33,26 @@ void UnifiedMemoryReuseCleaner::stopThread() { void *UnifiedMemoryReuseCleaner::cleanUnifiedMemoryReuse(void *self) { auto cleaner = reinterpret_cast(self); - while (cleaner->keepCleaning.load()) { - std::unique_lock lock(cleaner->condVarMutex); - cleaner->wait(lock); - lock.unlock(); - cleaner->trimOldInCaches(); + while (!cleaner->runCleaning.load()) { + if (!cleaner->keepCleaning.load()) { + return nullptr; + } NEO::sleep(sleepTime); } - return nullptr; -} -void UnifiedMemoryReuseCleaner::notifySvmAllocationsCacheUpdate() { - std::lock_guard lock(condVarMutex); - condVar.notify_one(); + while (true) { + if (!cleaner->keepCleaning.load()) { + return nullptr; + } + NEO::sleep(sleepTime); + cleaner->trimOldInCaches(); + } } void UnifiedMemoryReuseCleaner::registerSvmAllocationCache(SvmAllocationCache *cache) { std::lock_guard lockSvmAllocationCaches(this->svmAllocationCachesMutex); this->svmAllocationCaches.push_back(cache); + this->startCleaning(); } void UnifiedMemoryReuseCleaner::unregisterSvmAllocationCache(SvmAllocationCache *cache) { @@ -86,4 +85,4 @@ void UnifiedMemoryReuseCleaner::startThread() { }); } -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/source/memory_manager/unified_memory_reuse_cleaner.h b/shared/source/memory_manager/unified_memory_reuse_cleaner.h index fda42af133..86803aa7a7 100644 --- a/shared/source/memory_manager/unified_memory_reuse_cleaner.h +++ b/shared/source/memory_manager/unified_memory_reuse_cleaner.h @@ -11,7 +11,6 @@ #include "shared/source/memory_manager/unified_memory_manager.h" #include -#include #include #include #include @@ -34,16 +33,9 @@ class UnifiedMemoryReuseCleaner : NEO::NonCopyableAndNonMovableClass { void registerSvmAllocationCache(SvmAllocationCache *cache); void unregisterSvmAllocationCache(SvmAllocationCache *cache); - MOCKABLE_VIRTUAL void wait(std::unique_lock &lock) { - condVar.wait(lock, [&]() { return !keepCleaning.load() || !isEmpty(); }); - } - MOCKABLE_VIRTUAL bool isEmpty() { - std::unique_lock lock(svmAllocationCachesMutex); - return std::all_of(svmAllocationCaches.begin(), svmAllocationCaches.end(), [](const auto &it) { return it->isEmpty(); }); - } - void notifySvmAllocationsCacheUpdate(); protected: + void startCleaning() { runCleaning.store(true); }; static void *cleanUnifiedMemoryReuse(void *self); MOCKABLE_VIRTUAL void trimOldInCaches(); std::unique_ptr unifiedMemoryReuseCleanerThread; @@ -52,8 +44,7 @@ class UnifiedMemoryReuseCleaner : NEO::NonCopyableAndNonMovableClass { std::mutex svmAllocationCachesMutex; std::once_flag startThreadOnce; - std::mutex condVarMutex; - std::condition_variable condVar; + std::atomic_bool runCleaning = false; std::atomic_bool keepCleaning = true; bool trimAllAllocations = false; diff --git a/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h b/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h index 0564a00442..a2810ed24d 100644 --- a/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h +++ b/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h @@ -11,6 +11,7 @@ namespace NEO { struct MockUnifiedMemoryReuseCleaner : public UnifiedMemoryReuseCleaner { public: using UnifiedMemoryReuseCleaner::keepCleaning; + using UnifiedMemoryReuseCleaner::runCleaning; using UnifiedMemoryReuseCleaner::svmAllocationCaches; using UnifiedMemoryReuseCleaner::UnifiedMemoryReuseCleaner; using UnifiedMemoryReuseCleaner::unifiedMemoryReuseCleanerThread; @@ -19,8 +20,6 @@ struct MockUnifiedMemoryReuseCleaner : public UnifiedMemoryReuseCleaner { trimOldInCachesCalled = true; if (callBaseTrimOldInCaches) { UnifiedMemoryReuseCleaner::trimOldInCaches(); - } else { - clearCaches(); } } void startThread() override { @@ -29,18 +28,9 @@ struct MockUnifiedMemoryReuseCleaner : public UnifiedMemoryReuseCleaner { UnifiedMemoryReuseCleaner::startThread(); } }; - void wait(std::unique_lock &lock) override { - waitOnConditionVar.store(true); - UnifiedMemoryReuseCleaner::wait(lock); - }; - void clearCaches() { - std::lock_guard lock(svmAllocationCachesMutex); - svmAllocationCaches.clear(); - } - std::atomic_bool trimOldInCachesCalled = false; - std::atomic_bool waitOnConditionVar = false; + bool trimOldInCachesCalled = false; bool startThreadCalled = false; bool callBaseStartThread = false; bool callBaseTrimOldInCaches = true; }; -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index a124300754..20e7da23c0 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -1168,7 +1168,12 @@ class CommandStreamReceiverHwDirectSubmissionMock : public CommandStreamReceiver return CommandStreamReceiverHw::obtainUniqueOwnership(); } + void startControllingDirectSubmissions() override { + startControllingDirectSubmissionsCalled = true; + } + uint32_t recursiveLockCounter = 0; + bool startControllingDirectSubmissionsCalled = false; }; HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionAgainThenItIsNotReinitialized) { @@ -1199,7 +1204,7 @@ HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionAgainThenItIsNotR csr.reset(); } -HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionThenObtainLock) { +HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionThenObtainLockAndInitController) { auto csr = std::make_unique>(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); std::unique_ptr osContext(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), device->getRootDeviceIndex(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::regular}, @@ -1213,6 +1218,7 @@ HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionThenObtainLock) { csr->initializeTagAllocation(); csr->initDirectSubmission(); EXPECT_EQ(1u, csr->recursiveLockCounter); + EXPECT_TRUE(csr->startControllingDirectSubmissionsCalled); csr.reset(); } diff --git a/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h b/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h index 69d2590b62..f450be5452 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h +++ b/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h @@ -19,7 +19,6 @@ struct DirectSubmissionControllerMock : public DirectSubmissionController { using DirectSubmissionController::directSubmissionsMutex; using DirectSubmissionController::getSleepValue; using DirectSubmissionController::handlePagingFenceRequests; - using DirectSubmissionController::inDeepSleep; using DirectSubmissionController::isCopyEngineOnDeviceIdle; using DirectSubmissionController::isCsrsContextGroupIdleDetectionEnabled; using DirectSubmissionController::isDirectSubmissionIdle; @@ -43,16 +42,6 @@ struct DirectSubmissionControllerMock : public DirectSubmissionController { } } - void handlePagingFenceRequests(std::unique_lock &lock, bool checkForNewSubmissions) override { - handlePagingFenceRequestsCalled.store(true); - DirectSubmissionController::handlePagingFenceRequests(lock, checkForNewSubmissions); - } - - void checkNewSubmissions() override { - checkNewSubmissionCalled.store(true); - DirectSubmissionController::checkNewSubmissions(); - } - SteadyClock::time_point getCpuTimestamp() override { return cpuTimestamp; } @@ -70,8 +59,6 @@ struct DirectSubmissionControllerMock : public DirectSubmissionController { std::atomic sleepReturnValue{false}; std::atomic timeoutElapsedReturnValue{TimeoutElapsedMode::notElapsed}; std::atomic timeoutElapsedCallBase{false}; - std::atomic checkNewSubmissionCalled{false}; - std::atomic handlePagingFenceRequestsCalled{false}; bool callBaseSleepMethod = false; }; -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index b1f81e3418..870893f2af 100644 --- a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -127,12 +127,16 @@ struct MockWddmCsr : public WddmCommandStreamReceiver { } return ret; } + void startControllingDirectSubmissions() override { + directSubmissionControllerStarted = true; + } uint32_t flushCalledCount = 0; std::unique_ptr recordedCommandBuffer; bool callParentInitDirectSubmission = true; bool initBlitterDirectSubmission = false; uint32_t fillReusableAllocationsListCalled = 0; + bool directSubmissionControllerStarted = false; }; class WddmCommandStreamMockGdiTest : public ::testing::Test { @@ -1178,6 +1182,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnR EXPECT_TRUE(ret); EXPECT_TRUE(csr->isDirectSubmissionEnabled()); EXPECT_FALSE(csr->isBlitterDirectSubmissionEnabled()); + EXPECT_FALSE(mockCsr->directSubmissionControllerStarted); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); @@ -1224,6 +1229,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB EXPECT_TRUE(ret); EXPECT_FALSE(csr->isDirectSubmissionEnabled()); EXPECT_TRUE(csr->isBlitterDirectSubmissionEnabled()); + EXPECT_FALSE(mockCsr->directSubmissionControllerStarted); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer);