From db0b4a616ced0d599033b3991dca93e4f6116d29 Mon Sep 17 00:00:00 2001 From: Igor Venevtsev Date: Fri, 10 Oct 2025 17:18:05 +0000 Subject: [PATCH] fix: use condition variables instead of busy waits in worker threads Resolves: NEO-16085, GSD-11678, HSD-14025819208 Signed-off-by: Igor Venevtsev --- .../direct_submission_controller_tests_mt.cpp | 98 ++++++++++--------- .../unified_memory_reuse_cleaner_tests_mt.cpp | 52 +++++++--- .../command_stream_receiver.cpp | 7 -- .../command_stream/command_stream_receiver.h | 2 - .../command_stream_receiver_hw_base.inl | 4 +- .../direct_submission_controller.cpp | 61 ++++++------ .../direct_submission_controller.h | 10 +- .../direct_submission_hw.inl | 4 + .../direct_submission_controller_linux.cpp | 2 +- .../direct_submission_controller_windows.cpp | 2 +- .../memory_manager/unified_memory_manager.cpp | 13 ++- .../memory_manager/unified_memory_manager.h | 2 + .../unified_memory_reuse_cleaner.cpp | 31 +++--- .../unified_memory_reuse_cleaner.h | 13 ++- .../mocks/mock_usm_memory_reuse_cleaner.h | 16 ++- .../command_stream_receiver_tests.cpp | 8 +- .../direct_submission_controller_mock.h | 15 ++- .../windows/device_command_stream_tests.cpp | 6 -- 18 files changed, 205 insertions(+), 141 deletions(-) diff --git a/opencl/test/unit_test/mt_tests/direct_submission/direct_submission_controller_tests_mt.cpp b/opencl/test/unit_test/mt_tests/direct_submission/direct_submission_controller_tests_mt.cpp index ecb86b92fb..b99775466c 100644 --- a/opencl/test/unit_test/mt_tests/direct_submission/direct_submission_controller_tests_mt.cpp +++ b/opencl/test/unit_test/mt_tests/direct_submission/direct_submission_controller_tests_mt.cpp @@ -16,7 +16,7 @@ namespace NEO { -TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenTimeoutThenDirectSubmissionsAreChecked) { +TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenNewSubmissionThenDirectSubmissionsAreChecked) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); @@ -28,54 +28,57 @@ TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenTimeo EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular}, PreemptionMode::ThreadGroup, deviceBitfield))); csr.setupContext(*osContext.get()); - csr.initializeTagAllocation(); - *csr.tagAddress = 9u; - csr.taskCount.store(9u); DirectSubmissionControllerMock controller; executionEnvironment.directSubmissionController.reset(&controller); controller.timeoutElapsedReturnValue.store(TimeoutElapsedMode::fullyElapsed); - controller.startThread(); - csr.startControllingDirectSubmissions(); controller.registerDirectSubmission(&csr); + controller.startThread(); + // Nothing to do, we are deep sleeping on condition var + while (!controller.inDeepSleep.load()) { + std::this_thread::yield(); + } - while (controller.directSubmissions[&csr].taskCount != 9u) { + EXPECT_TRUE(controller.inDeepSleep.load()); + EXPECT_FALSE(controller.handlePagingFenceRequestsCalled.load()); + EXPECT_FALSE(controller.sleepCalled.load()); + EXPECT_FALSE(controller.checkNewSubmissionCalled.load()); + + // Wake up controller with new submission, work should be done and wait again + csr.taskCount = 10; + controller.notifyNewSubmission(); + + // We need to sleep here to give worker thread a chance to wake up, we can not check inDeepSleep immediately here + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + while (!controller.inDeepSleep.load()) { std::this_thread::yield(); } - while (!controller.directSubmissions[&csr].isStopped) { - std::this_thread::yield(); - } - { - std::lock_guard lock(controller.directSubmissionsMutex); - EXPECT_NE(controller.directSubmissionControllingThread.get(), nullptr); - EXPECT_TRUE(controller.directSubmissions[&csr].isStopped); - EXPECT_EQ(controller.directSubmissions[&csr].taskCount, 9u); - } + + // Work is done, verify results + EXPECT_TRUE(controller.inDeepSleep.load()); + EXPECT_TRUE(controller.handlePagingFenceRequestsCalled.load()); + EXPECT_TRUE(controller.sleepCalled.load()); + EXPECT_TRUE(controller.checkNewSubmissionCalled.load()); + EXPECT_NE(controller.directSubmissionControllingThread.get(), nullptr); + EXPECT_TRUE(controller.directSubmissions[&csr].isStopped.load()); + EXPECT_EQ(10u, controller.directSubmissions[&csr].taskCount.load()); + EXPECT_EQ(10u, csr.peekTaskCount()); + controller.stopThread(); controller.unregisterDirectSubmission(&csr); executionEnvironment.directSubmissionController.release(); } -TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWithStartedControllingWhenShuttingDownThenNoHang) { - DirectSubmissionControllerMock controller; - controller.startThread(); - EXPECT_NE(controller.directSubmissionControllingThread.get(), nullptr); - controller.startControlling(); - - while (!controller.sleepCalled) { - std::this_thread::yield(); - } - controller.stopThread(); -} - -TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWithNotStartedControllingWhenShuttingDownThenNoHang) { +TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenShuttingDownThenNoHang) { DirectSubmissionControllerMock controller; controller.startThread(); EXPECT_NE(controller.directSubmissionControllingThread.get(), nullptr); - while (!controller.sleepCalled) { + while (!controller.inDeepSleep.load()) { std::this_thread::yield(); } + + EXPECT_TRUE(controller.inDeepSleep.load()); controller.stopThread(); } @@ -91,29 +94,32 @@ TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenEnque DirectSubmissionControllerMock controller; controller.sleepCalled.store(false); controller.startThread(); - while (!controller.sleepCalled) { + + // Nothing to do, deep sleep + while (!controller.inDeepSleep.load()) { std::this_thread::yield(); } - EXPECT_EQ(0u, csr.pagingFenceValueToUnblock); + + EXPECT_TRUE(controller.inDeepSleep.load()); + EXPECT_FALSE(controller.handlePagingFenceRequestsCalled.load()); + EXPECT_FALSE(controller.sleepCalled.load()); + EXPECT_FALSE(controller.checkNewSubmissionCalled.load()); + + // Wake up controller with paging fence, work should be done and wait again + controller.inDeepSleep.store(false); + EXPECT_FALSE(controller.inDeepSleep.load()); controller.enqueueWaitForPagingFence(&csr, 10u); - // Wait until csr is not updated - while (csr.pagingFenceValueToUnblock == 0u) { + while (!controller.inDeepSleep.load()) { std::this_thread::yield(); } + + EXPECT_TRUE(controller.inDeepSleep.load()); + EXPECT_TRUE(controller.sleepCalled.load()); + EXPECT_TRUE(controller.handlePagingFenceRequestsCalled.load()); + EXPECT_TRUE(controller.checkNewSubmissionCalled.load()); EXPECT_EQ(10u, csr.pagingFenceValueToUnblock); - - // Verify that controller is able to handle requests during controlling - controller.startControlling(); - - controller.enqueueWaitForPagingFence(&csr, 20u); - - while (csr.pagingFenceValueToUnblock == 10u) { - std::this_thread::yield(); - } - EXPECT_EQ(20u, csr.pagingFenceValueToUnblock); - controller.stopThread(); } -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/opencl/test/unit_test/mt_tests/memory_manager/unified_memory_reuse_cleaner_tests_mt.cpp b/opencl/test/unit_test/mt_tests/memory_manager/unified_memory_reuse_cleaner_tests_mt.cpp index 03c6fdcac5..bd355886fe 100644 --- a/opencl/test/unit_test/mt_tests/memory_manager/unified_memory_reuse_cleaner_tests_mt.cpp +++ b/opencl/test/unit_test/mt_tests/memory_manager/unified_memory_reuse_cleaner_tests_mt.cpp @@ -5,12 +5,16 @@ * */ +#include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h" #include "shared/test/common/test_macros/test.h" namespace NEO { -TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenSleepExpiredThenTrimOldInCachesIsCalled) { - MockUnifiedMemoryReuseCleaner cleaner(false); +TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenCachesAreEmptyThenWorkerThreadIsWaitingOnConditionVar) { + MockMemoryManager mockMemoryManager; + mockMemoryManager.executionEnvironment.unifiedMemoryReuseCleaner.reset(new MockUnifiedMemoryReuseCleaner(false)); + MockUnifiedMemoryReuseCleaner &cleaner = *static_cast(mockMemoryManager.executionEnvironment.unifiedMemoryReuseCleaner.get()); + cleaner.callBaseStartThread = true; cleaner.callBaseTrimOldInCaches = false; EXPECT_EQ(nullptr, cleaner.unifiedMemoryReuseCleanerThread); @@ -22,23 +26,49 @@ TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenSleepEx cleaner.startThread(); EXPECT_EQ(cleanerThread, cleaner.unifiedMemoryReuseCleanerThread.get()); } - EXPECT_FALSE(cleaner.runCleaning.load()); EXPECT_TRUE(cleaner.keepCleaning.load()); - EXPECT_FALSE(cleaner.trimOldInCachesCalled); - cleaner.registerSvmAllocationCache(nullptr); - EXPECT_TRUE(cleaner.runCleaning.load()); - - while (false == cleaner.trimOldInCachesCalled) { + // Nothing to do, sleeping on condition var + while (!cleaner.waitOnConditionVar.load()) { std::this_thread::yield(); } + EXPECT_TRUE(cleaner.waitOnConditionVar.load()); + EXPECT_TRUE(cleaner.isEmpty()); + EXPECT_FALSE(cleaner.trimOldInCachesCalled); + + auto svmAllocCache = std::make_unique(); + + constexpr size_t svmAllocSize = 1024; + mockMemoryManager.usmReuseInfo.init(svmAllocSize, svmAllocSize); + svmAllocCache->memoryManager = &mockMemoryManager; + cleaner.registerSvmAllocationCache(svmAllocCache.get()); + + // Caches are empty, ensure cleaner thread is still waiting on condition var + while (!cleaner.waitOnConditionVar.load()) { + std::this_thread::yield(); + } + EXPECT_TRUE(cleaner.waitOnConditionVar.load()); + EXPECT_TRUE(cleaner.isEmpty()); + EXPECT_FALSE(cleaner.trimOldInCachesCalled); + + // Wake cleaner thread to proceed some data + cleaner.waitOnConditionVar.store(false); + EXPECT_FALSE(cleaner.waitOnConditionVar.load()); + SvmAllocationData allocData{0}; + svmAllocCache->insert(svmAllocSize, nullptr, &allocData, false); + while (!cleaner.waitOnConditionVar.load()) { + std::this_thread::yield(); + } + EXPECT_TRUE(cleaner.waitOnConditionVar.load()); + EXPECT_TRUE(cleaner.isEmpty()); + EXPECT_TRUE(cleaner.trimOldInCachesCalled); + cleaner.stopThread(); EXPECT_EQ(nullptr, cleaner.unifiedMemoryReuseCleanerThread); - EXPECT_FALSE(cleaner.runCleaning.load()); EXPECT_FALSE(cleaner.keepCleaning.load()); } -TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWithNotStartedCleaningWhenShuttingDownThenNoHang) { +TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWhenShuttingDownThenNoHang) { MockUnifiedMemoryReuseCleaner cleaner(false); cleaner.callBaseStartThread = true; cleaner.callBaseTrimOldInCaches = false; @@ -49,4 +79,4 @@ TEST(UnifiedMemoryReuseCleanerTestsMt, givenUnifiedMemoryReuseCleanerWithNotStar cleaner.stopThread(); } -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index f10b924171..867c53e7dd 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -653,13 +653,6 @@ void CommandStreamReceiver::downloadAllocation(GraphicsAllocation &gfxAllocation } } -void CommandStreamReceiver::startControllingDirectSubmissions() { - auto controller = this->executionEnvironment.directSubmissionController.get(); - if (controller) { - controller->startControlling(); - } -} - bool CommandStreamReceiver::enqueueWaitForPagingFence(uint64_t pagingFenceValue) { auto controller = this->executionEnvironment.directSubmissionController.get(); if (this->isAnyDirectSubmissionEnabled() && controller) { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 95073c5266..fd8013e0a9 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -348,8 +348,6 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { uint32_t getRootDeviceIndex() const { return rootDeviceIndex; } - MOCKABLE_VIRTUAL void startControllingDirectSubmissions(); - MOCKABLE_VIRTUAL bool isAnyDirectSubmissionEnabled() const { return this->isDirectSubmissionEnabled() || isBlitterDirectSubmissionEnabled(); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index ed5616a2ba..acdc4d1d38 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1302,7 +1302,7 @@ SubmissionStatus CommandStreamReceiverHw::flushSmallTask(LinearStream this->latestSentTaskCount = taskCount + 1; auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations()); if (submissionStatus == SubmissionStatus::success) { - taskCount++; + ++taskCount; } return submissionStatus; } @@ -1465,7 +1465,6 @@ inline bool CommandStreamReceiverHw::initDirectSubmission() { if (directSubmissionController) { directSubmissionController->registerDirectSubmission(this); } - this->startControllingDirectSubmissions(); if (this->isUpdateTagFromWaitEnabled()) { this->overrideDispatchPolicy(DispatchMode::immediateDispatch); } @@ -1478,6 +1477,7 @@ inline bool CommandStreamReceiverHw::initDirectSubmission() { } } } + return ret; } diff --git a/shared/source/direct_submission/direct_submission_controller.cpp b/shared/source/direct_submission/direct_submission_controller.cpp index 9d6fbec34a..bccaac1250 100644 --- a/shared/source/direct_submission/direct_submission_controller.cpp +++ b/shared/source/direct_submission/direct_submission_controller.cpp @@ -62,49 +62,49 @@ void DirectSubmissionController::startThread() { } void DirectSubmissionController::stopThread() { - runControlling.store(false); keepControlling.store(false); + { + std::lock_guard lock(condVarMutex); + condVar.notify_one(); + } if (directSubmissionControllingThread) { directSubmissionControllingThread->join(); directSubmissionControllingThread.reset(); } } -void DirectSubmissionController::startControlling() { - this->runControlling.store(true); -} - void *DirectSubmissionController::controlDirectSubmissionsState(void *self) { auto controller = reinterpret_cast(self); - while (!controller->runControlling.load()) { - if (!controller->keepControlling.load()) { - return nullptr; - } - std::unique_lock lock(controller->condVarMutex); - controller->handlePagingFenceRequests(lock, false); + controller->timeSinceLastCheck = controller->getCpuTimestamp(); + controller->lastHangCheckTime = std::chrono::high_resolution_clock::now(); - auto isControllerNotified = controller->sleep(lock); - if (isControllerNotified) { - controller->handlePagingFenceRequests(lock, false); + while (controller->keepControlling.load()) { + std::unique_lock lock(controller->condVarMutex); + controller->wait(lock); + controller->handlePagingFenceRequests(lock, true); + if (controller->sleep(lock)) { // Paging Fence Request + controller->handlePagingFenceRequests(lock, true); + } else { // Timeout + lock.unlock(); + controller->checkNewSubmissions(); } } - controller->timeSinceLastCheck = controller->getCpuTimestamp(); - controller->lastHangCheckTime = std::chrono::high_resolution_clock::now(); - while (true) { - if (!controller->keepControlling.load()) { - return nullptr; - } - std::unique_lock lock(controller->condVarMutex); - controller->handlePagingFenceRequests(lock, true); + return nullptr; +} - auto isControllerNotified = controller->sleep(lock); - if (isControllerNotified) { - controller->handlePagingFenceRequests(lock, true); - } - lock.unlock(); - controller->checkNewSubmissions(); +void DirectSubmissionController::wait(std::unique_lock &lock) { + inDeepSleep = true; + condVar.wait(lock, [&]() { return !keepControlling.load() || !pagingFenceRequests.empty() || activeSubmissionsCount > 0; }); + inDeepSleep = false; +} + +void DirectSubmissionController::notifyNewSubmission() { + ++activeSubmissionsCount; + if (inDeepSleep) { + std::lock_guard lock(condVarMutex); + condVar.notify_one(); } } @@ -141,6 +141,7 @@ void DirectSubmissionController::checkNewSubmissions() { csr->stopDirectSubmission(false, false); state.isStopped = true; shouldRecalculateTimeout = true; + --activeSubmissionsCount; } state.taskCount = csr->peekTaskCount(); } else { @@ -278,13 +279,13 @@ void DirectSubmissionController::recalculateTimeout() { } void DirectSubmissionController::enqueueWaitForPagingFence(CommandStreamReceiver *csr, uint64_t pagingFenceValue) { - std::lock_guard lock(this->condVarMutex); + std::lock_guard lock(condVarMutex); pagingFenceRequests.push({csr, pagingFenceValue}); condVar.notify_one(); } void DirectSubmissionController::drainPagingFenceQueue() { - std::lock_guard lock(this->condVarMutex); + std::lock_guard lock(condVarMutex); while (!pagingFenceRequests.empty()) { auto request = pagingFenceRequests.front(); diff --git a/shared/source/direct_submission/direct_submission_controller.h b/shared/source/direct_submission/direct_submission_controller.h index 3448b5572c..30e53bed9c 100644 --- a/shared/source/direct_submission/direct_submission_controller.h +++ b/shared/source/direct_submission/direct_submission_controller.h @@ -61,13 +61,13 @@ class DirectSubmissionController { void unregisterDirectSubmission(CommandStreamReceiver *csr); void startThread(); - void startControlling(); void stopThread(); static bool isSupported(); void enqueueWaitForPagingFence(CommandStreamReceiver *csr, uint64_t pagingFenceValue); void drainPagingFenceQueue(); + void notifyNewSubmission(); protected: struct DirectSubmissionState { @@ -95,10 +95,11 @@ class DirectSubmissionController { }; static void *controlDirectSubmissionsState(void *self); - void checkNewSubmissions(); + MOCKABLE_VIRTUAL void checkNewSubmissions(); bool isDirectSubmissionIdle(CommandStreamReceiver *csr, std::unique_lock &csrLock); bool isCopyEngineOnDeviceIdle(uint32_t rootDeviceIndex, std::optional &bcsTaskCount); MOCKABLE_VIRTUAL bool sleep(std::unique_lock &lock); + MOCKABLE_VIRTUAL void wait(std::unique_lock &lock); MOCKABLE_VIRTUAL SteadyClock::time_point getCpuTimestamp(); MOCKABLE_VIRTUAL void overrideDirectSubmissionTimeouts(const ProductHelper &productHelper); @@ -107,7 +108,7 @@ class DirectSubmissionController { void updateLastSubmittedThrottle(QueueThrottle throttle); size_t getTimeoutParamsMapKey(QueueThrottle throttle, bool acLineStatus); - void handlePagingFenceRequests(std::unique_lock &lock, bool checkForNewSubmissions); + MOCKABLE_VIRTUAL void handlePagingFenceRequests(std::unique_lock &lock, bool checkForNewSubmissions); MOCKABLE_VIRTUAL TimeoutElapsedMode timeoutElapsed(); std::chrono::microseconds getSleepValue() const { return std::chrono::microseconds(this->timeout / this->bcsTimeoutDivisor); } @@ -118,7 +119,8 @@ class DirectSubmissionController { std::unique_ptr directSubmissionControllingThread; std::atomic_bool keepControlling = true; - std::atomic_bool runControlling = false; + std::atomic_bool inDeepSleep = false; + std::atomic_uint activeSubmissionsCount = 0; SteadyClock::time_point timeSinceLastCheck{}; SteadyClock::time_point lastTerminateCpuTimestamp{}; diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index e65517f17d..b58ccccdcd 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -10,6 +10,7 @@ #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" +#include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/direct_submission/relaxed_ordering_helper.h" #include "shared/source/execution_environment/execution_environment.h" @@ -588,6 +589,9 @@ template bool DirectSubmissionHw::submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait, const ResidencyContainer *allocationsForResidency) { if (needStart) { this->ringStart = this->submit(gpuAddress, size, allocationsForResidency); + if (auto controller = rootDeviceEnvironment.executionEnvironment.directSubmissionController.get()) { + controller->notifyNewSubmission(); + } return this->ringStart; } else { if (needWait) { diff --git a/shared/source/direct_submission/linux/direct_submission_controller_linux.cpp b/shared/source/direct_submission/linux/direct_submission_controller_linux.cpp index 679b5d14a6..57d46b7687 100644 --- a/shared/source/direct_submission/linux/direct_submission_controller_linux.cpp +++ b/shared/source/direct_submission/linux/direct_submission_controller_linux.cpp @@ -17,4 +17,4 @@ bool DirectSubmissionController::sleep(std::unique_lock &lock) { void DirectSubmissionController::overrideDirectSubmissionTimeouts(const ProductHelper &productHelper) { } -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/shared/source/direct_submission/windows/direct_submission_controller_windows.cpp b/shared/source/direct_submission/windows/direct_submission_controller_windows.cpp index b6f675ebd0..4ca7aa9979 100644 --- a/shared/source/direct_submission/windows/direct_submission_controller_windows.cpp +++ b/shared/source/direct_submission/windows/direct_submission_controller_windows.cpp @@ -28,4 +28,4 @@ void DirectSubmissionController::overrideDirectSubmissionTimeouts(const ProductH this->maxTimeout = std::chrono::microseconds(maxTimeoutUs); } -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 9126b501cd..c8617ab069 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -67,7 +67,7 @@ bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAll return false; } - std::lock_guard lock(this->mtx); + std::unique_lock lock(this->mtx); if (svmData->device ? svmData->device->shouldLimitAllocationsReuse() : memoryManager->shouldLimitAllocationsReuse()) { return false; } @@ -99,8 +99,11 @@ bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAll } svmData->isSavedForReuse = true; allocations.emplace(std::lower_bound(allocations.begin(), allocations.end(), size), size, ptr, svmData, waitForCompletion); - if (memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner) { - memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner->startThread(); + empty = false; + if (auto usmReuseCleaner = this->memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner.get()) { + lock.unlock(); + usmReuseCleaner->startThread(); + usmReuseCleaner->notifySvmAllocationsCacheUpdate(); } } if (enablePerformanceLogging) { @@ -110,6 +113,7 @@ bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAll .operationType = CacheOperationType::insert, .isSuccess = isSuccess}); } + return isSuccess; } @@ -181,6 +185,7 @@ void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemory svmAllocsManager->reinsertToAllocsForIndirectAccess(*allocationIter->svmData); } allocations.erase(allocationIter); + empty = allocations.empty(); return allocationPtr; } } @@ -215,6 +220,7 @@ void SVMAllocsManager::SvmAllocationCache::trim() { svmAllocsManager->freeSVMAllocImpl(cachedAllocationInfo.allocation, FreePolicyType::none, cachedAllocationInfo.svmData); } this->allocations.clear(); + empty = true; } void SVMAllocsManager::SvmAllocationCache::cleanup() { @@ -299,6 +305,7 @@ void SVMAllocsManager::SvmAllocationCache::trimOldAllocs(std::chrono::high_resol if (trimAll) { std::erase_if(allocations, SvmCacheAllocationInfo::isMarkedForDelete); } + empty = allocations.empty(); } SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) { diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 47bee3b5f9..957f0ac5b7 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -221,6 +221,7 @@ class SVMAllocsManager { static bool allocUtilizationAllows(size_t requestedSize, size_t reuseCandidateSize); static bool alignmentAllows(void *ptr, size_t alignment); bool isInUse(SvmCacheAllocationInfo &cacheAllocInfo); + bool isEmpty() { return empty; }; void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties); void trim(); void trimOldAllocs(std::chrono::high_resolution_clock::time_point trimTimePoint, bool trimAll); @@ -234,6 +235,7 @@ class SVMAllocsManager { MemoryManager *memoryManager = nullptr; bool enablePerformanceLogging = false; bool requireUpdatingAllocsForIndirectAccess = false; + std::atomic_bool empty = true; }; enum class FreePolicyType : uint32_t { diff --git a/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp b/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp index 8d05c2610e..fa1e660a87 100644 --- a/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp +++ b/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp @@ -24,7 +24,10 @@ UnifiedMemoryReuseCleaner::~UnifiedMemoryReuseCleaner() { void UnifiedMemoryReuseCleaner::stopThread() { keepCleaning.store(false); - runCleaning.store(false); + { + std::lock_guard lock(condVarMutex); + condVar.notify_one(); + } if (unifiedMemoryReuseCleanerThread) { unifiedMemoryReuseCleanerThread->join(); unifiedMemoryReuseCleanerThread.reset(); @@ -33,26 +36,24 @@ void UnifiedMemoryReuseCleaner::stopThread() { void *UnifiedMemoryReuseCleaner::cleanUnifiedMemoryReuse(void *self) { auto cleaner = reinterpret_cast(self); - while (!cleaner->runCleaning.load()) { - if (!cleaner->keepCleaning.load()) { - return nullptr; - } - NEO::sleep(sleepTime); - } - - while (true) { - if (!cleaner->keepCleaning.load()) { - return nullptr; - } - NEO::sleep(sleepTime); + while (cleaner->keepCleaning.load()) { + std::unique_lock lock(cleaner->condVarMutex); + cleaner->wait(lock); + lock.unlock(); cleaner->trimOldInCaches(); + NEO::sleep(sleepTime); } + return nullptr; +} + +void UnifiedMemoryReuseCleaner::notifySvmAllocationsCacheUpdate() { + std::lock_guard lock(condVarMutex); + condVar.notify_one(); } void UnifiedMemoryReuseCleaner::registerSvmAllocationCache(SvmAllocationCache *cache) { std::lock_guard lockSvmAllocationCaches(this->svmAllocationCachesMutex); this->svmAllocationCaches.push_back(cache); - this->startCleaning(); } void UnifiedMemoryReuseCleaner::unregisterSvmAllocationCache(SvmAllocationCache *cache) { @@ -85,4 +86,4 @@ void UnifiedMemoryReuseCleaner::startThread() { }); } -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/shared/source/memory_manager/unified_memory_reuse_cleaner.h b/shared/source/memory_manager/unified_memory_reuse_cleaner.h index 86803aa7a7..fda42af133 100644 --- a/shared/source/memory_manager/unified_memory_reuse_cleaner.h +++ b/shared/source/memory_manager/unified_memory_reuse_cleaner.h @@ -11,6 +11,7 @@ #include "shared/source/memory_manager/unified_memory_manager.h" #include +#include #include #include #include @@ -33,9 +34,16 @@ class UnifiedMemoryReuseCleaner : NEO::NonCopyableAndNonMovableClass { void registerSvmAllocationCache(SvmAllocationCache *cache); void unregisterSvmAllocationCache(SvmAllocationCache *cache); + MOCKABLE_VIRTUAL void wait(std::unique_lock &lock) { + condVar.wait(lock, [&]() { return !keepCleaning.load() || !isEmpty(); }); + } + MOCKABLE_VIRTUAL bool isEmpty() { + std::unique_lock lock(svmAllocationCachesMutex); + return std::all_of(svmAllocationCaches.begin(), svmAllocationCaches.end(), [](const auto &it) { return it->isEmpty(); }); + } + void notifySvmAllocationsCacheUpdate(); protected: - void startCleaning() { runCleaning.store(true); }; static void *cleanUnifiedMemoryReuse(void *self); MOCKABLE_VIRTUAL void trimOldInCaches(); std::unique_ptr unifiedMemoryReuseCleanerThread; @@ -44,7 +52,8 @@ class UnifiedMemoryReuseCleaner : NEO::NonCopyableAndNonMovableClass { std::mutex svmAllocationCachesMutex; std::once_flag startThreadOnce; - std::atomic_bool runCleaning = false; + std::mutex condVarMutex; + std::condition_variable condVar; std::atomic_bool keepCleaning = true; bool trimAllAllocations = false; diff --git a/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h b/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h index a2810ed24d..0564a00442 100644 --- a/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h +++ b/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h @@ -11,7 +11,6 @@ namespace NEO { struct MockUnifiedMemoryReuseCleaner : public UnifiedMemoryReuseCleaner { public: using UnifiedMemoryReuseCleaner::keepCleaning; - using UnifiedMemoryReuseCleaner::runCleaning; using UnifiedMemoryReuseCleaner::svmAllocationCaches; using UnifiedMemoryReuseCleaner::UnifiedMemoryReuseCleaner; using UnifiedMemoryReuseCleaner::unifiedMemoryReuseCleanerThread; @@ -20,6 +19,8 @@ struct MockUnifiedMemoryReuseCleaner : public UnifiedMemoryReuseCleaner { trimOldInCachesCalled = true; if (callBaseTrimOldInCaches) { UnifiedMemoryReuseCleaner::trimOldInCaches(); + } else { + clearCaches(); } } void startThread() override { @@ -28,9 +29,18 @@ struct MockUnifiedMemoryReuseCleaner : public UnifiedMemoryReuseCleaner { UnifiedMemoryReuseCleaner::startThread(); } }; - bool trimOldInCachesCalled = false; + void wait(std::unique_lock &lock) override { + waitOnConditionVar.store(true); + UnifiedMemoryReuseCleaner::wait(lock); + }; + void clearCaches() { + std::lock_guard lock(svmAllocationCachesMutex); + svmAllocationCaches.clear(); + } + std::atomic_bool trimOldInCachesCalled = false; + std::atomic_bool waitOnConditionVar = false; bool startThreadCalled = false; bool callBaseStartThread = false; bool callBaseTrimOldInCaches = true; }; -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 20e7da23c0..a124300754 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -1168,12 +1168,7 @@ class CommandStreamReceiverHwDirectSubmissionMock : public CommandStreamReceiver return CommandStreamReceiverHw::obtainUniqueOwnership(); } - void startControllingDirectSubmissions() override { - startControllingDirectSubmissionsCalled = true; - } - uint32_t recursiveLockCounter = 0; - bool startControllingDirectSubmissionsCalled = false; }; HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionAgainThenItIsNotReinitialized) { @@ -1204,7 +1199,7 @@ HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionAgainThenItIsNotR csr.reset(); } -HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionThenObtainLockAndInitController) { +HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionThenObtainLock) { auto csr = std::make_unique>(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); std::unique_ptr osContext(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), device->getRootDeviceIndex(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::regular}, @@ -1218,7 +1213,6 @@ HWTEST_F(InitDirectSubmissionTest, whenCallInitDirectSubmissionThenObtainLockAnd csr->initializeTagAllocation(); csr->initDirectSubmission(); EXPECT_EQ(1u, csr->recursiveLockCounter); - EXPECT_TRUE(csr->startControllingDirectSubmissionsCalled); csr.reset(); } diff --git a/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h b/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h index f450be5452..69d2590b62 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h +++ b/shared/test/unit_test/direct_submission/direct_submission_controller_mock.h @@ -19,6 +19,7 @@ struct DirectSubmissionControllerMock : public DirectSubmissionController { using DirectSubmissionController::directSubmissionsMutex; using DirectSubmissionController::getSleepValue; using DirectSubmissionController::handlePagingFenceRequests; + using DirectSubmissionController::inDeepSleep; using DirectSubmissionController::isCopyEngineOnDeviceIdle; using DirectSubmissionController::isCsrsContextGroupIdleDetectionEnabled; using DirectSubmissionController::isDirectSubmissionIdle; @@ -42,6 +43,16 @@ struct DirectSubmissionControllerMock : public DirectSubmissionController { } } + void handlePagingFenceRequests(std::unique_lock &lock, bool checkForNewSubmissions) override { + handlePagingFenceRequestsCalled.store(true); + DirectSubmissionController::handlePagingFenceRequests(lock, checkForNewSubmissions); + } + + void checkNewSubmissions() override { + checkNewSubmissionCalled.store(true); + DirectSubmissionController::checkNewSubmissions(); + } + SteadyClock::time_point getCpuTimestamp() override { return cpuTimestamp; } @@ -59,6 +70,8 @@ struct DirectSubmissionControllerMock : public DirectSubmissionController { std::atomic sleepReturnValue{false}; std::atomic timeoutElapsedReturnValue{TimeoutElapsedMode::notElapsed}; std::atomic timeoutElapsedCallBase{false}; + std::atomic checkNewSubmissionCalled{false}; + std::atomic handlePagingFenceRequestsCalled{false}; bool callBaseSleepMethod = false; }; -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index 870893f2af..b1f81e3418 100644 --- a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -127,16 +127,12 @@ struct MockWddmCsr : public WddmCommandStreamReceiver { } return ret; } - void startControllingDirectSubmissions() override { - directSubmissionControllerStarted = true; - } uint32_t flushCalledCount = 0; std::unique_ptr recordedCommandBuffer; bool callParentInitDirectSubmission = true; bool initBlitterDirectSubmission = false; uint32_t fillReusableAllocationsListCalled = 0; - bool directSubmissionControllerStarted = false; }; class WddmCommandStreamMockGdiTest : public ::testing::Test { @@ -1182,7 +1178,6 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnR EXPECT_TRUE(ret); EXPECT_TRUE(csr->isDirectSubmissionEnabled()); EXPECT_FALSE(csr->isBlitterDirectSubmissionEnabled()); - EXPECT_FALSE(mockCsr->directSubmissionControllerStarted); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); @@ -1229,7 +1224,6 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB EXPECT_TRUE(ret); EXPECT_FALSE(csr->isDirectSubmissionEnabled()); EXPECT_TRUE(csr->isBlitterDirectSubmissionEnabled()); - EXPECT_FALSE(mockCsr->directSubmissionControllerStarted); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer);