From 9c696f1d294858dd0dc5e215c6fc6366e42fb3d8 Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Tue, 16 Oct 2018 15:53:39 +0200 Subject: [PATCH] Create STL-friendly SpinLock implementation Change-Id: Ic04d34d2962ad6953fe3282a10f6cba16c0e07b3 Signed-off-by: Maciej Dziuban --- runtime/gtpin/gtpin_callbacks.cpp | 23 +++---- .../windows/wddm_memory_manager.cpp | 26 +++----- .../windows/wddm_residency_controller.cpp | 29 ++------- .../windows/wddm_residency_controller.h | 13 ++-- runtime/utilities/spinlock.h | 25 ++++++-- unit_tests/utilities/spinlock_tests.cpp | 62 ++++++++++--------- 6 files changed, 79 insertions(+), 99 deletions(-) diff --git a/runtime/gtpin/gtpin_callbacks.cpp b/runtime/gtpin/gtpin_callbacks.cpp index 630eaeafc3..9ee0f8cf6a 100644 --- a/runtime/gtpin/gtpin_callbacks.cpp +++ b/runtime/gtpin/gtpin_callbacks.cpp @@ -35,7 +35,7 @@ const igc_info_t *pIgcInfo = nullptr; std::atomic sequenceCount(1); CommandQueue *pCmdQueueForFlushTask = nullptr; std::deque kernelExecQueue; -std::atomic_flag kernelExecQueueLock = ATOMIC_FLAG_INIT; +SpinLock kernelExecQueueLock; void gtpinNotifyContextCreate(cl_context context) { if (isGTPinInitialized) { @@ -122,10 +122,9 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { kExec.gtpinResource = (cl_mem)resource; kExec.commandBuffer = commandBuffer; kExec.pCommandQueue = (CommandQueue *)pCmdQueue; - SpinLock lock; - lock.enter(kernelExecQueueLock); + std::unique_lock lock{kernelExecQueueLock}; kernelExecQueue.push_back(kExec); - lock.leave(kernelExecQueueLock); + lock.unlock(); // Patch SSH[gtpinBTI] with GT-Pin resource if (!resource) { return; @@ -150,8 +149,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) { void gtpinNotifyFlushTask(uint32_t flushedTaskCount) { if (isGTPinInitialized) { - SpinLock lock; - lock.enter(kernelExecQueueLock); + std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) { @@ -161,15 +159,13 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) { break; } } - lock.leave(kernelExecQueueLock); pCmdQueueForFlushTask = nullptr; } } void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) { if (isGTPinInitialized) { - SpinLock lock; - lock.enter(kernelExecQueueLock); + std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems;) { if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) { @@ -182,14 +178,12 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) { n++; } } - lock.leave(kernelExecQueueLock); } } void gtpinNotifyMakeResident(void *pKernel, void *pCSR) { if (isGTPinInitialized) { - SpinLock lock; - lock.enter(kernelExecQueueLock); + std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) { @@ -203,14 +197,12 @@ void gtpinNotifyMakeResident(void *pKernel, void *pCSR) { break; } } - lock.leave(kernelExecQueueLock); } } void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) { if (isGTPinInitialized) { - SpinLock lock; - lock.enter(kernelExecQueueLock); + std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) { @@ -225,7 +217,6 @@ void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) { break; } } - lock.leave(kernelExecQueueLock); } } diff --git a/runtime/os_interface/windows/wddm_memory_manager.cpp b/runtime/os_interface/windows/wddm_memory_manager.cpp index ed6965d512..11f9c4cb4e 100644 --- a/runtime/os_interface/windows/wddm_memory_manager.cpp +++ b/runtime/os_interface/windows/wddm_memory_manager.cpp @@ -32,13 +32,13 @@ WddmMemoryManager::~WddmMemoryManager() { for (auto osContext : this->registeredOsContexts) { if (osContext) { auto &residencyController = osContext->get()->getResidencyController(); - residencyController.acquireTrimCallbackLock(); + + auto lock = residencyController.acquireTrimCallbackLock(); wddm->unregisterTrimCallback(trimCallback, this->trimCallbackHandle); - residencyController.releaseTrimCallbackLock(); + lock.unlock(); // Wait for lock to ensure trimCallback ended - residencyController.acquireTrimCallbackLock(); - residencyController.releaseTrimCallbackLock(); + lock.lock(); } } } @@ -62,9 +62,8 @@ void APIENTRY WddmMemoryManager::trimCallback(_Inout_ D3DKMT_TRIMNOTIFICATION *t return; } - wddmMemMngr->getRegisteredOsContext(0)->get()->getResidencyController().acquireTrimCallbackLock(); + auto lock = wddmMemMngr->getRegisteredOsContext(0)->get()->getResidencyController().acquireTrimCallbackLock(); wddmMemMngr->getRegisteredOsContext(0)->get()->getResidencyController().trimResidency(trimNotification->Flags, trimNotification->NumBytesToTrim); - wddmMemMngr->getRegisteredOsContext(0)->get()->getResidencyController().releaseTrimCallbackLock(); } GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) { @@ -315,9 +314,8 @@ void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation for (auto &osContext : this->registeredOsContexts) { if (osContext) { auto &residencyController = osContext->get()->getResidencyController(); - residencyController.acquireLock(); + auto lock = residencyController.acquireLock(); residencyController.removeFromTrimCandidateListIfUsed(input, true); - residencyController.releaseLock(); } } @@ -480,7 +478,7 @@ bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer &all uint32_t totalHandlesCount = 0; - osContext.get()->getResidencyController().acquireLock(); + auto lock = osContext.get()->getResidencyController().acquireLock(); DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", osContext.get()->getResidencyController().getMonitoredFence().currentFenceValue); @@ -550,23 +548,17 @@ bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer &all } } - osContext.get()->getResidencyController().releaseLock(); - return result; } void WddmMemoryManager::makeNonResidentEvictionAllocations(ResidencyContainer &evictionAllocations, OsContext &osContext) { - - osContext.get()->getResidencyController().acquireLock(); - - size_t residencyCount = evictionAllocations.size(); + auto lock = osContext.get()->getResidencyController().acquireLock(); + const size_t residencyCount = evictionAllocations.size(); for (uint32_t i = 0; i < residencyCount; i++) { WddmAllocation *allocation = reinterpret_cast(evictionAllocations[i]); osContext.get()->getResidencyController().addToTrimCandidateList(allocation); } - - osContext.get()->getResidencyController().releaseLock(); } bool WddmMemoryManager::mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) { diff --git a/runtime/os_interface/windows/wddm_residency_controller.cpp b/runtime/os_interface/windows/wddm_residency_controller.cpp index 5d13b8e28b..875b531864 100644 --- a/runtime/os_interface/windows/wddm_residency_controller.cpp +++ b/runtime/os_interface/windows/wddm_residency_controller.cpp @@ -15,24 +15,12 @@ namespace OCLRT { WddmResidencyController::WddmResidencyController(Wddm &wddm, uint32_t osContextId) : wddm(wddm), osContextId(osContextId) {} -void WddmResidencyController::acquireLock() { - bool previousLockValue = false; - while (!lock.compare_exchange_weak(previousLockValue, true)) - previousLockValue = false; +std::unique_lock WddmResidencyController::acquireLock() { + return std::unique_lock{this->lock}; } -void WddmResidencyController::releaseLock() { - lock = false; -} - -void WddmResidencyController::acquireTrimCallbackLock() { - SpinLock spinLock; - spinLock.enter(this->trimCallbackLock); -} - -void WddmResidencyController::releaseTrimCallbackLock() { - SpinLock spinLock; - spinLock.leave(this->trimCallbackLock); +std::unique_lock WddmResidencyController::acquireTrimCallbackLock() { + return std::unique_lock{this->trimCallbackLock}; } WddmAllocation *WddmResidencyController::getTrimCandidateHead() { @@ -177,6 +165,7 @@ void WddmResidencyController::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags, bool periodicTrimDone = false; D3DKMT_HANDLE fragmentEvictHandles[3] = {0}; uint64_t sizeToTrim = 0; + auto lock = this->acquireLock(); WddmAllocation *wddmAllocation = nullptr; while ((wddmAllocation = this->getTrimCandidateHead()) != nullptr) { @@ -221,17 +210,11 @@ void WddmResidencyController::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags, if (this->checkTrimCandidateListCompaction()) { this->compactTrimCandidateList(); } - - this->releaseLock(); } if (flags.TrimToBudget) { - - this->acquireLock(); - + auto lock = this->acquireLock(); trimResidencyToBudget(bytes); - - this->releaseLock(); } if (flags.PeriodicTrim || flags.RestartPeriodicTrim) { diff --git a/runtime/os_interface/windows/wddm_residency_controller.h b/runtime/os_interface/windows/wddm_residency_controller.h index 330e8e8bbe..56926403cd 100644 --- a/runtime/os_interface/windows/wddm_residency_controller.h +++ b/runtime/os_interface/windows/wddm_residency_controller.h @@ -10,8 +10,10 @@ #include "runtime/memory_manager/residency_container.h" #include "runtime/os_interface/windows/windows_wrapper.h" #include "runtime/os_interface/windows/windows_defs.h" +#include "runtime/utilities/spinlock.h" #include +#include namespace OCLRT { @@ -23,11 +25,8 @@ class WddmResidencyController { public: WddmResidencyController(Wddm &wddm, uint32_t osContextId); - void acquireLock(); - void releaseLock(); - - void acquireTrimCallbackLock(); - void releaseTrimCallbackLock(); + std::unique_lock acquireLock(); + std::unique_lock acquireTrimCallbackLock(); WddmAllocation *getTrimCandidateHead(); void addToTrimCandidateList(GraphicsAllocation *allocation); @@ -54,8 +53,8 @@ class WddmResidencyController { uint32_t osContextId; MonitoredFence monitoredFence = {}; - std::atomic lock = false; - std::atomic_flag trimCallbackLock = ATOMIC_FLAG_INIT; + SpinLock lock; + SpinLock trimCallbackLock; uint64_t lastTrimFenceValue = 0u; ResidencyContainer trimCandidateList; diff --git a/runtime/utilities/spinlock.h b/runtime/utilities/spinlock.h index 0a028d9fcd..b44a4a7abb 100644 --- a/runtime/utilities/spinlock.h +++ b/runtime/utilities/spinlock.h @@ -7,19 +7,32 @@ #pragma once +#include "runtime/helpers/properties_helper.h" + #include namespace OCLRT { -class SpinLock { +class SpinLock : NonCopyableOrMovableClass { public: - void enter(std::atomic_flag &spinLock) { - while (spinLock.test_and_set(std::memory_order_acquire)) { - }; + SpinLock() = default; + ~SpinLock() = default; + + void lock() { + while (flag.test_and_set(std::memory_order_acquire)) + ; } - void leave(std::atomic_flag &spinLock) { - spinLock.clear(std::memory_order_release); + + bool try_lock() { // NOLINT + return flag.test_and_set(std::memory_order_acquire) == false; } + + void unlock() { + flag.clear(std::memory_order_release); + } + + protected: + std::atomic_flag flag = ATOMIC_FLAG_INIT; }; } // namespace OCLRT diff --git a/unit_tests/utilities/spinlock_tests.cpp b/unit_tests/utilities/spinlock_tests.cpp index a3f8cb041a..55edccaeb7 100644 --- a/unit_tests/utilities/spinlock_tests.cpp +++ b/unit_tests/utilities/spinlock_tests.cpp @@ -1,64 +1,66 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (C) 2017-2018 Intel Corporation * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: + * SPDX-License-Identifier: MIT * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. */ #include "runtime/utilities/spinlock.h" #include "gtest/gtest.h" #include +#include using namespace OCLRT; TEST(SpinLockTest, givenTwoThreadsThenVerifyThatTheySynchronizeWithSpinLock) { - std::atomic_flag syncLock = ATOMIC_FLAG_INIT; std::atomic threadStarted(false); std::atomic threadFinished(false); - SpinLock lock1; + SpinLock spinLock; int sharedCount = 0; // Initially acquire spin lock so the worker thread will wait - lock1.enter(syncLock); + std::unique_lock lock1{spinLock}; // Start worker thread - std::thread t([&]() { + std::thread workerThread([&]() { threadStarted = true; - SpinLock lock2; - lock2.enter(syncLock); + std::unique_lock lock2{spinLock}; sharedCount++; EXPECT_EQ(2, sharedCount); - lock2.leave(syncLock); + lock2.unlock(); threadFinished = true; }); // Wait till worker thread is started - while (!threadStarted) { - }; + while (!threadStarted) + ; sharedCount++; EXPECT_EQ(1, sharedCount); // Release spin lock thus allowing worker thread to proceed - lock1.leave(syncLock); + lock1.unlock(); // Wait till worker thread finishes - while (!threadFinished) { - }; + while (!threadFinished) + ; EXPECT_EQ(2, sharedCount); - t.join(); + workerThread.join(); +} + +TEST(SpinLockTest, givenSpinLockThenAttemptedLockingWorks) { + SpinLock spinLock; + auto workerThreadFunction = [&spinLock](bool expectedLockAcquired) { + std::unique_lock lock{spinLock, std::defer_lock}; + auto lockAcquired = lock.try_lock(); + EXPECT_EQ(expectedLockAcquired, lockAcquired); + }; + + // Expect locking to fail when lock is already taken + std::unique_lock lock{spinLock}; + std::thread workerThread1(workerThreadFunction, false); + workerThread1.join(); + + lock.unlock(); + std::thread workerThread2(workerThreadFunction, true); + workerThread2.join(); }