From 9c696f1d294858dd0dc5e215c6fc6366e42fb3d8 Mon Sep 17 00:00:00 2001
From: Maciej Dziuban <maciej.dziuban@intel.com>
Date: Tue, 16 Oct 2018 15:53:39 +0200
Subject: [PATCH] Create STL-friendly SpinLock implementation

Change-Id: Ic04d34d2962ad6953fe3282a10f6cba16c0e07b3
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
---
 runtime/gtpin/gtpin_callbacks.cpp             | 23 +++----
 .../windows/wddm_memory_manager.cpp           | 26 +++-----
 .../windows/wddm_residency_controller.cpp     | 29 ++-------
 .../windows/wddm_residency_controller.h       | 13 ++--
 runtime/utilities/spinlock.h                  | 25 ++++++--
 unit_tests/utilities/spinlock_tests.cpp       | 62 ++++++++++---------
 6 files changed, 79 insertions(+), 99 deletions(-)
diff --git a/runtime/gtpin/gtpin_callbacks.cpp b/runtime/gtpin/gtpin_callbacks.cpp
index 630eaeafc3..9ee0f8cf6a 100644
--- a/runtime/gtpin/gtpin_callbacks.cpp
+++ b/runtime/gtpin/gtpin_callbacks.cpp
@@ -35,7 +35,7 @@ const igc_info_t *pIgcInfo = nullptr;
 std::atomic<int> sequenceCount(1);
 CommandQueue *pCmdQueueForFlushTask = nullptr;
 std::deque<gtpinkexec_t> kernelExecQueue;
-std::atomic_flag kernelExecQueueLock = ATOMIC_FLAG_INIT;
+SpinLock kernelExecQueueLock;
 
 void gtpinNotifyContextCreate(cl_context context) {
     if (isGTPinInitialized) {
@@ -122,10 +122,9 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
         kExec.gtpinResource = (cl_mem)resource;
         kExec.commandBuffer = commandBuffer;
         kExec.pCommandQueue = (CommandQueue *)pCmdQueue;
-        SpinLock lock;
-        lock.enter(kernelExecQueueLock);
+        std::unique_lock<SpinLock> lock{kernelExecQueueLock};
         kernelExecQueue.push_back(kExec);
-        lock.leave(kernelExecQueueLock);
+        lock.unlock();
         // Patch SSH[gtpinBTI] with GT-Pin resource
         if (!resource) {
             return;
@@ -150,8 +149,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) {
 
 void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
     if (isGTPinInitialized) {
-        SpinLock lock;
-        lock.enter(kernelExecQueueLock);
+        std::unique_lock<SpinLock> lock{kernelExecQueueLock};
         size_t numElems = kernelExecQueue.size();
         for (size_t n = 0; n < numElems; n++) {
             if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) {
@@ -161,15 +159,13 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
                 break;
             }
         }
-        lock.leave(kernelExecQueueLock);
         pCmdQueueForFlushTask = nullptr;
     }
 }
 
 void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
     if (isGTPinInitialized) {
-        SpinLock lock;
-        lock.enter(kernelExecQueueLock);
+        std::unique_lock<SpinLock> lock{kernelExecQueueLock};
         size_t numElems = kernelExecQueue.size();
         for (size_t n = 0; n < numElems;) {
             if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) {
@@ -182,14 +178,12 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
                 n++;
             }
         }
-        lock.leave(kernelExecQueueLock);
     }
 }
 
 void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
     if (isGTPinInitialized) {
-        SpinLock lock;
-        lock.enter(kernelExecQueueLock);
+        std::unique_lock<SpinLock> lock{kernelExecQueueLock};
         size_t numElems = kernelExecQueue.size();
         for (size_t n = 0; n < numElems; n++) {
             if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {
@@ -203,14 +197,12 @@ void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
                 break;
             }
         }
-        lock.leave(kernelExecQueueLock);
     }
 }
 
 void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) {
     if (isGTPinInitialized) {
-        SpinLock lock;
-        lock.enter(kernelExecQueueLock);
+        std::unique_lock<SpinLock> lock{kernelExecQueueLock};
         size_t numElems = kernelExecQueue.size();
         for (size_t n = 0; n < numElems; n++) {
             if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {
@@ -225,7 +217,6 @@ void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) {
                 break;
             }
         }
-        lock.leave(kernelExecQueueLock);
     }
 }
 
diff --git a/runtime/os_interface/windows/wddm_memory_manager.cpp b/runtime/os_interface/windows/wddm_memory_manager.cpp
index ed6965d512..11f9c4cb4e 100644
--- a/runtime/os_interface/windows/wddm_memory_manager.cpp
+++ b/runtime/os_interface/windows/wddm_memory_manager.cpp
@@ -32,13 +32,13 @@ WddmMemoryManager::~WddmMemoryManager() {
     for (auto osContext : this->registeredOsContexts) {
         if (osContext) {
             auto &residencyController = osContext->get()->getResidencyController();
-            residencyController.acquireTrimCallbackLock();
+
+            auto lock = residencyController.acquireTrimCallbackLock();
             wddm->unregisterTrimCallback(trimCallback, this->trimCallbackHandle);
-            residencyController.releaseTrimCallbackLock();
+            lock.unlock();
 
             // Wait for lock to ensure trimCallback ended
-            residencyController.acquireTrimCallbackLock();
-            residencyController.releaseTrimCallbackLock();
+            lock.lock();
         }
     }
 }
@@ -62,9 +62,8 @@ void APIENTRY WddmMemoryManager::trimCallback(_Inout_ D3DKMT_TRIMNOTIFICATION *t
         return;
     }
 
-    wddmMemMngr->getRegisteredOsContext(0)->get()->getResidencyController().acquireTrimCallbackLock();
+    auto lock = wddmMemMngr->getRegisteredOsContext(0)->get()->getResidencyController().acquireTrimCallbackLock();
     wddmMemMngr->getRegisteredOsContext(0)->get()->getResidencyController().trimResidency(trimNotification->Flags, trimNotification->NumBytesToTrim);
-    wddmMemMngr->getRegisteredOsContext(0)->get()->getResidencyController().releaseTrimCallbackLock();
 }
 
 GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) {
@@ -315,9 +314,8 @@ void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation
     for (auto &osContext : this->registeredOsContexts) {
         if (osContext) {
             auto &residencyController = osContext->get()->getResidencyController();
-            residencyController.acquireLock();
+            auto lock = residencyController.acquireLock();
             residencyController.removeFromTrimCandidateListIfUsed(input, true);
-            residencyController.releaseLock();
         }
     }
 
@@ -480,7 +478,7 @@ bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer &all
 
     uint32_t totalHandlesCount = 0;
 
-    osContext.get()->getResidencyController().acquireLock();
+    auto lock = osContext.get()->getResidencyController().acquireLock();
 
     DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", osContext.get()->getResidencyController().getMonitoredFence().currentFenceValue);
 
@@ -550,23 +548,17 @@ bool WddmMemoryManager::makeResidentResidencyAllocations(ResidencyContainer &all
         }
     }
 
-    osContext.get()->getResidencyController().releaseLock();
-
     return result;
 }
 
 void WddmMemoryManager::makeNonResidentEvictionAllocations(ResidencyContainer &evictionAllocations, OsContext &osContext) {
-
-    osContext.get()->getResidencyController().acquireLock();
-
-    size_t residencyCount = evictionAllocations.size();
+    auto lock = osContext.get()->getResidencyController().acquireLock(); // std::unique_lock<SpinLock> guard: held while every allocation below is queued, released when 'lock' is destroyed at function exit
+    const size_t residencyCount = evictionAllocations.size();
 
     for (uint32_t i = 0; i < residencyCount; i++) {
         WddmAllocation *allocation = reinterpret_cast<WddmAllocation *>(evictionAllocations[i]);
         osContext.get()->getResidencyController().addToTrimCandidateList(allocation);
     }
-
-    osContext.get()->getResidencyController().releaseLock();
 }
 
 bool WddmMemoryManager::mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) {
diff --git a/runtime/os_interface/windows/wddm_residency_controller.cpp b/runtime/os_interface/windows/wddm_residency_controller.cpp
index 5d13b8e28b..875b531864 100644
--- a/runtime/os_interface/windows/wddm_residency_controller.cpp
+++ b/runtime/os_interface/windows/wddm_residency_controller.cpp
@@ -15,24 +15,12 @@ namespace OCLRT {
 
 WddmResidencyController::WddmResidencyController(Wddm &wddm, uint32_t osContextId) : wddm(wddm), osContextId(osContextId) {}
 
-void WddmResidencyController::acquireLock() {
-    bool previousLockValue = false;
-    while (!lock.compare_exchange_weak(previousLockValue, true))
-        previousLockValue = false;
+std::unique_lock<SpinLock> WddmResidencyController::acquireLock() {
+    return std::unique_lock<SpinLock>{this->lock}; // locks 'lock' immediately; the returned guard unlocks it on destruction, which is why releaseLock() is removed
 }
 
-void WddmResidencyController::releaseLock() {
-    lock = false;
-}
-
-void WddmResidencyController::acquireTrimCallbackLock() {
-    SpinLock spinLock;
-    spinLock.enter(this->trimCallbackLock);
-}
-
-void WddmResidencyController::releaseTrimCallbackLock() {
-    SpinLock spinLock;
-    spinLock.leave(this->trimCallbackLock);
+std::unique_lock<SpinLock> WddmResidencyController::acquireTrimCallbackLock() {
+    return std::unique_lock<SpinLock>{this->trimCallbackLock}; // same hand-over for the trim-callback lock: the caller owns the guard and may unlock()/lock() it again
 }
 
 WddmAllocation *WddmResidencyController::getTrimCandidateHead() {
@@ -177,6 +165,7 @@ void WddmResidencyController::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags,
         bool periodicTrimDone = false;
         D3DKMT_HANDLE fragmentEvictHandles[3] = {0};
         uint64_t sizeToTrim = 0;
+        auto lock = this->acquireLock();
 
         WddmAllocation *wddmAllocation = nullptr;
         while ((wddmAllocation = this->getTrimCandidateHead()) != nullptr) {
@@ -221,17 +210,11 @@ void WddmResidencyController::trimResidency(D3DDDI_TRIMRESIDENCYSET_FLAGS flags,
         if (this->checkTrimCandidateListCompaction()) {
             this->compactTrimCandidateList();
         }
-
-        this->releaseLock();
     }
 
     if (flags.TrimToBudget) {
-
-        this->acquireLock();
-
+        auto lock = this->acquireLock();
         trimResidencyToBudget(bytes);
-
-        this->releaseLock();
     }
 
     if (flags.PeriodicTrim || flags.RestartPeriodicTrim) {
diff --git a/runtime/os_interface/windows/wddm_residency_controller.h b/runtime/os_interface/windows/wddm_residency_controller.h
index 330e8e8bbe..56926403cd 100644
--- a/runtime/os_interface/windows/wddm_residency_controller.h
+++ b/runtime/os_interface/windows/wddm_residency_controller.h
@@ -10,8 +10,10 @@
 #include "runtime/memory_manager/residency_container.h"
 #include "runtime/os_interface/windows/windows_wrapper.h"
 #include "runtime/os_interface/windows/windows_defs.h"
+#include "runtime/utilities/spinlock.h"
 
 #include <atomic>
+#include <mutex>
 
 namespace OCLRT {
 
@@ -23,11 +25,8 @@ class WddmResidencyController {
   public:
     WddmResidencyController(Wddm &wddm, uint32_t osContextId);
 
-    void acquireLock();
-    void releaseLock();
-
-    void acquireTrimCallbackLock();
-    void releaseTrimCallbackLock();
+    std::unique_lock<SpinLock> acquireLock();
+    std::unique_lock<SpinLock> acquireTrimCallbackLock();
 
     WddmAllocation *getTrimCandidateHead();
     void addToTrimCandidateList(GraphicsAllocation *allocation);
@@ -54,8 +53,8 @@ class WddmResidencyController {
     uint32_t osContextId;
     MonitoredFence monitoredFence = {};
 
-    std::atomic<bool> lock = false;
-    std::atomic_flag trimCallbackLock = ATOMIC_FLAG_INIT;
+    SpinLock lock;
+    SpinLock trimCallbackLock;
 
     uint64_t lastTrimFenceValue = 0u;
     ResidencyContainer trimCandidateList;
diff --git a/runtime/utilities/spinlock.h b/runtime/utilities/spinlock.h
index 0a028d9fcd..b44a4a7abb 100644
--- a/runtime/utilities/spinlock.h
+++ b/runtime/utilities/spinlock.h
@@ -7,19 +7,32 @@
 
 #pragma once
 
+#include "runtime/helpers/properties_helper.h"
+
 #include <atomic>
 
 namespace OCLRT {
 
-class SpinLock {
+class SpinLock : NonCopyableOrMovableClass { // busy-waiting lock around std::atomic_flag; exposes lock()/try_lock()/unlock() so std::unique_lock<SpinLock> can own it
   public:
-    void enter(std::atomic_flag &spinLock) {
-        while (spinLock.test_and_set(std::memory_order_acquire)) {
-        };
+    SpinLock() = default;
+    ~SpinLock() = default;
+
+    void lock() { // returns only once this thread has set the flag
+        while (flag.test_and_set(std::memory_order_acquire)) // test_and_set returns the previous value: true means the lock is already held, so keep spinning
+            ;
     }
-    void leave(std::atomic_flag &spinLock) {
-        spinLock.clear(std::memory_order_release);
+
+    bool try_lock() { // NOLINT
+        return flag.test_and_set(std::memory_order_acquire) == false; // single attempt, no spinning: true only if the flag was clear before this call (lock acquired)
     }
+
+    void unlock() {
+        flag.clear(std::memory_order_release); // clears the flag so a later lock()/try_lock() can succeed
+    }
+
+  protected:
+    std::atomic_flag flag = ATOMIC_FLAG_INIT; // starts in the clear (unlocked) state
 };
 
 } // namespace OCLRT
diff --git a/unit_tests/utilities/spinlock_tests.cpp b/unit_tests/utilities/spinlock_tests.cpp
index a3f8cb041a..55edccaeb7 100644
--- a/unit_tests/utilities/spinlock_tests.cpp
+++ b/unit_tests/utilities/spinlock_tests.cpp
@@ -1,64 +1,66 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (C) 2017-2018 Intel Corporation
  *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
+ * SPDX-License-Identifier: MIT
  *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "runtime/utilities/spinlock.h"
 #include "gtest/gtest.h"
 #include <thread>
+#include <mutex>
 
 using namespace OCLRT;
 
 TEST(SpinLockTest, givenTwoThreadsThenVerifyThatTheySynchronizeWithSpinLock) {
-    std::atomic_flag syncLock = ATOMIC_FLAG_INIT;
     std::atomic<bool> threadStarted(false);
     std::atomic<bool> threadFinished(false);
-    SpinLock lock1;
+    SpinLock spinLock; // the single lock shared by both threads; each thread wraps it in its own std::unique_lock
     int sharedCount = 0;
 
     // Initially acquire spin lock so the worker thread will wait
-    lock1.enter(syncLock);
+    std::unique_lock<SpinLock> lock1{spinLock};
 
     // Start worker thread
-    std::thread t([&]() {
+    std::thread workerThread([&]() {
         threadStarted = true;
-        SpinLock lock2;
-        lock2.enter(syncLock);
+        std::unique_lock<SpinLock> lock2{spinLock}; // spins until the main thread calls lock1.unlock(), which happens only after it has raised sharedCount to 1
         sharedCount++;
         EXPECT_EQ(2, sharedCount);
-        lock2.leave(syncLock);
+        lock2.unlock(); // released explicitly before threadFinished is set
         threadFinished = true;
     });
 
     // Wait till worker thread is started
-    while (!threadStarted) {
-    };
+    while (!threadStarted)
+        ;
     sharedCount++;
     EXPECT_EQ(1, sharedCount);
 
     // Release spin lock thus allowing worker thread to proceed
-    lock1.leave(syncLock);
+    lock1.unlock();
 
     // Wait till worker thread finishes
-    while (!threadFinished) {
-    };
+    while (!threadFinished)
+        ;
     EXPECT_EQ(2, sharedCount);
-    t.join();
+    workerThread.join();
+}
+
+TEST(SpinLockTest, givenSpinLockThenAttemptedLockingWorks) {
+    SpinLock spinLock;
+    auto workerThreadFunction = [&spinLock](bool expectedLockAcquired) {
+        std::unique_lock<SpinLock> lock{spinLock, std::defer_lock}; // defer_lock: build the guard without locking so try_lock() is the only acquisition attempt
+        auto lockAcquired = lock.try_lock();
+        EXPECT_EQ(expectedLockAcquired, lockAcquired);
+    };
+
+    // Expect try_lock() on another thread to fail while this thread holds the lock
+    std::unique_lock<SpinLock> lock{spinLock};
+    std::thread workerThread1(workerThreadFunction, false);
+    workerThread1.join();
+
+    lock.unlock(); // once released, the second worker's try_lock() is expected to succeed
+    std::thread workerThread2(workerThreadFunction, true);
+    workerThread2.join();
 }