Graphics Allocation: store task count per context id

Move the definitions of the AllocationsList methods to internal_allocation_storage.cpp.

Change-Id: I4c6038df8fd1b9335e8a74edbab33b78f9293d8f
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
2026-01-03 06:49:52 +08:00 · 2018-11-02 10:01:56 +01:00
parent ea2e634f7e
commit 815ae851b7
32 changed files with 239 additions and 160 deletions
--- a/runtime/memory_manager/allocations_list.h
+++ b/runtime/memory_manager/allocations_list.h
@@ -11,10 +11,11 @@

 namespace OCLRT {
 class GraphicsAllocation;
+class CommandStreamReceiver;

 class AllocationsList : public IDList<GraphicsAllocation, true, true> {
  public:
-    std::unique_ptr<GraphicsAllocation> detachAllocation(size_t requiredMinimalSize, volatile uint32_t *csrTagAddress, bool internalAllocationRequired);
+    std::unique_ptr<GraphicsAllocation> detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, bool internalAllocationRequired);

  private:
    GraphicsAllocation *detachAllocationImpl(GraphicsAllocation *, void *);
--- a/runtime/memory_manager/graphics_allocation.cpp
+++ b/runtime/memory_manager/graphics_allocation.cpp
@@ -5,13 +5,54 @@
 *
 */

-#include "graphics_allocation.h"
 #include "runtime/helpers/aligned_memory.h"
+#include "runtime/memory_manager/graphics_allocation.h"

-bool OCLRT::GraphicsAllocation::isL3Capable() {
+namespace OCLRT {
+bool GraphicsAllocation::isL3Capable() {
    auto ptr = ptrOffset(cpuPtr, static_cast<size_t>(this->allocationOffset));
    if (alignUp(ptr, MemoryConstants::cacheLineSize) == ptr && alignUp(this->size, MemoryConstants::cacheLineSize) == this->size) {
        return true;
    }
    return false;
 }
+GraphicsAllocation::GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn) : gpuBaseAddress(baseAddress),
+                                                                                                                   size(sizeIn),
+                                                                                                                   cpuPtr(cpuPtrIn),
+                                                                                                                   gpuAddress(gpuAddress), // sharedHandle is not listed here; it keeps its in-class default
+                                                                                                                   taskCounts(maxOsContextCount) { // presumably creates maxOsContextCount slots - confirm against StackVec
+    initTaskCounts(); // every task-count slot starts as ObjectNotUsed
+}
+
+GraphicsAllocation::GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn) : size(sizeIn),
+                                                                                                 cpuPtr(cpuPtrIn),
+                                                                                                 gpuAddress(castToUint64(cpuPtrIn)), // GPU address is derived from the CPU pointer value
+                                                                                                 sharedHandle(sharedHandleIn),
+                                                                                                 taskCounts(maxOsContextCount) { // presumably creates maxOsContextCount slots - confirm against StackVec
+    initTaskCounts(); // every task-count slot starts as ObjectNotUsed
+}
+GraphicsAllocation::~GraphicsAllocation() = default; // defaulted out of line; the header declares it virtual
+
+bool GraphicsAllocation::peekWasUsed() const { return registeredContextsNum.load() != 0u; } // true while at least one context slot holds a task count other than ObjectNotUsed
+
+void GraphicsAllocation::updateTaskCount(uint32_t newTaskCount, uint32_t contextId) { // stores newTaskCount in contextId's slot and keeps registeredContextsNum in step with the number of used slots
+    UNRECOVERABLE_IF(contextId >= taskCounts.size());
+    const bool slotWasUsed = taskCounts[contextId] != ObjectNotUsed;
+    if (!slotWasUsed && newTaskCount != ObjectNotUsed) {
+        registeredContextsNum++; // slot goes from unused to used
+    } else if (slotWasUsed && newTaskCount == ObjectNotUsed) {
+        registeredContextsNum--; // slot goes from used back to unused
+    } // unused -> unused and used -> used leave the counter untouched, so peekWasUsed() never observes a transient value
+    taskCounts[contextId] = newTaskCount;
+}
+
+uint32_t GraphicsAllocation::getTaskCount(uint32_t contextId) const { // returns the task count stored for contextId
+    UNRECOVERABLE_IF(contextId >= taskCounts.size()); // an out-of-range context id is treated as unrecoverable
+    return taskCounts[contextId]; // ObjectNotUsed when this context never recorded a task count
+}
+void GraphicsAllocation::initTaskCounts() { // marks every task-count slot as ObjectNotUsed
+    for (size_t slot = 0; slot < taskCounts.size(); ++slot) {
+        taskCounts[slot] = ObjectNotUsed;
+    }
+}
+} // namespace OCLRT
--- a/runtime/memory_manager/graphics_allocation.h
+++ b/runtime/memory_manager/graphics_allocation.h
@@ -17,6 +17,7 @@
 #include "runtime/memory_manager/memory_pool.h"
 #include "runtime/memory_manager/residency_container.h"
 #include "runtime/utilities/idlist.h"
+#include "runtime/utilities/stackvec.h"

 namespace OCLRT {

@@ -34,19 +35,7 @@ const uint32_t ObjectNotUsed = (uint32_t)-1;
 class Gmm;

 class GraphicsAllocation : public IDNode<GraphicsAllocation> {
-  protected:
-    size_t size = 0;
-    void *cpuPtr = nullptr;
-    uint64_t gpuAddress = 0;
-    bool coherent = false;
-    osHandle sharedHandle;
-    bool locked = false;
-    uint32_t reuseCount = 0; // GraphicsAllocation can be reused by shared resources
-    bool evictable = true;
-    MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
-
  public:
-    uint32_t taskCount = ObjectNotUsed;
    OsHandleStorage fragmentsStorage;
    bool is32BitAllocation = false;
    uint64_t gpuBaseAddress = 0;
@@ -81,20 +70,13 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
        SHARED_RESOURCE,
    };

-    virtual ~GraphicsAllocation() = default;
+    virtual ~GraphicsAllocation();
    GraphicsAllocation &operator=(const GraphicsAllocation &) = delete;
    GraphicsAllocation(const GraphicsAllocation &) = delete;

-    GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn) : size(sizeIn),
-                                                                                                   cpuPtr(cpuPtrIn),
-                                                                                                   gpuAddress(gpuAddress),
-                                                                                                   sharedHandle(Sharing::nonSharedResource),
-                                                                                                   gpuBaseAddress(baseAddress) {}
+    GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn);

-    GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn) : size(sizeIn),
-                                                                                 cpuPtr(cpuPtrIn),
-                                                                                 gpuAddress(castToUint64(cpuPtrIn)),
-                                                                                 sharedHandle(sharedHandleIn) {}
+    GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn);

    void *getUnderlyingBuffer() const { return cpuPtr; }
    void setCpuPtrAndGpuAddress(void *cpuPtr, uint64_t gpuAddress) {
@@ -141,14 +123,30 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
    MemoryPool::Type getMemoryPool() {
        return memoryPool;
    }
+    bool peekWasUsed() const;
+    void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
+    uint32_t getTaskCount(uint32_t contextId) const;

  protected:
+    void initTaskCounts();
+
    //this variable can only be modified from SubmissionAggregator
    friend class SubmissionAggregator;
+    size_t size = 0;
+    void *cpuPtr = nullptr;
+    uint64_t gpuAddress = 0;
+    bool coherent = false;
+    osHandle sharedHandle = Sharing::nonSharedResource;
+    bool locked = false;
+    uint32_t reuseCount = 0; // GraphicsAllocation can be reused by shared resources
+    bool evictable = true;
+    MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
    uint32_t inspectionId = 0;
    AllocationType allocationType = AllocationType::UNKNOWN;
    bool aubWritable = true;
    bool allocDumpable = false;
    bool memObjectsAllocationWithWritableFlags = false;
+    StackVec<uint32_t, maxOsContextCount> taskCounts;
+    std::atomic<uint32_t> registeredContextsNum{0};
 };
 } // namespace OCLRT
--- a/runtime/memory_manager/internal_allocation_storage.cpp
+++ b/runtime/memory_manager/internal_allocation_storage.cpp
@@ -10,9 +10,9 @@
 #include "runtime/memory_manager/memory_manager.h"

 namespace OCLRT {
-InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver) : commandStreamReceiver(commandStreamReceiver){};
+InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver) : commandStreamReceiver(commandStreamReceiver), contextId(commandStreamReceiver.getDeviceIndex()){}; // contextId is the CSR's device index; this storage uses it to pick the task-count slot it reads and updates
 void InternalAllocationStorage::storeAllocation(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage) {
-    uint32_t taskCount = gfxAllocation->taskCount;
+    uint32_t taskCount = gfxAllocation->getTaskCount(contextId);

    if (allocationUsage == REUSABLE_ALLOCATION) {
        taskCount = commandStreamReceiver.peekTaskCount();
@@ -28,7 +28,7 @@ void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<Gra
        }
    }
    auto &allocationsList = (allocationUsage == TEMPORARY_ALLOCATION) ? temporaryAllocations : allocationsForReuse;
-    gfxAllocation->taskCount = taskCount;
+    gfxAllocation->updateTaskCount(taskCount, contextId);
    allocationsList.pushTailOne(*gfxAllocation.release());
 }

@@ -43,7 +43,7 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo
    IDList<GraphicsAllocation, false, true> allocationsLeft;
    while (curr != nullptr) {
        auto *next = curr->next;
-        if (curr->taskCount <= waitTaskCount) {
+        if (curr->getTaskCount(contextId) <= waitTaskCount) {
            memoryManager->freeGraphicsMemory(curr);
        } else {
            allocationsLeft.pushTailOne(*curr);
@@ -57,8 +57,41 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo
 }

 std::unique_ptr<GraphicsAllocation> InternalAllocationStorage::obtainReusableAllocation(size_t requiredSize, bool internalAllocation) {
-    auto allocation = allocationsForReuse.detachAllocation(requiredSize, commandStreamReceiver.getTagAddress(), internalAllocation);
+    auto allocation = allocationsForReuse.detachAllocation(requiredSize, commandStreamReceiver, internalAllocation);
    return allocation;
 }

+struct ReusableAllocationRequirements { // search criteria handed from detachAllocation() to detachAllocationImpl() through a void*
+    size_t requiredMinimalSize; // smallest acceptable getUnderlyingBufferSize() of a candidate
+    volatile uint32_t *csrTagAddress; // location of the tag value compared against a candidate's task count
+    bool internalAllocationRequired; // must equal the candidate's is32BitAllocation flag
+    uint32_t contextId; // selects which per-context task count of a candidate is checked
+};
+
+std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, bool internalAllocationRequired) {
+    // Pack the search criteria into one object, because only a single void* is handed on to detachAllocationImpl.
+    ReusableAllocationRequirements requirements;
+    requirements.requiredMinimalSize = requiredMinimalSize;
+    requirements.internalAllocationRequired = internalAllocationRequired;
+    requirements.csrTagAddress = commandStreamReceiver.getTagAddress();
+    requirements.contextId = commandStreamReceiver.getDeviceIndex();
+    GraphicsAllocation *unusedNode = nullptr; // detachAllocationImpl leaves its node parameter unnamed and unused
+    return std::unique_ptr<GraphicsAllocation>(processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(unusedNode, static_cast<void *>(&requirements)));
+}
+
+// Scans the list from head and returns the first node matching the ReusableAllocationRequirements in data, detached via removeOneImpl(); nullptr if none matches.
+// A node matches when its is32BitAllocation flag and size fit and its task count for the requested context is 0 or below the current tag value.
+GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, void *data) {
+    auto *req = static_cast<ReusableAllocationRequirements *>(data);
+    for (auto *curr = head; curr != nullptr; curr = curr->next) {
+        auto currentTagValue = req->csrTagAddress ? *req->csrTagAddress : static_cast<uint32_t>(-1); // volatile tag is re-read per node; a null tag address falls back to the maximum value, as before the move
+        if ((req->internalAllocationRequired == curr->is32BitAllocation) &&
+            (curr->getUnderlyingBufferSize() >= req->requiredMinimalSize) &&
+            ((currentTagValue > curr->getTaskCount(req->contextId)) || (curr->getTaskCount(req->contextId) == 0))) {
+            return removeOneImpl(curr, nullptr);
+        }
+    }
+    return nullptr;
+}
+
 } // namespace OCLRT
--- a/runtime/memory_manager/internal_allocation_storage.h
+++ b/runtime/memory_manager/internal_allocation_storage.h
@@ -27,6 +27,7 @@ class InternalAllocationStorage {
  protected:
    void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList);
    CommandStreamReceiver &commandStreamReceiver;
+    const uint32_t contextId;

    AllocationsList temporaryAllocations;
    AllocationsList allocationsForReuse;
--- a/runtime/memory_manager/memory_manager.cpp
+++ b/runtime/memory_manager/memory_manager.cpp
@@ -27,37 +27,6 @@
 namespace OCLRT {
 constexpr size_t TagCount = 512;

-struct ReusableAllocationRequirements {
-    size_t requiredMinimalSize;
-    volatile uint32_t *csrTagAddress;
-    bool internalAllocationRequired;
-};
-
-std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, volatile uint32_t *csrTagAddress, bool internalAllocationRequired) {
-    ReusableAllocationRequirements req;
-    req.requiredMinimalSize = requiredMinimalSize;
-    req.csrTagAddress = csrTagAddress;
-    req.internalAllocationRequired = internalAllocationRequired;
-    GraphicsAllocation *a = nullptr;
-    GraphicsAllocation *retAlloc = processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(a, static_cast<void *>(&req));
-    return std::unique_ptr<GraphicsAllocation>(retAlloc);
-}
-
-GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, void *data) {
-    ReusableAllocationRequirements *req = static_cast<ReusableAllocationRequirements *>(data);
-    auto *curr = head;
-    while (curr != nullptr) {
-        auto currentTagValue = req->csrTagAddress ? *req->csrTagAddress : -1;
-        if ((req->internalAllocationRequired == curr->is32BitAllocation) &&
-            (curr->getUnderlyingBufferSize() >= req->requiredMinimalSize) &&
-            ((currentTagValue > curr->taskCount) || (curr->taskCount == 0))) {
-            return removeOneImpl(curr, nullptr);
-        }
-        curr = curr->next;
-    }
-    return nullptr;
-}
-
 MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory,
                             ExecutionEnvironment &executionEnvironment) : allocator32Bit(nullptr), enable64kbpages(enable64kbpages),
                                                                           localMemorySupported(enableLocalMemory),
@@ -198,7 +167,7 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
 //if not in use destroy in place
 //if in use pass to temporary allocation list that is cleaned on blocking calls
 void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
-    if (gfxAllocation->taskCount == ObjectNotUsed || gfxAllocation->taskCount <= *getCommandStreamReceiver(0)->getTagAddress()) {
+    if (!gfxAllocation->peekWasUsed() || gfxAllocation->getTaskCount(0u) <= *getCommandStreamReceiver(0)->getTagAddress()) {
        freeGraphicsMemory(gfxAllocation);
    } else {
        getCommandStreamReceiver(0)->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);