Correct destruction logic of shared allocations

Wait for all OS contexts that used the allocation.
When an OS context is not ready, flush the related command stream receiver.

Change-Id: I5fb2c16c1d398c59fbd02e32ebbbb9254583244e
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
2026-01-08 14:02:58 +08:00 · 2019-01-03 14:48:24 +01:00
parent 1ce3898400
commit b5d9ed77a6
7 changed files with 99 additions and 31 deletions
--- a/runtime/memory_manager/deferrable_allocation_deletion.cpp
+++ b/runtime/memory_manager/deferrable_allocation_deletion.cpp
@@ -24,6 +24,8 @@ bool DeferrableAllocationDeletion::apply() {
                    auto currentContextTaskCount = *csr->getTagAddress();
                    if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
                        graphicsAllocation.releaseUsageInOsContext(contextId);
+                    } else {
+                        csr->flushBatchedSubmissions();
                    }
                }
            }
--- a/runtime/memory_manager/graphics_allocation.h
+++ b/runtime/memory_manager/graphics_allocation.h
@@ -134,6 +134,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
    bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }

    bool isMultiOsContextCapable() const { return multiOsContextCapable; }
+    bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }

  protected:
    constexpr static uint32_t objectNotResident = (uint32_t)-1;
--- a/runtime/memory_manager/memory_manager.cpp
+++ b/runtime/memory_manager/memory_manager.cpp
@@ -15,6 +15,7 @@
 #include "runtime/helpers/basic_math.h"
 #include "runtime/helpers/kernel_commands.h"
 #include "runtime/helpers/options.h"
+#include "runtime/memory_manager/deferrable_allocation_deletion.h"
 #include "runtime/memory_manager/deferred_deleter.h"
 #include "runtime/memory_manager/host_ptr_manager.h"
 #include "runtime/memory_manager/internal_allocation_storage.h"
@@ -30,7 +31,8 @@ MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory,
                             ExecutionEnvironment &executionEnvironment) : allocator32Bit(nullptr), enable64kbpages(enable64kbpages),
                                                                           localMemorySupported(enableLocalMemory),
                                                                           executionEnvironment(executionEnvironment),
-                                                                           hostPtrManager(std::make_unique<HostPtrManager>()) {
+                                                                           hostPtrManager(std::make_unique<HostPtrManager>()),
+                                                                           multiContextResourceDestructor(std::make_unique<DeferredDeleter>()) {
    registeredOsContexts.resize(1);
 };

@@ -135,20 +137,23 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
 //if not in use destroy in place
 //if in use pass to temporary allocation list that is cleaned on blocking calls
 void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
-    if (!gfxAllocation->isUsed()) {
-        freeGraphicsMemory(gfxAllocation);
-        return;
-    }
-
-    for (auto &csr : getCommandStreamReceivers()[0]) {
-        if (csr) {
-            auto osContextId = csr->getOsContext().getContextId();
-            auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
-
-            if (gfxAllocation->isUsedByOsContext(osContextId) &&
-                allocationTaskCount > *csr->getTagAddress()) {
-                csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
-                return;
+    if (gfxAllocation->isUsed()) {
+        if (gfxAllocation->isUsedByManyOsContexts()) {
+            multiContextResourceDestructor->deferDeletion(new DeferrableAllocationDeletion{*this, *gfxAllocation});
+            multiContextResourceDestructor->drain(false);
+            return;
+        }
+        for (auto &deviceCsrs : getCommandStreamReceivers()) {
+            for (auto &csr : deviceCsrs) {
+                if (csr) {
+                    auto osContextId = csr->getOsContext().getContextId();
+                    auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
+                    if (gfxAllocation->isUsedByOsContext(osContextId) &&
+                        allocationTaskCount > *csr->getTagAddress()) {
+                        csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
+                        return;
+                    }
+                }
            }
        }
    }
--- a/runtime/memory_manager/memory_manager.h
+++ b/runtime/memory_manager/memory_manager.h
@@ -259,6 +259,7 @@ class MemoryManager {
    std::unique_ptr<HostPtrManager> hostPtrManager;
    uint32_t latestContextId = std::numeric_limits<uint32_t>::max();
    uint32_t defaultEngineIndex = 0;
+    std::unique_ptr<DeferredDeleter> multiContextResourceDestructor;
 };

 std::unique_ptr<DeferredDeleter> createDeferredDeleter();
--- a/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp
+++ b/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp
@@ -11,9 +11,11 @@
 #include "runtime/memory_manager/deferrable_allocation_deletion.h"
 #include "runtime/os_interface/os_context.h"

+#include "unit_tests/libult/ult_command_stream_receiver.h"
 #include "unit_tests/mocks/mock_allocation_properties.h"
+#include "unit_tests/mocks/mock_device.h"
 #include "unit_tests/mocks/mock_memory_manager.h"
-#include "gtest/gtest.h"
+#include "test.h"

 using namespace OCLRT;

@@ -38,7 +40,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
        auto executionEnvironment = std::make_unique<ExecutionEnvironment>();
        memoryManager = new MockMemoryManager(*executionEnvironment);
        executionEnvironment->memoryManager.reset(memoryManager);
-        device1.reset(Device::create<Device>(nullptr, executionEnvironment.release(), 0u));
+        device1.reset(Device::create<MockDevice>(nullptr, executionEnvironment.release(), 0u));
        hwTag = device1->getDefaultEngine().commandStreamReceiver->getTagAddress();
        device1ContextId = device1->getDefaultEngine().osContext->getContextId();
        asyncDeleter = std::make_unique<DeferredDeleterPublic>();
@@ -50,7 +52,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
    }
    std::unique_ptr<DeferredDeleterPublic> asyncDeleter;
    MockMemoryManager *memoryManager = nullptr;
-    std::unique_ptr<Device> device1;
+    std::unique_ptr<MockDevice> device1;
    uint32_t device1ContextId = 0;
    volatile uint32_t *hwTag = nullptr;
 };
@@ -76,8 +78,8 @@ TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenW
    EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled);
 }

-TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContexts) {
-    std::unique_ptr<Device> device2(Device::create<Device>(nullptr, device1->getExecutionEnvironment(), 1u));
+HWTEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContextsAndFlushNotReadyCsr) {
+    std::unique_ptr<MockDevice> device2(Device::create<MockDevice>(nullptr, device1->getExecutionEnvironment(), 1u));
    auto device2ContextId = device2->getDefaultEngine().osContext->getContextId();
    EXPECT_EQ(gpgpuEngineInstances.size() * 2, memoryManager->getOsContextCount());
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
@@ -88,10 +90,14 @@ TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenA
    EXPECT_TRUE(allocation->isUsedByOsContext(device1ContextId));
    EXPECT_TRUE(allocation->isUsedByOsContext(device2ContextId));
    EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled);
+    EXPECT_FALSE(device1->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
+    EXPECT_FALSE(device2->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
    asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation));
    while (allocation->isUsedByOsContext(device2ContextId)) // wait for second context completion signal
        std::this_thread::yield();
    EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled);
+    EXPECT_TRUE(device1->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
+    EXPECT_FALSE(device2->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
    asyncDeleter->allowExit = true;
    *hwTag = 1u;
 }
--- a/unit_tests/memory_manager/memory_manager_tests.cpp
+++ b/unit_tests/memory_manager/memory_manager_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2018 Intel Corporation
+ * Copyright (C) 2017-2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -1398,29 +1398,81 @@ TEST(GraphicsAllocation, givenCpuPointerBasedConstructorWhenGraphicsAllocationIs

 using GraphicsAllocationTests = ::testing::Test;

-HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreIt) {
+HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) {
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    auto nonDefaultOsContext = device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).osContext;
    auto nonDefaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).commandStreamReceiver);
+
+    auto memoryManager = device->getExecutionEnvironment()->memoryManager.get();
+    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
+
+    nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1;
+    nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1;
+    graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId());
+
+    memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
+    EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
+    (*nonDefaultCsr->getTagAddress())++;
+    // no need to call freeGraphicsAllocation
+}
+
+HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultDeviceWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) {
+    ExecutionEnvironment executionEnvironment;
+    executionEnvironment.incRefInternal();
+    auto defaultDevice = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], &executionEnvironment, 0u));
+    auto nonDefaultDevice = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], &executionEnvironment, 1u));
+    auto engine = nonDefaultDevice->getDefaultEngine();
+    auto commandStreamReceiver = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(engine.commandStreamReceiver);
+    auto osContextId = engine.osContext->getContextId();
+    auto memoryManager = executionEnvironment.memoryManager.get();
+    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
+    auto notReadyTaskCount = *commandStreamReceiver->getTagAddress() + 1;
+
+    EXPECT_NE(defaultDevice->getDeviceIndex(), nonDefaultDevice->getDeviceIndex());
+    EXPECT_EQ(2u, memoryManager->getCommandStreamReceivers().size());
+
+    commandStreamReceiver->taskCount = notReadyTaskCount;
+    commandStreamReceiver->latestFlushedTaskCount = notReadyTaskCount;
+    graphicsAllocation->updateTaskCount(notReadyTaskCount, osContextId);
+
+    EXPECT_TRUE(commandStreamReceiver->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
+    memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
+    EXPECT_FALSE(commandStreamReceiver->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
+    (*commandStreamReceiver->getTagAddress())++;
+    // no need to call freeGraphicsAllocation
+}
+
+HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByManyOsContextsWhenCheckingUsageBeforeDestroyThenMultiContextDestructorIsUsedForWaitingForAllOsContexts) {
+    ExecutionEnvironment executionEnvironment;
+    executionEnvironment.incRefInternal();
+    auto memoryManager = new MockMemoryManager(false, false, executionEnvironment);
+    executionEnvironment.memoryManager.reset(memoryManager);
+    auto multiContextDestructor = new MockDeferredDeleter();
+    multiContextDestructor->expectDrainBlockingValue(false);
+    memoryManager->multiContextResourceDestructor.reset(multiContextDestructor);
+
+    auto device = std::unique_ptr<MockDevice>(MockDevice::create<MockDevice>(platformDevices[0], &executionEnvironment, 0u));
+    auto nonDefaultOsContext = device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).osContext;
+    auto nonDefaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).commandStreamReceiver);
    auto defaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getDefaultEngine().commandStreamReceiver);
    auto defaultOsContext = device->getDefaultEngine().osContext;

    EXPECT_FALSE(defaultOsContext->getEngineType().id == nonDefaultOsContext->getEngineType().id &&
                 defaultOsContext->getEngineType().type == nonDefaultOsContext->getEngineType().type);

-    auto memoryManager = device->getExecutionEnvironment()->memoryManager.get();
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});

-    nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1;
-    nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1;
-    graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId());
+    nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress();
+    nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress();
+    graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress(), nonDefaultOsContext->getContextId());
    graphicsAllocation->updateTaskCount(0, defaultOsContext->getContextId()); // used and ready

+    EXPECT_TRUE(graphicsAllocation->isUsedByManyOsContexts());
+
    memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
-    EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
-    EXPECT_EQ(nullptr, defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
-    (*nonDefaultCsr->getTagAddress())++;
-    // no need to call freeGraphicsAllocation
+    EXPECT_EQ(1, multiContextDestructor->deferDeletionCalled);
+    EXPECT_TRUE(nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
+    EXPECT_TRUE(defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
 }

 TEST(GraphicsAllocation, givenSharedHandleBasedConstructorWhenGraphicsAllocationIsCreatedThenGpuAddressHasCorrectValue) {
--- a/unit_tests/mocks/mock_memory_manager.h
+++ b/unit_tests/mocks/mock_memory_manager.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2018 Intel Corporation
+ * Copyright (C) 2017-2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -21,6 +21,7 @@ class MockMemoryManager : public OsAgnosticMemoryManager {
    using MemoryManager::allocateGraphicsMemoryInPreferredPool;
    using MemoryManager::AllocationData;
    using MemoryManager::getAllocationData;
+    using MemoryManager::multiContextResourceDestructor;
    using MemoryManager::registeredOsContexts;
    using OsAgnosticMemoryManager::allocateGraphicsMemoryForImageFromHostPtr;
    using OsAgnosticMemoryManager::OsAgnosticMemoryManager;