diff --git a/runtime/memory_manager/deferrable_allocation_deletion.cpp b/runtime/memory_manager/deferrable_allocation_deletion.cpp index 38a59009aa..2668fb99e2 100644 --- a/runtime/memory_manager/deferrable_allocation_deletion.cpp +++ b/runtime/memory_manager/deferrable_allocation_deletion.cpp @@ -24,6 +24,8 @@ bool DeferrableAllocationDeletion::apply() { auto currentContextTaskCount = *csr->getTagAddress(); if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) { graphicsAllocation.releaseUsageInOsContext(contextId); + } else { + csr->flushBatchedSubmissions(); } } } diff --git a/runtime/memory_manager/graphics_allocation.h b/runtime/memory_manager/graphics_allocation.h index 2fe02e6f54..08e98eb47d 100644 --- a/runtime/memory_manager/graphics_allocation.h +++ b/runtime/memory_manager/graphics_allocation.h @@ -134,6 +134,7 @@ class GraphicsAllocation : public IDNode { bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; } bool isMultiOsContextCapable() const { return multiOsContextCapable; } + bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; } protected: constexpr static uint32_t objectNotResident = (uint32_t)-1; diff --git a/runtime/memory_manager/memory_manager.cpp b/runtime/memory_manager/memory_manager.cpp index a1a7f04fd1..8fa7a82ff4 100644 --- a/runtime/memory_manager/memory_manager.cpp +++ b/runtime/memory_manager/memory_manager.cpp @@ -15,6 +15,7 @@ #include "runtime/helpers/basic_math.h" #include "runtime/helpers/kernel_commands.h" #include "runtime/helpers/options.h" +#include "runtime/memory_manager/deferrable_allocation_deletion.h" #include "runtime/memory_manager/deferred_deleter.h" #include "runtime/memory_manager/host_ptr_manager.h" #include "runtime/memory_manager/internal_allocation_storage.h" @@ -30,7 +31,8 @@ MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory, ExecutionEnvironment &executionEnvironment) : allocator32Bit(nullptr), enable64kbpages(enable64kbpages), localMemorySupported(enableLocalMemory), executionEnvironment(executionEnvironment), - hostPtrManager(std::make_unique()) { + hostPtrManager(std::make_unique()), + multiContextResourceDestructor(std::make_unique()) { registeredOsContexts.resize(1); }; @@ -135,20 +137,23 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) { //if not in use destroy in place //if in use pass to temporary allocation list that is cleaned on blocking calls void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) { - if (!gfxAllocation->isUsed()) { - freeGraphicsMemory(gfxAllocation); - return; - } - - for (auto &csr : getCommandStreamReceivers()[0]) { - if (csr) { - auto osContextId = csr->getOsContext().getContextId(); - auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId); - - if (gfxAllocation->isUsedByOsContext(osContextId) && - allocationTaskCount > *csr->getTagAddress()) { - csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(gfxAllocation), TEMPORARY_ALLOCATION); - return; + if (gfxAllocation->isUsed()) { + if (gfxAllocation->isUsedByManyOsContexts()) { + multiContextResourceDestructor->deferDeletion(new DeferrableAllocationDeletion{*this, *gfxAllocation}); + multiContextResourceDestructor->drain(false); + return; + } + for (auto &deviceCsrs : getCommandStreamReceivers()) { + for (auto &csr : deviceCsrs) { + if (csr) { + auto osContextId = csr->getOsContext().getContextId(); + auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId); + if (gfxAllocation->isUsedByOsContext(osContextId) && + allocationTaskCount > *csr->getTagAddress()) { + csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(gfxAllocation), TEMPORARY_ALLOCATION); + return; + } + } } } } diff --git a/runtime/memory_manager/memory_manager.h b/runtime/memory_manager/memory_manager.h index 5aae7488d0..34a6a45ab7 100644 --- a/runtime/memory_manager/memory_manager.h +++ b/runtime/memory_manager/memory_manager.h @@ -259,6 +259,7 @@ class MemoryManager { std::unique_ptr hostPtrManager; uint32_t latestContextId = std::numeric_limits::max(); uint32_t defaultEngineIndex = 0; + std::unique_ptr multiContextResourceDestructor; }; std::unique_ptr createDeferredDeleter(); diff --git a/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp b/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp index 8e2f97653f..891cc78605 100644 --- a/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp +++ b/unit_tests/memory_manager/deferrable_allocation_deletion_tests.cpp @@ -11,9 +11,11 @@ #include "runtime/memory_manager/deferrable_allocation_deletion.h" #include "runtime/os_interface/os_context.h" +#include "unit_tests/libult/ult_command_stream_receiver.h" #include "unit_tests/mocks/mock_allocation_properties.h" +#include "unit_tests/mocks/mock_device.h" #include "unit_tests/mocks/mock_memory_manager.h" -#include "gtest/gtest.h" +#include "test.h" using namespace OCLRT; @@ -38,7 +40,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test { auto executionEnvironment = std::make_unique(); memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); - device1.reset(Device::create(nullptr, executionEnvironment.release(), 0u)); + device1.reset(Device::create(nullptr, executionEnvironment.release(), 0u)); hwTag = device1->getDefaultEngine().commandStreamReceiver->getTagAddress(); device1ContextId = device1->getDefaultEngine().osContext->getContextId(); asyncDeleter = std::make_unique(); @@ -50,7 +52,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test { } std::unique_ptr asyncDeleter; MockMemoryManager *memoryManager = nullptr; - std::unique_ptr device1; + std::unique_ptr device1; uint32_t device1ContextId = 0; volatile uint32_t *hwTag = nullptr; }; @@ -76,8 +78,8 @@ TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenW EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled); } -TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContexts) { - std::unique_ptr device2(Device::create(nullptr, device1->getExecutionEnvironment(), 1u)); +HWTEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContextsAndFlushNotReadyCsr) { + std::unique_ptr device2(Device::create(nullptr, device1->getExecutionEnvironment(), 1u)); auto device2ContextId = device2->getDefaultEngine().osContext->getContextId(); EXPECT_EQ(gpgpuEngineInstances.size() * 2, memoryManager->getOsContextCount()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); @@ -88,10 +90,14 @@ TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenA EXPECT_TRUE(allocation->isUsedByOsContext(device1ContextId)); EXPECT_TRUE(allocation->isUsedByOsContext(device2ContextId)); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); + EXPECT_FALSE(device1->getUltCommandStreamReceiver().flushBatchedSubmissionsCalled); + EXPECT_FALSE(device2->getUltCommandStreamReceiver().flushBatchedSubmissionsCalled); asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation)); while (allocation->isUsedByOsContext(device2ContextId)) // wait for second context completion signal std::this_thread::yield(); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); + EXPECT_TRUE(device1->getUltCommandStreamReceiver().flushBatchedSubmissionsCalled); + EXPECT_FALSE(device2->getUltCommandStreamReceiver().flushBatchedSubmissionsCalled); asyncDeleter->allowExit = true; *hwTag = 1u; } diff --git a/unit_tests/memory_manager/memory_manager_tests.cpp b/unit_tests/memory_manager/memory_manager_tests.cpp index 02f6db108f..339bbcfb9c 100644 --- a/unit_tests/memory_manager/memory_manager_tests.cpp +++ b/unit_tests/memory_manager/memory_manager_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1398,29 +1398,81 @@ TEST(GraphicsAllocation, givenCpuPointerBasedConstructorWhenGraphicsAllocationIs using GraphicsAllocationTests = ::testing::Test; -HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreIt) { +HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(platformDevices[0])); auto nonDefaultOsContext = device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).osContext; auto nonDefaultCsr = reinterpret_cast *>(device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).commandStreamReceiver); + + auto memoryManager = device->getExecutionEnvironment()->memoryManager.get(); + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); + + nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1; + nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1; + graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId()); + + memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); + EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead()); + (*nonDefaultCsr->getTagAddress())++; + // no need to call freeGraphicsAllocation +} + +HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultDeviceWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) { + ExecutionEnvironment executionEnvironment; + executionEnvironment.incRefInternal(); + auto defaultDevice = std::unique_ptr(Device::create(platformDevices[0], &executionEnvironment, 0u)); + auto nonDefaultDevice = std::unique_ptr(Device::create(platformDevices[0], &executionEnvironment, 1u)); + auto engine = nonDefaultDevice->getDefaultEngine(); + auto commandStreamReceiver = reinterpret_cast *>(engine.commandStreamReceiver); + auto osContextId = engine.osContext->getContextId(); + auto memoryManager = executionEnvironment.memoryManager.get(); + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); + auto notReadyTaskCount = *commandStreamReceiver->getTagAddress() + 1; + + EXPECT_NE(defaultDevice->getDeviceIndex(), nonDefaultDevice->getDeviceIndex()); + EXPECT_EQ(2u, memoryManager->getCommandStreamReceivers().size()); + + commandStreamReceiver->taskCount = notReadyTaskCount; + commandStreamReceiver->latestFlushedTaskCount = notReadyTaskCount; + graphicsAllocation->updateTaskCount(notReadyTaskCount, osContextId); + + EXPECT_TRUE(commandStreamReceiver->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); + memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); + EXPECT_FALSE(commandStreamReceiver->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); + (*commandStreamReceiver->getTagAddress())++; + // no need to call freeGraphicsAllocation +} + +HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByManyOsContextsWhenCheckingUsageBeforeDestroyThenMultiContextDestructorIsUsedForWaitingForAllOsContexts) { + ExecutionEnvironment executionEnvironment; + executionEnvironment.incRefInternal(); + auto memoryManager = new MockMemoryManager(false, false, executionEnvironment); + executionEnvironment.memoryManager.reset(memoryManager); + auto multiContextDestructor = new MockDeferredDeleter(); + multiContextDestructor->expectDrainBlockingValue(false); + memoryManager->multiContextResourceDestructor.reset(multiContextDestructor); + + auto device = std::unique_ptr(MockDevice::create(platformDevices[0], &executionEnvironment, 0u)); + auto nonDefaultOsContext = device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).osContext; + auto nonDefaultCsr = reinterpret_cast *>(device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).commandStreamReceiver); auto defaultCsr = reinterpret_cast *>(device->getDefaultEngine().commandStreamReceiver); auto defaultOsContext = device->getDefaultEngine().osContext; EXPECT_FALSE(defaultOsContext->getEngineType().id == nonDefaultOsContext->getEngineType().id && defaultOsContext->getEngineType().type == nonDefaultOsContext->getEngineType().type); - auto memoryManager = device->getExecutionEnvironment()->memoryManager.get(); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize}); - nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1; - nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1; - graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId()); + nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress(); + nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress(); + graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress(), nonDefaultOsContext->getContextId()); graphicsAllocation->updateTaskCount(0, defaultOsContext->getContextId()); // used and ready + EXPECT_TRUE(graphicsAllocation->isUsedByManyOsContexts()); + memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); - EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead()); - EXPECT_EQ(nullptr, defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead()); - (*nonDefaultCsr->getTagAddress())++; - // no need to call freeGraphicsAllocation + EXPECT_EQ(1, multiContextDestructor->deferDeletionCalled); + EXPECT_TRUE(nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); } TEST(GraphicsAllocation, givenSharedHandleBasedConstructorWhenGraphicsAllocationIsCreatedThenGpuAddressHasCorrectValue) { diff --git a/unit_tests/mocks/mock_memory_manager.h b/unit_tests/mocks/mock_memory_manager.h index f955d7c09d..9caa3fb19d 100644 --- a/unit_tests/mocks/mock_memory_manager.h +++ b/unit_tests/mocks/mock_memory_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -21,6 +21,7 @@ class MockMemoryManager : public OsAgnosticMemoryManager { using MemoryManager::allocateGraphicsMemoryInPreferredPool; using MemoryManager::AllocationData; using MemoryManager::getAllocationData; + using MemoryManager::multiContextResourceDestructor; using MemoryManager::registeredOsContexts; using OsAgnosticMemoryManager::allocateGraphicsMemoryForImageFromHostPtr; using OsAgnosticMemoryManager::OsAgnosticMemoryManager;