Correct destruction logic of shared allocations

Wait for all OS contexts that used the allocation.
When an OS context is not ready, flush the related command stream receiver.

Change-Id: I5fb2c16c1d398c59fbd02e32ebbbb9254583244e
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2019-01-03 14:48:24 +01:00
committed by sys_ocldev
parent 1ce3898400
commit b5d9ed77a6
7 changed files with 99 additions and 31 deletions

View File

@@ -24,6 +24,8 @@ bool DeferrableAllocationDeletion::apply() {
auto currentContextTaskCount = *csr->getTagAddress();
if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
graphicsAllocation.releaseUsageInOsContext(contextId);
} else {
csr->flushBatchedSubmissions();
}
}
}

View File

@@ -134,6 +134,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }
bool isMultiOsContextCapable() const { return multiOsContextCapable; }
// Returns true when registeredContextsNum is greater than one.
// NOTE(review): registeredContextsNum is presumably the number of OS contexts that have registered usage of this allocation - confirm where it is updated.
bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
protected:
constexpr static uint32_t objectNotResident = (uint32_t)-1;

View File

@@ -15,6 +15,7 @@
#include "runtime/helpers/basic_math.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/options.h"
#include "runtime/memory_manager/deferrable_allocation_deletion.h"
#include "runtime/memory_manager/deferred_deleter.h"
#include "runtime/memory_manager/host_ptr_manager.h"
#include "runtime/memory_manager/internal_allocation_storage.h"
@@ -30,7 +31,8 @@ MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory,
ExecutionEnvironment &executionEnvironment) : allocator32Bit(nullptr), enable64kbpages(enable64kbpages),
localMemorySupported(enableLocalMemory),
executionEnvironment(executionEnvironment),
hostPtrManager(std::make_unique<HostPtrManager>()) {
hostPtrManager(std::make_unique<HostPtrManager>()),
multiContextResourceDestructor(std::make_unique<DeferredDeleter>()) {
registeredOsContexts.resize(1);
};
@@ -135,20 +137,23 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
//if not in use destroy in place
//if in use pass to temporary allocation list that is cleaned on blocking calls
void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
if (!gfxAllocation->isUsed()) {
freeGraphicsMemory(gfxAllocation);
return;
}
for (auto &csr : getCommandStreamReceivers()[0]) {
if (csr) {
auto osContextId = csr->getOsContext().getContextId();
auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
if (gfxAllocation->isUsedByOsContext(osContextId) &&
allocationTaskCount > *csr->getTagAddress()) {
csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
return;
if (gfxAllocation->isUsed()) {
if (gfxAllocation->isUsedByManyOsContexts()) {
multiContextResourceDestructor->deferDeletion(new DeferrableAllocationDeletion{*this, *gfxAllocation});
multiContextResourceDestructor->drain(false);
return;
}
for (auto &deviceCsrs : getCommandStreamReceivers()) {
for (auto &csr : deviceCsrs) {
if (csr) {
auto osContextId = csr->getOsContext().getContextId();
auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
if (gfxAllocation->isUsedByOsContext(osContextId) &&
allocationTaskCount > *csr->getTagAddress()) {
csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
return;
}
}
}
}
}

View File

@@ -259,6 +259,7 @@ class MemoryManager {
std::unique_ptr<HostPtrManager> hostPtrManager;
uint32_t latestContextId = std::numeric_limits<uint32_t>::max();
uint32_t defaultEngineIndex = 0;
std::unique_ptr<DeferredDeleter> multiContextResourceDestructor;
};
std::unique_ptr<DeferredDeleter> createDeferredDeleter();

View File

@@ -11,9 +11,11 @@
#include "runtime/memory_manager/deferrable_allocation_deletion.h"
#include "runtime/os_interface/os_context.h"
#include "unit_tests/libult/ult_command_stream_receiver.h"
#include "unit_tests/mocks/mock_allocation_properties.h"
#include "unit_tests/mocks/mock_device.h"
#include "unit_tests/mocks/mock_memory_manager.h"
#include "gtest/gtest.h"
#include "test.h"
using namespace OCLRT;
@@ -38,7 +40,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
auto executionEnvironment = std::make_unique<ExecutionEnvironment>();
memoryManager = new MockMemoryManager(*executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
device1.reset(Device::create<Device>(nullptr, executionEnvironment.release(), 0u));
device1.reset(Device::create<MockDevice>(nullptr, executionEnvironment.release(), 0u));
hwTag = device1->getDefaultEngine().commandStreamReceiver->getTagAddress();
device1ContextId = device1->getDefaultEngine().osContext->getContextId();
asyncDeleter = std::make_unique<DeferredDeleterPublic>();
@@ -50,7 +52,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
}
std::unique_ptr<DeferredDeleterPublic> asyncDeleter;
MockMemoryManager *memoryManager = nullptr;
std::unique_ptr<Device> device1;
std::unique_ptr<MockDevice> device1;
uint32_t device1ContextId = 0;
volatile uint32_t *hwTag = nullptr;
};
@@ -76,8 +78,8 @@ TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenW
EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled);
}
TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContexts) {
std::unique_ptr<Device> device2(Device::create<Device>(nullptr, device1->getExecutionEnvironment(), 1u));
HWTEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContextsAndFlushNotReadyCsr) {
std::unique_ptr<MockDevice> device2(Device::create<MockDevice>(nullptr, device1->getExecutionEnvironment(), 1u));
auto device2ContextId = device2->getDefaultEngine().osContext->getContextId();
EXPECT_EQ(gpgpuEngineInstances.size() * 2, memoryManager->getOsContextCount());
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
@@ -88,10 +90,14 @@ TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenA
EXPECT_TRUE(allocation->isUsedByOsContext(device1ContextId));
EXPECT_TRUE(allocation->isUsedByOsContext(device2ContextId));
EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled);
EXPECT_FALSE(device1->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
EXPECT_FALSE(device2->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation));
while (allocation->isUsedByOsContext(device2ContextId)) // wait for second context completion signal
std::this_thread::yield();
EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled);
EXPECT_TRUE(device1->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
EXPECT_FALSE(device2->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
asyncDeleter->allowExit = true;
*hwTag = 1u;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -1398,29 +1398,81 @@ TEST(GraphicsAllocation, givenCpuPointerBasedConstructorWhenGraphicsAllocationIs
using GraphicsAllocationTests = ::testing::Test;
HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreIt) {
// Checks that an allocation whose only recorded usage is on the low-priority (non-default) engine of a single device,
// and whose task count is still ahead of that engine's tag value, ends up in that engine CSR's temporary allocation list
// after checkGpuUsageAndDestroyGraphicsAllocations is called.
HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) {
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
// Pick the low-priority GPGPU engine as the "non-default" OS context / CSR pair.
auto nonDefaultOsContext = device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).osContext;
auto nonDefaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).commandStreamReceiver);
auto memoryManager = device->getExecutionEnvironment()->memoryManager.get();
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
// Make the allocation "not ready" on the non-default context: every count is set one past the CSR's current tag value.
nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1;
nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1;
graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId());
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
// A non-null head means something was stored in the non-default CSR's temporary allocation list.
EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
// Advance the tag so it reaches the allocation's task count.
// NOTE(review): presumably this lets the stored allocation be released during teardown - confirm.
(*nonDefaultCsr->getTagAddress())++;
// no need to call freeGraphicsAllocation
}
// Checks that an allocation whose only recorded usage is on the default engine of a second device (device index 1),
// and whose task count is still ahead of that engine's tag value, ends up in that device CSR's temporary allocation list
// after checkGpuUsageAndDestroyGraphicsAllocations is called.
HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultDeviceWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) {
ExecutionEnvironment executionEnvironment;
// NOTE(review): the environment lives on the stack; the extra internal reference presumably keeps the devices from deleting it - confirm.
executionEnvironment.incRefInternal();
// Two devices share one execution environment, created with device indices 0 and 1.
auto defaultDevice = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], &executionEnvironment, 0u));
auto nonDefaultDevice = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], &executionEnvironment, 1u));
auto engine = nonDefaultDevice->getDefaultEngine();
auto commandStreamReceiver = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(engine.commandStreamReceiver);
auto osContextId = engine.osContext->getContextId();
auto memoryManager = executionEnvironment.memoryManager.get();
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
// One past the current tag value, i.e. a task count the tag has not reached yet.
auto notReadyTaskCount = *commandStreamReceiver->getTagAddress() + 1;
// Sanity checks: the devices have distinct indices and the memory manager reports two entries from getCommandStreamReceivers().
EXPECT_NE(defaultDevice->getDeviceIndex(), nonDefaultDevice->getDeviceIndex());
EXPECT_EQ(2u, memoryManager->getCommandStreamReceivers().size());
// Mark the allocation as used, and not yet completed, only on the second device's default OS context.
commandStreamReceiver->taskCount = notReadyTaskCount;
commandStreamReceiver->latestFlushedTaskCount = notReadyTaskCount;
graphicsAllocation->updateTaskCount(notReadyTaskCount, osContextId);
// The temporary allocation list must go from empty to non-empty across the call under test.
EXPECT_TRUE(commandStreamReceiver->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
EXPECT_FALSE(commandStreamReceiver->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
// Advance the tag so it reaches notReadyTaskCount.
// NOTE(review): presumably this lets the stored allocation be released during teardown - confirm.
(*commandStreamReceiver->getTagAddress())++;
// no need to call freeGraphicsAllocation
}
HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByManyOsContextsWhenCheckingUsageBeforeDestroyThenMultiContextDestructorIsUsedForWaitingForAllOsContexts) {
ExecutionEnvironment executionEnvironment;
executionEnvironment.incRefInternal();
auto memoryManager = new MockMemoryManager(false, false, executionEnvironment);
executionEnvironment.memoryManager.reset(memoryManager);
auto multiContextDestructor = new MockDeferredDeleter();
multiContextDestructor->expectDrainBlockingValue(false);
memoryManager->multiContextResourceDestructor.reset(multiContextDestructor);
auto device = std::unique_ptr<MockDevice>(MockDevice::create<MockDevice>(platformDevices[0], &executionEnvironment, 0u));
auto nonDefaultOsContext = device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).osContext;
auto nonDefaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).commandStreamReceiver);
auto defaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getDefaultEngine().commandStreamReceiver);
auto defaultOsContext = device->getDefaultEngine().osContext;
EXPECT_FALSE(defaultOsContext->getEngineType().id == nonDefaultOsContext->getEngineType().id &&
defaultOsContext->getEngineType().type == nonDefaultOsContext->getEngineType().type);
auto memoryManager = device->getExecutionEnvironment()->memoryManager.get();
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1;
nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1;
graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId());
nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress();
nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress();
graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress(), nonDefaultOsContext->getContextId());
graphicsAllocation->updateTaskCount(0, defaultOsContext->getContextId()); // used and ready
EXPECT_TRUE(graphicsAllocation->isUsedByManyOsContexts());
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
EXPECT_EQ(nullptr, defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
(*nonDefaultCsr->getTagAddress())++;
// no need to call freeGraphicsAllocation
EXPECT_EQ(1, multiContextDestructor->deferDeletionCalled);
EXPECT_TRUE(nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
}
TEST(GraphicsAllocation, givenSharedHandleBasedConstructorWhenGraphicsAllocationIsCreatedThenGpuAddressHasCorrectValue) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -21,6 +21,7 @@ class MockMemoryManager : public OsAgnosticMemoryManager {
using MemoryManager::allocateGraphicsMemoryInPreferredPool;
using MemoryManager::AllocationData;
using MemoryManager::getAllocationData;
using MemoryManager::multiContextResourceDestructor;
using MemoryManager::registeredOsContexts;
using OsAgnosticMemoryManager::allocateGraphicsMemoryForImageFromHostPtr;
using OsAgnosticMemoryManager::OsAgnosticMemoryManager;