mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Correct destruction logic of shared allocations
Wait for all OS contexts that used the allocation; when an OS context is not ready, flush the related command stream receiver. Change-Id: I5fb2c16c1d398c59fbd02e32ebbbb9254583244e Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
1ce3898400
commit
b5d9ed77a6
@@ -24,6 +24,8 @@ bool DeferrableAllocationDeletion::apply() {
|
||||
auto currentContextTaskCount = *csr->getTagAddress();
|
||||
if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
|
||||
graphicsAllocation.releaseUsageInOsContext(contextId);
|
||||
} else {
|
||||
csr->flushBatchedSubmissions();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,6 +134,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
|
||||
bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }
|
||||
|
||||
bool isMultiOsContextCapable() const { return multiOsContextCapable; }
|
||||
bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
|
||||
|
||||
protected:
|
||||
constexpr static uint32_t objectNotResident = (uint32_t)-1;
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "runtime/helpers/basic_math.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/memory_manager/deferrable_allocation_deletion.h"
|
||||
#include "runtime/memory_manager/deferred_deleter.h"
|
||||
#include "runtime/memory_manager/host_ptr_manager.h"
|
||||
#include "runtime/memory_manager/internal_allocation_storage.h"
|
||||
@@ -30,7 +31,8 @@ MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory,
|
||||
ExecutionEnvironment &executionEnvironment) : allocator32Bit(nullptr), enable64kbpages(enable64kbpages),
|
||||
localMemorySupported(enableLocalMemory),
|
||||
executionEnvironment(executionEnvironment),
|
||||
hostPtrManager(std::make_unique<HostPtrManager>()) {
|
||||
hostPtrManager(std::make_unique<HostPtrManager>()),
|
||||
multiContextResourceDestructor(std::make_unique<DeferredDeleter>()) {
|
||||
registeredOsContexts.resize(1);
|
||||
};
|
||||
|
||||
@@ -135,20 +137,23 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
|
||||
//if not in use destroy in place
|
||||
//if in use pass to temporary allocation list that is cleaned on blocking calls
|
||||
void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
|
||||
if (!gfxAllocation->isUsed()) {
|
||||
freeGraphicsMemory(gfxAllocation);
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto &csr : getCommandStreamReceivers()[0]) {
|
||||
if (csr) {
|
||||
auto osContextId = csr->getOsContext().getContextId();
|
||||
auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
|
||||
|
||||
if (gfxAllocation->isUsedByOsContext(osContextId) &&
|
||||
allocationTaskCount > *csr->getTagAddress()) {
|
||||
csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
|
||||
return;
|
||||
if (gfxAllocation->isUsed()) {
|
||||
if (gfxAllocation->isUsedByManyOsContexts()) {
|
||||
multiContextResourceDestructor->deferDeletion(new DeferrableAllocationDeletion{*this, *gfxAllocation});
|
||||
multiContextResourceDestructor->drain(false);
|
||||
return;
|
||||
}
|
||||
for (auto &deviceCsrs : getCommandStreamReceivers()) {
|
||||
for (auto &csr : deviceCsrs) {
|
||||
if (csr) {
|
||||
auto osContextId = csr->getOsContext().getContextId();
|
||||
auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
|
||||
if (gfxAllocation->isUsedByOsContext(osContextId) &&
|
||||
allocationTaskCount > *csr->getTagAddress()) {
|
||||
csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -259,6 +259,7 @@ class MemoryManager {
|
||||
std::unique_ptr<HostPtrManager> hostPtrManager;
|
||||
uint32_t latestContextId = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t defaultEngineIndex = 0;
|
||||
std::unique_ptr<DeferredDeleter> multiContextResourceDestructor;
|
||||
};
|
||||
|
||||
std::unique_ptr<DeferredDeleter> createDeferredDeleter();
|
||||
|
||||
@@ -11,9 +11,11 @@
|
||||
#include "runtime/memory_manager/deferrable_allocation_deletion.h"
|
||||
#include "runtime/os_interface/os_context.h"
|
||||
|
||||
#include "unit_tests/libult/ult_command_stream_receiver.h"
|
||||
#include "unit_tests/mocks/mock_allocation_properties.h"
|
||||
#include "unit_tests/mocks/mock_device.h"
|
||||
#include "unit_tests/mocks/mock_memory_manager.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
@@ -38,7 +40,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
|
||||
auto executionEnvironment = std::make_unique<ExecutionEnvironment>();
|
||||
memoryManager = new MockMemoryManager(*executionEnvironment);
|
||||
executionEnvironment->memoryManager.reset(memoryManager);
|
||||
device1.reset(Device::create<Device>(nullptr, executionEnvironment.release(), 0u));
|
||||
device1.reset(Device::create<MockDevice>(nullptr, executionEnvironment.release(), 0u));
|
||||
hwTag = device1->getDefaultEngine().commandStreamReceiver->getTagAddress();
|
||||
device1ContextId = device1->getDefaultEngine().osContext->getContextId();
|
||||
asyncDeleter = std::make_unique<DeferredDeleterPublic>();
|
||||
@@ -50,7 +52,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
|
||||
}
|
||||
std::unique_ptr<DeferredDeleterPublic> asyncDeleter;
|
||||
MockMemoryManager *memoryManager = nullptr;
|
||||
std::unique_ptr<Device> device1;
|
||||
std::unique_ptr<MockDevice> device1;
|
||||
uint32_t device1ContextId = 0;
|
||||
volatile uint32_t *hwTag = nullptr;
|
||||
};
|
||||
@@ -76,8 +78,8 @@ TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenW
|
||||
EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled);
|
||||
}
|
||||
|
||||
TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContexts) {
|
||||
std::unique_ptr<Device> device2(Device::create<Device>(nullptr, device1->getExecutionEnvironment(), 1u));
|
||||
HWTEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContextsAndFlushNotReadyCsr) {
|
||||
std::unique_ptr<MockDevice> device2(Device::create<MockDevice>(nullptr, device1->getExecutionEnvironment(), 1u));
|
||||
auto device2ContextId = device2->getDefaultEngine().osContext->getContextId();
|
||||
EXPECT_EQ(gpgpuEngineInstances.size() * 2, memoryManager->getOsContextCount());
|
||||
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
|
||||
@@ -88,10 +90,14 @@ TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenA
|
||||
EXPECT_TRUE(allocation->isUsedByOsContext(device1ContextId));
|
||||
EXPECT_TRUE(allocation->isUsedByOsContext(device2ContextId));
|
||||
EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled);
|
||||
EXPECT_FALSE(device1->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
|
||||
EXPECT_FALSE(device2->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
|
||||
asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation));
|
||||
while (allocation->isUsedByOsContext(device2ContextId)) // wait for second context completion signal
|
||||
std::this_thread::yield();
|
||||
EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled);
|
||||
EXPECT_TRUE(device1->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
|
||||
EXPECT_FALSE(device2->getUltCommandStreamReceiver<FamilyType>().flushBatchedSubmissionsCalled);
|
||||
asyncDeleter->allowExit = true;
|
||||
*hwTag = 1u;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -1398,29 +1398,81 @@ TEST(GraphicsAllocation, givenCpuPointerBasedConstructorWhenGraphicsAllocationIs
|
||||
|
||||
using GraphicsAllocationTests = ::testing::Test;
|
||||
|
||||
HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreIt) {
|
||||
HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) {
|
||||
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
|
||||
auto nonDefaultOsContext = device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).osContext;
|
||||
auto nonDefaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).commandStreamReceiver);
|
||||
|
||||
auto memoryManager = device->getExecutionEnvironment()->memoryManager.get();
|
||||
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
|
||||
|
||||
nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1;
|
||||
nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1;
|
||||
graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId());
|
||||
|
||||
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
|
||||
EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
|
||||
(*nonDefaultCsr->getTagAddress())++;
|
||||
// no need to call freeGraphicsAllocation
|
||||
}
|
||||
|
||||
// Verifies that checkGpuUsageAndDestroyGraphicsAllocations() places an allocation
// in the temporary-allocation list of the second device's default-engine CSR when
// that CSR is the only user and its tag has not yet reached the allocation's task count.
HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultDeviceWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) {
|
||||
ExecutionEnvironment executionEnvironment;
|
||||
// NOTE(review): presumably keeps the stack-allocated environment alive when the devices drop their references - confirm.
executionEnvironment.incRefInternal();
|
||||
// Two devices are created on the same execution environment, with device indices 0 and 1.
auto defaultDevice = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], &executionEnvironment, 0u));
|
||||
auto nonDefaultDevice = std::unique_ptr<MockDevice>(Device::create<MockDevice>(platformDevices[0], &executionEnvironment, 1u));
|
||||
auto engine = nonDefaultDevice->getDefaultEngine();
|
||||
auto commandStreamReceiver = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(engine.commandStreamReceiver);
|
||||
auto osContextId = engine.osContext->getContextId();
|
||||
auto memoryManager = executionEnvironment.memoryManager.get();
|
||||
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
|
||||
// One past the current tag value, i.e. a task count the CSR's tag has not reached yet.
auto notReadyTaskCount = *commandStreamReceiver->getTagAddress() + 1;
|
||||
|
||||
// Sanity checks: the devices have different indices and the memory manager reports two CSR groups.
EXPECT_NE(defaultDevice->getDeviceIndex(), nonDefaultDevice->getDeviceIndex());
|
||||
EXPECT_EQ(2u, memoryManager->getCommandStreamReceivers().size());
|
||||
|
||||
// Mark the allocation as used by the second device's context at the not-yet-reached task count.
commandStreamReceiver->taskCount = notReadyTaskCount;
|
||||
commandStreamReceiver->latestFlushedTaskCount = notReadyTaskCount;
|
||||
graphicsAllocation->updateTaskCount(notReadyTaskCount, osContextId);
|
||||
|
||||
// The temporary-allocation list must be empty before the call and non-empty after it.
EXPECT_TRUE(commandStreamReceiver->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
|
||||
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
|
||||
EXPECT_FALSE(commandStreamReceiver->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
|
||||
// Advance the tag so it equals notReadyTaskCount.
// NOTE(review): presumably this lets the stored allocation be released during teardown - confirm.
(*commandStreamReceiver->getTagAddress())++;
|
||||
// no need to call freeGraphicsAllocation
|
||||
}
|
||||
|
||||
HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByManyOsContextsWhenCheckingUsageBeforeDestroyThenMultiContextDestructorIsUsedForWaitingForAllOsContexts) {
|
||||
ExecutionEnvironment executionEnvironment;
|
||||
executionEnvironment.incRefInternal();
|
||||
auto memoryManager = new MockMemoryManager(false, false, executionEnvironment);
|
||||
executionEnvironment.memoryManager.reset(memoryManager);
|
||||
auto multiContextDestructor = new MockDeferredDeleter();
|
||||
multiContextDestructor->expectDrainBlockingValue(false);
|
||||
memoryManager->multiContextResourceDestructor.reset(multiContextDestructor);
|
||||
|
||||
auto device = std::unique_ptr<MockDevice>(MockDevice::create<MockDevice>(platformDevices[0], &executionEnvironment, 0u));
|
||||
auto nonDefaultOsContext = device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).osContext;
|
||||
auto nonDefaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getEngine(EngineInstanceConstants::lowPriorityGpgpuEngineIndex).commandStreamReceiver);
|
||||
auto defaultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(device->getDefaultEngine().commandStreamReceiver);
|
||||
auto defaultOsContext = device->getDefaultEngine().osContext;
|
||||
|
||||
EXPECT_FALSE(defaultOsContext->getEngineType().id == nonDefaultOsContext->getEngineType().id &&
|
||||
defaultOsContext->getEngineType().type == nonDefaultOsContext->getEngineType().type);
|
||||
|
||||
auto memoryManager = device->getExecutionEnvironment()->memoryManager.get();
|
||||
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
|
||||
|
||||
nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1;
|
||||
nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1;
|
||||
graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId());
|
||||
nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress();
|
||||
nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress();
|
||||
graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress(), nonDefaultOsContext->getContextId());
|
||||
graphicsAllocation->updateTaskCount(0, defaultOsContext->getContextId()); // used and ready
|
||||
|
||||
EXPECT_TRUE(graphicsAllocation->isUsedByManyOsContexts());
|
||||
|
||||
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
|
||||
EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
|
||||
EXPECT_EQ(nullptr, defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead());
|
||||
(*nonDefaultCsr->getTagAddress())++;
|
||||
// no need to call freeGraphicsAllocation
|
||||
EXPECT_EQ(1, multiContextDestructor->deferDeletionCalled);
|
||||
EXPECT_TRUE(nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
|
||||
EXPECT_TRUE(defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty());
|
||||
}
|
||||
|
||||
TEST(GraphicsAllocation, givenSharedHandleBasedConstructorWhenGraphicsAllocationIsCreatedThenGpuAddressHasCorrectValue) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -21,6 +21,7 @@ class MockMemoryManager : public OsAgnosticMemoryManager {
|
||||
using MemoryManager::allocateGraphicsMemoryInPreferredPool;
|
||||
using MemoryManager::AllocationData;
|
||||
using MemoryManager::getAllocationData;
|
||||
using MemoryManager::multiContextResourceDestructor;
|
||||
using MemoryManager::registeredOsContexts;
|
||||
using OsAgnosticMemoryManager::allocateGraphicsMemoryForImageFromHostPtr;
|
||||
using OsAgnosticMemoryManager::OsAgnosticMemoryManager;
|
||||
|
||||
Reference in New Issue
Block a user