Allocate per-context memory with specific GPU VA

Related-To: NEO-4829

Change-Id: I821f6709bfa98df3b51a1c966b7a953752d85f74
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2020-07-01 10:38:19 +02:00
committed by sys_ocldev
parent 1f63f39d77
commit fea4472553
12 changed files with 276 additions and 0 deletions

View File

@@ -91,6 +91,12 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryForNonSvmHost
return memoryAllocation;
}
// Allocates memory through allocateGraphicsMemoryWithAlignment() and then
// overrides the GPU address of the resulting allocation with the value
// requested in allocationData.gpuAddress. The CPU pointer is left unchanged.
// Returns nullptr when the underlying allocation could not be created.
GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) {
    auto memoryAllocation = static_cast<MemoryAllocation *>(allocateGraphicsMemoryWithAlignment(allocationData));
    if (memoryAllocation == nullptr) {
        // Propagate the failure instead of dereferencing a null allocation.
        return nullptr;
    }
    memoryAllocation->setCpuPtrAndGpuAddress(memoryAllocation->getUnderlyingBuffer(), allocationData.gpuAddress);
    return memoryAllocation;
}
GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemory64kb(const AllocationData &allocationData) {
AllocationData allocationData64kb = allocationData;
allocationData64kb.size = alignUp(allocationData.size, MemoryConstants::pageSize64k);
@@ -355,4 +361,18 @@ MemoryAllocation *OsAgnosticMemoryManager::createMemoryAllocation(GraphicsAlloca
return memoryAllocation;
}
// Reserves a GPU virtual address range of `size` bytes from the standard heap
// of the gfx partition that belongs to `rootDeviceIndex`. The address stored
// in the returned range is canonized.
AddressRange OsAgnosticMemoryManager::reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) {
    auto partition = getGfxPartition(rootDeviceIndex);
    auto heapAddress = partition->heapAllocate(HeapIndex::HEAP_STANDARD, size);
    // `size` is read only after heapAllocate() has run, exactly as before.
    auto canonizedAddress = GmmHelper::canonize(heapAddress);
    return AddressRange{canonizedAddress, size};
}
// Gives the GPU address range described by `addressRange` back to the gfx
// partition of `rootDeviceIndex`. The address is decanonized first.
void OsAgnosticMemoryManager::freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) {
    uint64_t decanonizedAddress = GmmHelper::decanonize(addressRange.address);
    getGfxPartition(rootDeviceIndex)->freeGpuAddressRange(decanonizedAddress, addressRange.size);
}
} // namespace NEO

View File

@@ -79,6 +79,9 @@ class OsAgnosticMemoryManager : public MemoryManager {
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
// Reserves `size` bytes of GPU virtual address space from the standard heap of
// the root device's gfx partition and returns the canonized address with the size.
AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override;
// Releases the given GPU address range back to the root device's gfx partition.
void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override;
protected:
GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override;
GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) override;
@@ -86,6 +89,7 @@ class OsAgnosticMemoryManager : public MemoryManager {
GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override;
GraphicsAllocation *allocateShareableMemory(const AllocationData &allocationData) override;
GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr<Gmm> gmm) override;
// Creates an allocation whose GPU address is set to allocationData.gpuAddress.
GraphicsAllocation *allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) override;
// Locking simply exposes the allocation's underlying buffer pointer.
void *lockResourceImpl(GraphicsAllocation &graphicsAllocation) override {
    return graphicsAllocation.getUnderlyingBuffer();
}
// Unlocking is a no-op for this memory manager.
void unlockResourceImpl(GraphicsAllocation &graphicsAllocation) override {
}

View File

@@ -15,6 +15,7 @@
#include "opencl/test/unit_test/mocks/mock_execution_environment.h"
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
#include "opencl/test/unit_test/mocks/mock_os_context.h"
#include "test.h"
#include "gtest/gtest.h"
@@ -899,6 +900,32 @@ TEST(MemoryManagerTest, givenDebugContextSaveAreaTypeWhenGetAllocationDataIsCall
EXPECT_TRUE(allocData.flags.useSystemMemory);
}
// getAllocationData() must copy the OsContext pointer from the properties into
// the produced AllocationData.
TEST(MemoryManagerTest, givenPropertiesWithOsContextWhenGetAllocationDataIsCalledThenOsContextIsSet) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA};

    // Build a context on the first GPGPU engine reported for the default hardware.
    const auto engineInstance = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0];
    MockOsContext osContext(0u, 1, engineInstance, PreemptionMode::Disabled, false, false, false);
    properties.osContext = &osContext;

    const auto storageInfo = mockMemoryManager.createStorageInfoFromProperties(properties);
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, storageInfo);

    EXPECT_EQ(&osContext, allocData.osContext);
}
// getAllocationData() must copy the requested GPU address from the properties
// into the produced AllocationData.
TEST(MemoryManagerTest, givenPropertiesWithGpuAddressWhenGetAllocationDataIsCalledThenGpuAddressIsSet) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA};
    properties.gpuAddress = 0x4000;

    const auto storageInfo = mockMemoryManager.createStorageInfoFromProperties(properties);
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, storageInfo);

    EXPECT_EQ(properties.gpuAddress, allocData.gpuAddress);
}
using MemoryManagerGetAlloctionDataHaveToBeForcedTo48BitTest = testing::TestWithParam<std::tuple<GraphicsAllocation::AllocationType, bool>>;
TEST_P(MemoryManagerGetAlloctionDataHaveToBeForcedTo48BitTest, givenAllocationTypesHaveToBeForcedTo48BitThenAllocationDataResource48BitIsSet) {

View File

@@ -1292,6 +1292,39 @@ TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerAndFreeMemoryDisabledW
EXPECT_FALSE(mockManager->freeMemoryCalled);
}
// A reserved GPU address must lie inside [base, limit) of the standard heap of
// the gfx partition for root device 0.
TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressIsReservedAndFreedThenAddressFromGfxPartitionIsUsed) {
    MockExecutionEnvironment executionEnvironment;
    OsAgnosticMemoryManager memoryManager(executionEnvironment);

    auto reservedRange = memoryManager.reserveGpuAddress(MemoryConstants::pageSize, 0);

    const auto heapBase = memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_STANDARD);
    EXPECT_LE(heapBase, GmmHelper::decanonize(reservedRange.address));

    const auto heapLimit = memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_STANDARD);
    EXPECT_GT(heapLimit, GmmHelper::decanonize(reservedRange.address));

    memoryManager.freeGpuAddress(reservedRange, 0);
}
// An allocation requested with an explicit GPU address must come from the
// System4KBPages pool and report exactly the requested GPU address.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenGpuAddressIsSetThenAllocationWithSpecifiedGpuAddressInSystemMemoryIsCreated) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    auto memoryManager = new OsAgnosticMemoryManager(executionEnvironment);
    std::unique_ptr<CommandStreamReceiver> csr(createCommandStream(executionEnvironment, 0u));
    // The raw memoryManager pointer is handed over to the execution environment here.
    executionEnvironment.memoryManager.reset(memoryManager);

    auto osContext = memoryManager->createAndRegisterOsContext(csr.get(),
                                                               HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0],
                                                               1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo),
                                                               false, false, false);

    MockAllocationProperties properties = {0, MemoryConstants::pageSize};
    properties.osContext = osContext;
    properties.gpuAddress = 0x2000;

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    // Abort the test on allocation failure; the checks below dereference the pointer.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());
    EXPECT_EQ(0x2000u, allocation->getGpuAddress());

    memoryManager->freeGraphicsMemory(allocation);
}
TEST(MemoryManager, givenSharedResourceCopyWhenAllocatingGraphicsMemoryThenAllocateGraphicsMemoryForImageIsCalled) {
ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
MockMemoryManager memoryManager(false, true, *executionEnvironment);

View File

@@ -114,6 +114,120 @@ TEST_F(DrmMemoryManagerTest, GivenGraphicsAllocationWhenAddAndRemoveAllocationTo
EXPECT_EQ(fragment, nullptr);
}
// A GPU address reserved through the DRM memory manager must lie inside
// [base, limit) of the standard heap of the gfx partition for root device 0.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGpuAddressIsReservedAndFreedThenAddressFromGfxPartitionIsUsed) {
    auto memoryManager = std::make_unique<TestedDrmMemoryManager>(false, true, false, *executionEnvironment);

    auto reservedRange = memoryManager->reserveGpuAddress(MemoryConstants::pageSize, 0);

    const auto heapBase = memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_STANDARD);
    EXPECT_LE(heapBase, GmmHelper::decanonize(reservedRange.address));

    const auto heapLimit = memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_STANDARD);
    EXPECT_GT(heapLimit, GmmHelper::decanonize(reservedRange.address));

    memoryManager->freeGpuAddress(reservedRange, 0);
}
// A page-sized allocation with an explicit GPU address must be created with a
// single userptr ioctl, no pinning execbuffer, and the requested GPU address.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenSmallSizeAndGpuAddressSetWhenGraphicsMemoryIsAllocatedThenAllocationWithSpecifiedGpuAddressInSystemMemoryIsCreated) {
    auto memoryManager = std::make_unique<TestedDrmMemoryManager>(false, true, false, *executionEnvironment);
    auto osContext = device->getDefaultEngine().osContext;

    MockAllocationProperties properties = {rootDeviceIndex, MemoryConstants::pageSize};
    properties.gpuAddress = 0x2000;
    properties.osContext = osContext;

    mock->reset();
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.execbuffer2 = 0; //pinBB not called

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    // Abort the test on allocation failure; the checks below dereference the pointer.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());
    EXPECT_EQ(0x2000u, allocation->getGpuAddress());

    mock->testIoctls();
    memoryManager->freeGraphicsMemory(allocation);
}
// Runs the GPU VA allocation path under failure injection: every injected
// failure must yield nullptr, while the non-failing run must produce an
// allocation, which is then freed.
TEST_F(DrmMemoryManagerTest, givenInjectedFailuresWhenGraphicsMemoryWithGpuVaIsAllocatedThenNullptrIsReturned) {
    mock->ioctl_expected.total = -1; //don't care
    auto memoryManager = std::make_unique<TestedDrmMemoryManager>(false, true, false, *executionEnvironment);
    auto osContext = device->getDefaultEngine().osContext;

    MockAllocationProperties properties = {rootDeviceIndex, MemoryConstants::pageSize};
    properties.gpuAddress = 0x2000;
    properties.osContext = osContext;

    InjectedFunction method = [&](size_t failureIndex) {
        auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);

        if (MemoryManagement::nonfailingAllocation == failureIndex) {
            EXPECT_NE(nullptr, allocation);
            memoryManager->freeGraphicsMemory(allocation);
        } else {
            EXPECT_EQ(nullptr, allocation);
        }
    };
    injectFailures(method);
}
// For a size above pinThreshold with force-pin enabled, the allocation must be
// pinned (one execbuffer2) while a pin buffer object is available, and must not
// be pinned once the pin buffer object has been removed. In both cases the
// requested GPU address must be reported by the allocation.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenSizeExceedingThresholdAndGpuAddressSetWhenGraphicsMemoryIsAllocatedThenAllocationWithSpecifiedGpuAddressInSystemMemoryIsCreated) {
    auto memoryManager = std::make_unique<TestedDrmMemoryManager>(false, true, false, *executionEnvironment);
    auto osContext = device->getDefaultEngine().osContext;

    MockAllocationProperties properties = {rootDeviceIndex, memoryManager->pinThreshold + MemoryConstants::pageSize};
    properties.gpuAddress = 0x2000;
    properties.osContext = osContext;

    mock->reset();
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.execbuffer2 = 1; //pinBB called

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    // Abort the test on allocation failure; the checks below dereference the pointer.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());
    EXPECT_EQ(0x2000u, allocation->getGpuAddress());

    mock->testIoctls();
    memoryManager->freeGraphicsMemory(allocation);

    memoryManager->injectPinBB(nullptr, rootDeviceIndex); // pinBB not available

    mock->reset();
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.execbuffer2 = 0; //pinBB not called

    properties.gpuAddress = 0x5000;

    allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    // Same guard for the second allocation.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());
    EXPECT_EQ(0x5000u, allocation->getGpuAddress());

    mock->testIoctls();
    memoryManager->freeGraphicsMemory(allocation);
}
// With force-pin disabled, even an allocation above pinThreshold must not
// trigger a pinning execbuffer; the requested GPU address must still be used.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDisabledForcePinAndSizeExceedingThresholdAndGpuAddressSetWhenGraphicsMemoryIsAllocatedThenBufferIsNotPinned) {
    auto memoryManager = std::make_unique<TestedDrmMemoryManager>(false, false, false, *executionEnvironment);
    auto osContext = device->getDefaultEngine().osContext;

    MockAllocationProperties properties = {rootDeviceIndex, memoryManager->pinThreshold + MemoryConstants::pageSize};
    properties.gpuAddress = 0x2000;
    properties.osContext = osContext;

    mock->reset();
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.execbuffer2 = 0; //pinBB not called

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    // Abort the test on allocation failure; the checks below dereference the pointer.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());
    EXPECT_EQ(0x2000u, allocation->getGpuAddress());

    mock->testIoctls();
    memoryManager->freeGraphicsMemory(allocation);
}
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenforcePinAllowedWhenMemoryManagerIsCreatedThenPinBbIsCreated) {
auto memoryManager = std::make_unique<TestedDrmMemoryManager>(false, true, false, *executionEnvironment);
EXPECT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]);

View File

@@ -21,6 +21,7 @@ class MockWddmMemoryManager : public MemoryManagerCreate<WddmMemoryManager> {
using BaseClass::allocateGraphicsMemory64kb;
using BaseClass::allocateGraphicsMemoryForNonSvmHostPtr;
using BaseClass::allocateGraphicsMemoryInDevicePool;
using BaseClass::allocateGraphicsMemoryWithGpuVa;
using BaseClass::allocateGraphicsMemoryWithProperties;
using BaseClass::allocateShareableMemory;
using BaseClass::createGraphicsAllocation;

View File

@@ -406,6 +406,24 @@ TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValueOnSomeOfMultipleEngine
memoryManager->freeGraphicsMemory(allocation);
}
// The WDDM memory manager is expected to hand out an empty (zero address,
// zero size) range when asked to reserve a GPU address.
TEST_F(WddmMemoryManagerSimpleTest, givenWddmMemoryManagerWhenGpuAddressIsReservedAndFreedThenAddressRangeIsZero) {
    auto reservedRange = memoryManager->reserveGpuAddress(MemoryConstants::pageSize, 0);

    const auto decanonizedAddress = GmmHelper::decanonize(reservedRange.address);
    EXPECT_EQ(0u, decanonizedAddress);
    EXPECT_EQ(0u, reservedRange.size);

    memoryManager->freeGpuAddress(reservedRange, 0);
}
// Allocating with an explicit GPU VA through the WDDM memory manager is
// expected to return nullptr.
TEST_F(WddmMemoryManagerSimpleTest, givenWddmMemoryManagerWhenAllocatingWithGpuVaThenNullptrIsReturned) {
    AllocationData allocationData;
    allocationData.osContext = osContext;
    allocationData.gpuAddress = 0x2000;
    allocationData.size = 0x1000;

    EXPECT_EQ(nullptr, memoryManager->allocateGraphicsMemoryWithGpuVa(allocationData));
}
// A default WDDM memory manager must report no virtual padding support.
TEST_F(WddmMemoryManagerTest, givenDefaultWddmMemoryManagerWhenAskedForVirtualPaddingSupportThenFalseIsReturned) {
    const bool virtualPaddingSupported = memoryManager->peekVirtualPaddingSupport();
    EXPECT_FALSE(virtualPaddingSupported);
}

View File

@@ -35,6 +35,8 @@ struct AllocationProperties {
ImageInfo *imgInfo = nullptr;
bool multiStorageResource = false;
DeviceBitfield subDevicesBitfield{};
uint64_t gpuAddress = 0;
OsContext *osContext = nullptr;
AllocationProperties(uint32_t rootDeviceIndex, size_t size,
GraphicsAllocation::AllocationType allocationType)
@@ -102,10 +104,12 @@ struct AllocationData {
static_assert(sizeof(AllocationData::flags) == sizeof(AllocationData::allFlags), "");
GraphicsAllocation::AllocationType type = GraphicsAllocation::AllocationType::UNKNOWN;
const void *hostPtr = nullptr;
uint64_t gpuAddress = 0;
size_t size = 0;
size_t alignment = 0;
StorageInfo storageInfo = {};
ImageInfo *imgInfo = nullptr;
uint32_t rootDeviceIndex = 0;
OsContext *osContext = nullptr;
};
} // namespace NEO

View File

@@ -351,6 +351,8 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo
allocationData.hostPtr = nullptr;
}
allocationData.gpuAddress = properties.gpuAddress;
allocationData.osContext = properties.osContext;
allocationData.rootDeviceIndex = properties.rootDeviceIndex;
auto hwInfo = executionEnvironment.rootDeviceEnvironments[properties.rootDeviceIndex]->getHardwareInfo();
@@ -406,6 +408,9 @@ GraphicsAllocation *MemoryManager::allocateGraphicsMemory(const AllocationData &
if (allocationData.hostPtr) {
return allocateGraphicsMemoryWithHostPtr(allocationData);
}
if (allocationData.gpuAddress) {
return allocateGraphicsMemoryWithGpuVa(allocationData);
}
if (peek64kbPagesEnabled(allocationData.rootDeviceIndex) && allocationData.flags.allow64kbPages) {
return allocateGraphicsMemory64kb(allocationData);
}

View File

@@ -41,6 +41,11 @@ struct AlignedMallocRestrictions {
uintptr_t minAddress;
};
// Describes a range of GPU virtual address space: its start address and its
// length in bytes. Members are zero-initialized by default so that a
// default-constructed range is a well-defined "empty" range; the type remains
// an aggregate (C++14 and later), so AddressRange{address, size} still works.
struct AddressRange {
    uint64_t address = 0; // start of the range
    size_t size = 0;      // length of the range in bytes
};
constexpr size_t paddingBufferSize = 2 * MemoryConstants::megaByte;
class MemoryManager {
@@ -162,6 +167,8 @@ class MemoryManager {
// Default implementation does nothing.
virtual void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) {
}
void *getReservedMemory(size_t size, size_t alignment);
// Returns the gfx partition stored for `rootDeviceIndex`; the lookup goes
// through at() on the gfxPartitions container.
GfxPartition *getGfxPartition(uint32_t rootDeviceIndex) {
    return gfxPartitions.at(rootDeviceIndex).get();
}
// Default implementation reserves nothing and returns an empty range
// (address 0, size 0).
virtual AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) {
    return AddressRange{0, 0};
}
// Default implementation does nothing.
virtual void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) {
}
// Picks the internal heap: the device-memory variant when local memory is
// used, the regular internal heap otherwise.
static HeapIndex selectInternalHeap(bool useLocalMemory) {
    if (useLocalMemory) {
        return HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY;
    }
    return HeapIndex::HEAP_INTERNAL;
}
// Picks the external heap: the device-memory variant when local memory is
// used, the regular external heap otherwise.
static HeapIndex selectExternalHeap(bool useLocalMemory) {
    if (useLocalMemory) {
        return HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY;
    }
    return HeapIndex::HEAP_EXTERNAL;
}
@@ -190,6 +197,8 @@ class MemoryManager {
virtual GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) = 0;
virtual GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData, bool useLocalMemory) = 0;
virtual GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) = 0;
// Default implementation does not support allocating at a caller-chosen GPU
// address and returns nullptr.
virtual GraphicsAllocation *allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) {
    return nullptr;
}
GraphicsAllocation *allocateGraphicsMemoryForImageFromHostPtr(const AllocationData &allocationData);
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData);
virtual GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr<Gmm> gmm) = 0;

View File

@@ -254,6 +254,35 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithHostPtr(const Allocat
return res;
}
// Allocates host memory aligned to MemoryConstants::allocationAlignment, wraps
// it in a userptr buffer object and assigns that buffer object the GPU address
// requested in allocationData.gpuAddress.
//
// Preconditions: allocationData.gpuAddress must be non-zero, and when the
// buffer is going to be pinned allocationData.osContext must be set, because
// the context id is needed for pinning.
//
// Returns nullptr when the host memory or the buffer object cannot be created.
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) {
    // Validate inputs before acquiring any resource, so that a violated
    // precondition cannot leave host memory or a buffer object behind.
    UNRECOVERABLE_IF(allocationData.gpuAddress == 0);

    auto osContextLinux = static_cast<OsContextLinux *>(allocationData.osContext);

    const size_t minAlignment = MemoryConstants::allocationAlignment;
    size_t alignedSize = alignUp(allocationData.size, minAlignment);

    // Pinning happens only with force-pin enabled, an available pin buffer
    // object for this root device and a size at or above the pin threshold.
    const bool pinRequired = forcePinEnabled && pinBBs.at(allocationData.rootDeviceIndex) != nullptr && alignedSize >= this->pinThreshold;
    // The pin call below dereferences the OS context; fail explicitly instead.
    UNRECOVERABLE_IF(pinRequired && osContextLinux == nullptr);

    auto res = alignedMallocWrapper(alignedSize, minAlignment);
    if (!res)
        return nullptr;

    BufferObject *bo = allocUserptr(reinterpret_cast<uintptr_t>(res), alignedSize, 0, allocationData.rootDeviceIndex);
    if (!bo) {
        alignedFreeWrapper(res);
        return nullptr;
    }

    bo->gpuAddress = allocationData.gpuAddress;

    if (pinRequired) {
        pinBBs.at(allocationData.rootDeviceIndex)->pin(&bo, 1, osContextLinux->getContextId());
    }

    auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo, res, bo->gpuAddress, alignedSize, MemoryPool::System4KBPages);
    // Mark the CPU pointer as driver-allocated on the allocation object.
    allocation->setDriverAllocatedCpuPtr(res);
    return allocation;
}
DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) {
if (allocationData.size == 0 || !allocationData.hostPtr)
return nullptr;
@@ -762,4 +791,13 @@ uint32_t DrmMemoryManager::getRootDeviceIndex(const Drm *drm) {
return CommonConstants::unspecifiedDeviceIndex;
}
// Reserves a GPU virtual address range of `size` bytes for `rootDeviceIndex`
// by delegating to acquireGpuRange() and returns it together with the size.
AddressRange DrmMemoryManager::reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) {
    auto reservedAddress = acquireGpuRange(size, false, rootDeviceIndex, false);
    // `size` is read only after acquireGpuRange() has run, exactly as before.
    AddressRange reservedRange{reservedAddress, size};
    return reservedRange;
}
// Releases the GPU address range described by `addressRange` for
// `rootDeviceIndex` by delegating to releaseGpuRange().
void DrmMemoryManager::freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) {
    void *rangeStart = reinterpret_cast<void *>(addressRange.address);
    releaseGpuRange(rangeStart, addressRange.size, rootDeviceIndex);
}
} // namespace NEO

View File

@@ -55,6 +55,8 @@ class DrmMemoryManager : public MemoryManager {
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) override;
int obtainFdFromHandle(int boHandle, uint32_t rootDeviceindex);
// Reserves `size` bytes of GPU virtual address space for the given root device
// (implemented on top of acquireGpuRange()).
AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override;
// Releases the given GPU address range for the given root device
// (implemented on top of releaseGpuRange()).
void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override;
protected:
BufferObject *findAndReferenceSharedBufferObject(int boHandle);
@@ -75,6 +77,7 @@ class DrmMemoryManager : public MemoryManager {
DrmAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override;
GraphicsAllocation *allocateShareableMemory(const AllocationData &allocationData) override;
GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr<Gmm> gmm) override;
// Creates a host-memory allocation backed by a userptr buffer object whose GPU
// address is set to allocationData.gpuAddress (must be non-zero).
GraphicsAllocation *allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) override;
void *lockResourceImpl(GraphicsAllocation &graphicsAllocation) override;
void *lockResourceInLocalMemoryImpl(GraphicsAllocation &graphicsAllocation);