feature: heapAssigner per root device

- create a HeapAssigner per root device in the memory manager to allow
per-device configuration

Related-To: NEO-7063

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe 2023-10-19 14:24:35 +00:00 committed by Compute-Runtime-Automation
parent 18e45b2b42
commit 97faeae16f
16 changed files with 44 additions and 40 deletions

View File

@ -23,23 +23,17 @@ using AlocationHelperTests = Test<DeviceFixture>;
using Platforms = IsAtMostProduct<IGFX_TIGERLAKE_LP>;
HWTEST2_F(AlocationHelperTests, givenLinearStreamTypeWhenUseExternalAllocatorForSshAndDshDisabledThenUse32BitIsFalse, Platforms) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(false);
HeapAssigner heapAssigner = {};
HeapAssigner heapAssigner{false};
EXPECT_FALSE(heapAssigner.use32BitHeap(AllocationType::LINEAR_STREAM));
}
// Expects use32BitHeap(LINEAR_STREAM) to return true for a HeapAssigner
// constructed with the external-heap flag set to true.
// NOTE(review): two declarations of `heapAssigner` appear below; this looks
// like a rendered diff showing the removed and the added line - confirm which
// one is live before editing.
HWTEST2_F(AlocationHelperTests, givenLinearStreamTypeWhenUseExternalAllocatorForSshAndDshEnabledThenUse32BitIsTrue, Platforms) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true);
HeapAssigner heapAssigner = {};
HeapAssigner heapAssigner{true};
EXPECT_TRUE(heapAssigner.use32BitHeap(AllocationType::LINEAR_STREAM));
}
// Expects get32BitHeapIndex() for a LINEAR_STREAM allocation to select
// HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY when the HeapAssigner is constructed
// with the external-heap flag set to true.
// NOTE(review): two declarations of `heapAssigner` appear below; this looks
// like a rendered diff showing the removed and the added line - confirm which
// one is live before editing.
HWTEST2_F(AlocationHelperTests, givenLinearStreamTypeWhenUseIternalAllocatorThenUseHeapExternal, Platforms) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true);
HeapAssigner heapAssigner = {};
HeapAssigner heapAssigner{true};
// The two bool arguments (true, false) are passed positionally; their meaning
// is defined by HeapAssigner::get32BitHeapIndex, which is not visible here.
auto heapIndex = heapAssigner.get32BitHeapIndex(AllocationType::LINEAR_STREAM, true, *defaultHwInfo.get(), false);
EXPECT_EQ(heapIndex, NEO::HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY);
}
@ -52,6 +46,7 @@ TEST_F(AlocationHelperTests, givenLinearStreamAllocationWhenSelectingHeapWithUse
allocation.set32BitAllocation(false);
EXPECT_EQ(MemoryManager::selectExternalHeap(allocation.isAllocatedInLocalMemoryPool()), mockMemoryManager->selectHeap(&allocation, false, false, false));
EXPECT_TRUE(mockMemoryManager->heapAssigners[0]->apiAllowExternalHeapForSshAndDsh);
}
TEST_F(AlocationHelperTests, givenExternalHeapIndexWhenMapingToExternalFrontWindowThenEternalFrontWindowReturned) {

View File

@ -17,9 +17,7 @@ namespace NEO {
using AlocationHelperTests = Test<ClDeviceFixture>;
// Expects use32BitHeap(LINEAR_STREAM) to return false for a HeapAssigner
// constructed with the external-heap flag set to false.
// NOTE(review): the debug flag is set to true while the assigner is built with
// false, and `heapAssigner` is declared twice; this looks like a rendered diff
// showing removed lines next to the added one - confirm which lines are live.
HWTEST_F(AlocationHelperTests, givenLinearStreamTypeWhenUseExternalAllocatorForSshAndDshDisabledThenUse32BitIsFalse) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true);
HeapAssigner heapAssigner = {};
HeapAssigner heapAssigner{false};
EXPECT_FALSE(heapAssigner.use32BitHeap(AllocationType::LINEAR_STREAM));
}

View File

@ -352,6 +352,15 @@ TEST(ClMemoryManagerTest, givenForcedLinearImages3DImageAndProperDescriptorValue
alignedFree(hostPtr);
}
TEST(ClOsAgnosticMemoryManager, givenUseExternalAllocatorForSshAndDshWhenMemoryManagerCreatedThenExternalHeapIsNotallowed) {
    // Keeps the debug-flag override local to this test.
    DebugManagerStateRestore flagsRestorer;
    DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true);

    MockExecutionEnvironment mockExecutionEnvironment{};
    MockMemoryManager mockMemoryManager(false, false, mockExecutionEnvironment);

    // Even with the debug flag enabled, the heap assigner created for root
    // device 0 is expected to report the external heap as not allowed.
    EXPECT_FALSE(mockMemoryManager.heapAssigners[0]->apiAllowExternalHeapForSshAndDsh);
}
using ClMemoryManagerMultiRootDeviceTests = MultiRootDeviceFixture;
TEST_F(ClMemoryManagerMultiRootDeviceTests, WhenAllocatingGlobalSurfaceThenItHasCorrectRootDeviceIndex) {

View File

@ -7,15 +7,14 @@
#include "shared/source/helpers/heap_assigner.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/memory_manager/gfx_partition.h"
#include "shared/source/memory_manager/memory_manager.h"
namespace NEO {
HeapAssigner::HeapAssigner() {
apiAllowExternalHeapForSshAndDsh = ApiSpecificConfig::getGlobalBindlessHeapConfiguration();
// Builds a HeapAssigner whose apiAllowExternalHeapForSshAndDsh flag is taken
// directly from the caller-supplied allowExternalHeap value.
HeapAssigner::HeapAssigner(bool allowExternalHeap)
    : apiAllowExternalHeapForSshAndDsh(allowExternalHeap) {
}
bool HeapAssigner::useInternal32BitHeap(AllocationType allocType) {
return GraphicsAllocation::isIsaAllocationType(allocType) ||

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -14,7 +14,7 @@ enum class HeapIndex : uint32_t;
struct HardwareInfo;
struct HeapAssigner {
HeapAssigner();
HeapAssigner(bool allowExternalHeap);
~HeapAssigner() = default;
bool useExternal32BitHeap(AllocationType allocType);
bool useInternal32BitHeap(AllocationType allocType);

View File

@ -69,6 +69,9 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu
anyLocalMemorySupported |= this->localMemorySupported[rootDeviceIndex];
isLocalMemoryUsedForIsa(rootDeviceIndex);
auto globalHeap = ApiSpecificConfig::getGlobalBindlessHeapConfiguration();
heapAssigners.push_back(std::make_unique<HeapAssigner>(globalHeap));
}
if (anyLocalMemorySupported) {
@ -638,7 +641,7 @@ GraphicsAllocation *MemoryManager::allocateGraphicsMemory(const AllocationData &
}
return allocation;
}
bool use32Allocator = heapAssigner.use32BitHeap(allocationData.type);
bool use32Allocator = heapAssigners[allocationData.rootDeviceIndex]->use32BitHeap(allocationData.type);
bool isAllocationOnLimitedGPU = isLimitedGPUOnType(allocationData.rootDeviceIndex, allocationData.type);
if (use32Allocator || isAllocationOnLimitedGPU ||
@ -758,10 +761,10 @@ void MemoryManager::unlockResource(GraphicsAllocation *graphicsAllocation) {
HeapIndex MemoryManager::selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM, bool useFrontWindow) {
if (allocation) {
if (heapAssigner.useInternal32BitHeap(allocation->getAllocationType())) {
if (heapAssigners[allocation->getRootDeviceIndex()]->useInternal32BitHeap(allocation->getAllocationType())) {
return useFrontWindow ? HeapAssigner::mapInternalWindowIndex(selectInternalHeap(allocation->isAllocatedInLocalMemoryPool())) : selectInternalHeap(allocation->isAllocatedInLocalMemoryPool());
}
if (allocation->is32BitAllocation() || heapAssigner.useExternal32BitHeap(allocation->getAllocationType())) {
if (allocation->is32BitAllocation() || heapAssigners[allocation->getRootDeviceIndex()]->useExternal32BitHeap(allocation->getAllocationType())) {
return useFrontWindow ? HeapAssigner::mapExternalWindowIndex(selectExternalHeap(allocation->isAllocatedInLocalMemoryPool()))
: selectExternalHeap(allocation->isAllocatedInLocalMemoryPool());
}

View File

@ -345,7 +345,7 @@ class MemoryManager {
std::unique_ptr<PageFaultManager> pageFaultManager;
std::unique_ptr<PrefetchManager> prefetchManager;
OSMemory::ReservedCpuAddressRange reservedCpuAddressRange;
HeapAssigner heapAssigner;
std::vector<std::unique_ptr<HeapAssigner>> heapAssigners;
AlignmentSelector alignmentSelector = {};
std::unique_ptr<std::once_flag[]> checkIsaPlacementOnceFlags;
std::vector<bool> isaInLocalMemory;

View File

@ -46,7 +46,7 @@ void OsAgnosticMemoryManager::initialize(bool aubUsage) {
this->enable64kbpages[rootDeviceIndex] = is64kbPagesEnabled(hwInfo);
this->localMemorySupported.push_back(gfxCoreHelper.getEnableLocalMemory(*hwInfo));
auto gpuAddressSpace = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.gpuAddressSpace;
if (!getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, reservedCpuAddressRangeSize, rootDeviceIndex, gfxPartitions.size(), heapAssigner.apiAllowExternalHeapForSshAndDsh, OsAgnosticMemoryManager::getSystemSharedMemory(rootDeviceIndex))) {
if (!getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, reservedCpuAddressRangeSize, rootDeviceIndex, gfxPartitions.size(), heapAssigners[rootDeviceIndex]->apiAllowExternalHeapForSshAndDsh, OsAgnosticMemoryManager::getSystemSharedMemory(rootDeviceIndex))) {
initialized = false;
return;
}
@ -186,7 +186,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemory64kb(const Al
GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) {
auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo();
auto heap = heapAssigner.get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow);
auto heap = heapAssigners[allocationData.rootDeviceIndex]->get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow);
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
auto gmmHelper = getGmmHelper(allocationData.rootDeviceIndex);
@ -588,7 +588,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(
if (allocationData.flags.useSystemMemory || (allocationData.flags.allow32Bit && this->force32bitAllocations)) {
return nullptr;
}
bool use32Allocator = heapAssigner.use32BitHeap(allocationData.type);
bool use32Allocator = heapAssigners[allocationData.rootDeviceIndex]->use32BitHeap(allocationData.type);
if (allocationData.type == AllocationType::SVM_GPU) {
auto storage = allocateSystemMemory(allocationData.size, MemoryConstants::pageSize2M);
auto canonizedGpuAddress = gmmHelper->canonize(reinterpret_cast<uint64_t>(allocationData.hostPtr));
@ -626,7 +626,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(
if (use32Allocator) {
auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo();
heapIndex = heapAssigner.get32BitHeapIndex(allocationData.type, true, *hwInfo, allocationData.flags.use32BitFrontWindow);
heapIndex = heapAssigners[allocationData.rootDeviceIndex]->get32BitHeapIndex(allocationData.type, true, *hwInfo, allocationData.flags.use32BitFrontWindow);
} else if ((gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0) && !allocationData.flags.resource48Bit) {
heapIndex = HeapIndex::HEAP_EXTENDED;
}

View File

@ -17,7 +17,7 @@ constexpr uint64_t dummyAddress = 0xFFFFF000u;
class OsAgnosticMemoryManager : public MemoryManager {
public:
using MemoryManager::allocateGraphicsMemory;
using MemoryManager::heapAssigner;
using MemoryManager::heapAssigners;
OsAgnosticMemoryManager(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(false, executionEnvironment) {}
OsAgnosticMemoryManager(bool aubUsage, ExecutionEnvironment &executionEnvironment);

View File

@ -78,7 +78,7 @@ void DrmMemoryManager::initialize(gemCloseWorkerMode mode) {
for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) {
auto gpuAddressSpace = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.gpuAddressSpace;
if (!getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, getSizeToReserve(), rootDeviceIndex, gfxPartitions.size(), heapAssigner.apiAllowExternalHeapForSshAndDsh, DrmMemoryManager::getSystemSharedMemory(rootDeviceIndex))) {
if (!getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, getSizeToReserve(), rootDeviceIndex, gfxPartitions.size(), heapAssigners[rootDeviceIndex]->apiAllowExternalHeapForSshAndDsh, DrmMemoryManager::getSystemSharedMemory(rootDeviceIndex))) {
initialized = false;
return;
}
@ -686,7 +686,7 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForImageImpl(const A
GraphicsAllocation *DrmMemoryManager::allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) {
auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo();
auto allocatorToUse = heapAssigner.get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow);
auto allocatorToUse = heapAssigners[allocationData.rootDeviceIndex]->get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow);
if (allocationData.hostPtr) {
uintptr_t inputPtr = reinterpret_cast<uintptr_t>(allocationData.hostPtr);
@ -1788,7 +1788,7 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
uint64_t gpuAddress = 0lu;
status = getGpuAddress(this->alignmentSelector, this->heapAssigner, *hwInfo, gfxPartition, allocationData, sizeAllocated, gmmHelper, gpuAddress);
status = getGpuAddress(this->alignmentSelector, *this->heapAssigners[allocationData.rootDeviceIndex], *hwInfo, gfxPartition, allocationData, sizeAllocated, gmmHelper, gpuAddress);
if (status == AllocationStatus::Error) {
return nullptr;
}
@ -1834,9 +1834,9 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
auto canonizedGpuAddress = gmmHelper->canonize(gpuAddress);
allocation->setCpuPtrAndGpuAddress(cpuAddress, canonizedGpuAddress);
}
if (heapAssigner.useInternal32BitHeap(allocationData.type)) {
if (heapAssigners[allocationData.rootDeviceIndex]->useInternal32BitHeap(allocationData.type)) {
allocation->setGpuBaseAddress(gmmHelper->canonize(getInternalHeapBaseAddress(allocationData.rootDeviceIndex, true)));
} else if (heapAssigner.useExternal32BitHeap(allocationData.type)) {
} else if (heapAssigners[allocationData.rootDeviceIndex]->useExternal32BitHeap(allocationData.type)) {
allocation->setGpuBaseAddress(gmmHelper->canonize(getExternalHeapBaseAddress(allocationData.rootDeviceIndex, true)));
}

View File

@ -57,7 +57,7 @@ WddmMemoryManager::WddmMemoryManager(ExecutionEnvironment &executionEnvironment)
for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) {
mallocRestrictions.minAddress = std::max(mallocRestrictions.minAddress, getWddm(rootDeviceIndex).getWddmMinAddress());
getWddm(rootDeviceIndex).initGfxPartition(*getGfxPartition(rootDeviceIndex), rootDeviceIndex, gfxPartitions.size(), heapAssigner.apiAllowExternalHeapForSshAndDsh);
getWddm(rootDeviceIndex).initGfxPartition(*getGfxPartition(rootDeviceIndex), rootDeviceIndex, gfxPartitions.size(), heapAssigners[rootDeviceIndex]->apiAllowExternalHeapForSshAndDsh);
}
alignmentSelector.addCandidateAlignment(MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage);
@ -524,7 +524,7 @@ GraphicsAllocation *WddmMemoryManager::allocate32BitGraphicsMemoryImpl(const All
freeSystemMemory(pSysMem);
return nullptr;
}
auto baseAddress = getGfxPartition(allocationData.rootDeviceIndex)->getHeapBase(heapAssigner.get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow));
auto baseAddress = getGfxPartition(allocationData.rootDeviceIndex)->getHeapBase(heapAssigners[allocationData.rootDeviceIndex]->get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow));
UNRECOVERABLE_IF(gmmHelper->canonize(baseAddress) != wddmAllocation->getGpuBaseAddress());
wddmAllocation->setGpuBaseAddress(gmmHelper->canonize(baseAddress));
@ -1364,7 +1364,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
wddmAllocation->storageInfo = allocationData.storageInfo;
wddmAllocation->setFlushL3Required(allocationData.flags.flushL3);
wddmAllocation->needsMakeResidentBeforeLock = true;
if (heapAssigner.use32BitHeap(allocationData.type)) {
if (heapAssigners[allocationData.rootDeviceIndex]->use32BitHeap(allocationData.type)) {
wddmAllocation->allocInFrontWindowPool = allocationData.flags.use32BitFrontWindow;
}
@ -1375,7 +1375,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
auto &wddm = getWddm(allocationData.rootDeviceIndex);
if (!heapAssigner.use32BitHeap(allocationData.type)) {
if (!heapAssigners[allocationData.rootDeviceIndex]->use32BitHeap(allocationData.type)) {
adjustGpuPtrToHostAddressSpace(*wddmAllocation.get(), requiredGpuVa);
}

View File

@ -80,7 +80,7 @@ class TestedDrmMemoryManager : public MemoryManagerCreate<DrmMemoryManager> {
using DrmMemoryManager::waitOnCompletionFence;
using MemoryManager::allocateGraphicsMemoryInDevicePool;
using MemoryManager::allRegisteredEngines;
using MemoryManager::heapAssigner;
using MemoryManager::heapAssigners;
TestedDrmMemoryManager(ExecutionEnvironment &executionEnvironment);
TestedDrmMemoryManager(bool enableLocalMemory,

View File

@ -16,7 +16,7 @@ namespace NEO {
// Test fixture built on Test<DeviceFixture> that exposes a HeapAssigner
// member for the tests in this file.
// NOTE(review): `heapAssigner` is declared twice below; this looks like a
// rendered diff showing the removed `= {}` form next to the added `{false}`
// form - confirm which one is live before editing.
class AlocationHelperTests : public Test<DeviceFixture> {
public:
HeapAssigner heapAssigner = {};
HeapAssigner heapAssigner{false};
};
HWTEST_F(AlocationHelperTests, givenKernelIsaTypeWhenUse32BitHeapCalledThenTrueReturned) {

View File

@ -31,7 +31,7 @@ TEST_F(FrontWindowAllocatorTests, givenAllocateInFrontWindowPoolFlagWhenAllocate
allocData.flags.use32BitFrontWindow = true;
allocData.size = MemoryConstants::kiloByte;
auto allocation(memManager->allocate32BitGraphicsMemoryImpl(allocData));
auto heap = memManager->heapAssigner.get32BitHeapIndex(allocData.type, false, *defaultHwInfo, true);
auto heap = memManager->heapAssigners[allocData.rootDeviceIndex]->get32BitHeapIndex(allocData.type, false, *defaultHwInfo, true);
auto gmmHelper = memManager->getGmmHelper(allocData.rootDeviceIndex);
EXPECT_EQ(gmmHelper->canonize(memManager->getGfxPartition(0)->getHeapMinimalAddress(heap)), allocation->getGpuAddress());

View File

@ -1927,7 +1927,7 @@ TEST_F(DrmMemoryManagerLocalMemoryPrelimTest, givenSupportedTypeWhenAllocatingIn
EXPECT_LT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_SVM)), gpuAddress);
EXPECT_GT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_SVM)), gpuAddress);
}
} else if (memoryManager->heapAssigner.useInternal32BitHeap(allocation->getAllocationType())) {
} else if (memoryManager->heapAssigners[rootDeviceIndex]->useInternal32BitHeap(allocation->getAllocationType())) {
EXPECT_LT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress);
EXPECT_GT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress);
} else {

View File

@ -609,7 +609,7 @@ HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenSupportedTypeWhenAllocatingInDev
EXPECT_LT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_SVM)), gpuAddress);
EXPECT_GT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_SVM)), gpuAddress);
}
} else if (memoryManager->heapAssigner.useInternal32BitHeap(allocation->getAllocationType())) {
} else if (memoryManager->heapAssigners[rootDeviceIndex]->useInternal32BitHeap(allocation->getAllocationType())) {
EXPECT_LT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress);
EXPECT_GT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress);
} else {