fix: create separate heap for host and shared usm in 48-56b VA

Related-To: NEO-7665
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2023-04-27 17:43:49 +02:00
committed by Compute-Runtime-Automation
parent 1c44e3ab3e
commit 5a5c20f99c
8 changed files with 49 additions and 29 deletions

View File

@@ -491,8 +491,8 @@ DECLARE_DEBUG_VARIABLE(bool, EnableDebuggerMmapMemoryAccess, false, "Mmap used t
DECLARE_DEBUG_VARIABLE(bool, ForceDefaultGrfCompilationMode, false, "Adds build option -cl-intel-128-GRF-per-thread to force kernel compilation in Default-GRF mode")
DECLARE_DEBUG_VARIABLE(bool, ForceLargeGrfCompilationMode, false, "Adds build option -cl-intel-256-GRF-per-thread to force kernel compilation in Large-GRF mode")
DECLARE_DEBUG_VARIABLE(bool, EnableConcurrentSharedCrossP2PDeviceAccess, false, "Enables the concurrent use between host and peer devices of shared-allocations ")
DECLARE_DEBUG_VARIABLE(bool, AllocateSharedAllocationsInHeapExtended, false, "When enabled driver can allocate shared unified memory allocation in heap extended. (0 - disable, 1 - enable)")
DECLARE_DEBUG_VARIABLE(bool, AllocateHostAllocationsInHeapExtended, true, "When enabled driver can allocate host unified memory allocation in heap extended. (0 - disable, 1 - enable)")
DECLARE_DEBUG_VARIABLE(bool, AllocateSharedAllocationsInHeapExtendedHost, false, "When enabled driver can allocate shared unified memory allocation in heap extended host. (0 - disable, 1 - enable)")
DECLARE_DEBUG_VARIABLE(bool, AllocateHostAllocationsInHeapExtendedHost, true, "When enabled driver can allocate host unified memory allocation in heap extended host. (0 - disable, 1 - enable)")
DECLARE_DEBUG_VARIABLE(int32_t, ForceAutoGrfCompilationMode, -1, "Adds build option -*-intel-enable-auto-large-GRF-mode to force kernel compilation")
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCL21FeaturesSupport, -1, "-1: default, 0: disable, 1:enable. Force support of OpenCL 2.0 and OpenCL 2.1 API features")

View File

@@ -90,7 +90,7 @@ static void reserveHigh48BitRangeWithMemoryMapsParse(OSMemory *osMemory, OSMemor
static void reserve57BitRangeWithMemoryMapsParse(OSMemory *osMemory, OSMemory::ReservedCpuAddressRange &reservedCpuAddressRange, uint64_t reservationSize) {
constexpr uint64_t areaBase = maxNBitValue(48) + 1;
constexpr uint64_t areaTop = maxNBitValue(57);
constexpr uint64_t areaTop = maxNBitValue(56);
reserveRangeWithMemoryMapsParse(osMemory, reservedCpuAddressRange, areaBase, areaTop, reservationSize);
}
@@ -343,7 +343,6 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t
return false;
}
bool isExtendedHeapInitialized = false;
if (cpuVirtualAddressSize == 57 && CpuInfo::getInstance().isCpuFlagPresent("la57")) {
// Always reserve 48 bit window on 57 bit CPU
if (reservedCpuAddressRangeForHeapSvm.alignedPtr == nullptr) {
@@ -370,8 +369,7 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t
uint64_t heapExtendedSize = MemoryConstants::teraByte;
reserve57BitRangeWithMemoryMapsParse(osMemory.get(), reservedCpuAddressRangeForHeapExtended, heapExtendedSize);
if (reservedCpuAddressRangeForHeapExtended.alignedPtr) {
heapInit(HeapIndex::HEAP_EXTENDED, castToUint64(reservedCpuAddressRangeForHeapExtended.alignedPtr), heapExtendedSize);
isExtendedHeapInitialized = true;
heapInit(HeapIndex::HEAP_EXTENDED_HOST, castToUint64(reservedCpuAddressRangeForHeapExtended.alignedPtr), heapExtendedSize);
}
}
} else {
@@ -382,7 +380,7 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t
}
// Init HEAP_EXTENDED only for 57 bit GPU
if (gpuAddressSpace == maxNBitValue(57) && !isExtendedHeapInitialized) {
if (gpuAddressSpace == maxNBitValue(57)) {
// Split HEAP_EXTENDED among root devices (like HEAP_STANDARD64K)
auto heapExtendedSize = alignDown((maxNBitValue(48) + 1) / numRootDevices, GfxPartition::heapGranularity);
heapInit(HeapIndex::HEAP_EXTENDED, maxNBitValue(57 - 1) + 1 + rootDeviceIndex * heapExtendedSize, heapExtendedSize);

View File

@@ -28,6 +28,7 @@ enum class HeapIndex : uint32_t {
HEAP_EXTERNAL_DEVICE_FRONT_WINDOW,
HEAP_INTERNAL_FRONT_WINDOW,
HEAP_INTERNAL_DEVICE_FRONT_WINDOW,
HEAP_EXTENDED_HOST,
// Please put new heap indexes above this line
TOTAL_HEAPS

View File

@@ -1977,10 +1977,10 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
auto totalSizeToAlloc = alignedSize + alignment;
uint64_t preferredAddress = 0;
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
auto canAllocateInHeapExtended = DebugManager.flags.AllocateHostAllocationsInHeapExtended.get();
if (canAllocateInHeapExtended && allocationData.flags.isUSMHostAllocation && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u) {
auto canAllocateInHeapExtended = DebugManager.flags.AllocateHostAllocationsInHeapExtendedHost.get();
if (canAllocateInHeapExtended && allocationData.flags.isUSMHostAllocation && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST) > 0u) {
preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED);
preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED_HOST);
}
auto cpuPointer = this->mmapFunction(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
@@ -2130,9 +2130,9 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
uint64_t preferredAddress = 0;
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
auto canAllocateInHeapExtended = DebugManager.flags.AllocateSharedAllocationsInHeapExtended.get();
if (canAllocateInHeapExtended && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u && !allocationData.flags.resource48Bit) {
preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED);
auto canAllocateInHeapExtended = DebugManager.flags.AllocateSharedAllocationsInHeapExtendedHost.get();
if (canAllocateInHeapExtended && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST) > 0u && !allocationData.flags.resource48Bit) {
preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED_HOST);
}
auto cpuPointer = this->mmapFunction(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);

View File

@@ -66,6 +66,9 @@ class MockGfxPartition : public GfxPartition {
GfxPartition::freeGpuAddressRange(gpuAddress, size);
}
}
void initHeap(HeapIndex heapIndex, uint64_t base, uint64_t size, size_t allocationAlignment) {
getHeap(heapIndex).init(base, size, allocationAlignment);
}
uint32_t freeGpuAddressRangeCalled = 0u;
bool callBasefreeGpuAddressRange = false;

View File

@@ -513,8 +513,8 @@ ExperimentalCopyThroughLockWaitlistSizeThreshold= -1
ForceDummyBlitWa = -1
DetectIndirectAccessInKernel = -1
OptimizeIoqBarriersHandling = -1
AllocateSharedAllocationsInHeapExtended = 0
AllocateHostAllocationsInHeapExtended = 1
AllocateSharedAllocationsInHeapExtendedHost = 0
AllocateHostAllocationsInHeapExtendedHost = 1
DirectSubmissionControllerMaxTimeout = -1
ExitOnSubmissionNumber = -1
ExitOnSubmissionMode = 0

View File

@@ -869,8 +869,8 @@ TEST(GfxPartitionTest, givenGpuAddressSpaceIs57BitAndSeveralRootDevicesThenHeapE
auto heapExtendedSize = MemoryConstants::teraByte;
EXPECT_EQ(heapExtendedSize, gfxPartition.getHeapSize(HeapIndex::HEAP_EXTENDED));
EXPECT_LT(maxNBitValue(48), gfxPartition.getHeapBase(HeapIndex::HEAP_EXTENDED));
EXPECT_EQ(heapExtendedSize, gfxPartition.getHeapSize(HeapIndex::HEAP_EXTENDED_HOST));
EXPECT_LT(maxNBitValue(48), gfxPartition.getHeapBase(HeapIndex::HEAP_EXTENDED_HOST));
}
{

View File

@@ -6520,6 +6520,9 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingHostUSMT
if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) {
GTEST_SKIP();
}
auto mockGfxPartition = std::make_unique<MockGfxPartition>();
mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k);
memoryManager->overrideGfxPartition(mockGfxPartition.release());
VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true);
SysCalls::mmapCapturedExtendedPointers.clear();
@@ -6537,8 +6540,8 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingHostUSMT
auto gpuAddress = reinterpret_cast<uint64_t>(SysCalls::mmapCapturedExtendedPointers[0]);
SysCalls::mmapCapturedExtendedPointers.clear();
auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex);
EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress));
EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress));
EXPECT_EQ(hostUSM->getGpuAddress(), gpuAddress);
EXPECT_EQ(hostUSM->getReservedAddressPtr(), reinterpret_cast<void *>(gpuAddress));
@@ -6549,6 +6552,9 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingHostU
if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) {
GTEST_SKIP();
}
auto mockGfxPartition = std::make_unique<MockGfxPartition>();
mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k);
memoryManager->overrideGfxPartition(mockGfxPartition.release());
VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, false);
SysCalls::mmapCapturedExtendedPointers.clear();
@@ -6566,8 +6572,8 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingHostU
auto gpuAddress = reinterpret_cast<uint64_t>(SysCalls::mmapCapturedExtendedPointers[0]);
SysCalls::mmapCapturedExtendedPointers.clear();
auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex);
EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress));
EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress));
EXPECT_NE(hostUSM->getGpuAddress(), gpuAddress);
memoryManager->freeGraphicsMemory(hostUSM);
@@ -6578,8 +6584,11 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuAndDisabledHeapExtende
GTEST_SKIP();
}
auto mockGfxPartition = std::make_unique<MockGfxPartition>();
mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k);
memoryManager->overrideGfxPartition(mockGfxPartition.release());
DebugManagerStateRestore restorer;
DebugManager.flags.AllocateHostAllocationsInHeapExtended.set(false);
DebugManager.flags.AllocateHostAllocationsInHeapExtendedHost.set(false);
VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true);
SysCalls::mmapCapturedExtendedPointers.clear();
@@ -6602,8 +6611,11 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingSharedUS
GTEST_SKIP();
}
auto mockGfxPartition = std::make_unique<MockGfxPartition>();
mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k);
memoryManager->overrideGfxPartition(mockGfxPartition.release());
DebugManagerStateRestore restorer;
DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true);
DebugManager.flags.AllocateSharedAllocationsInHeapExtendedHost.set(true);
VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true);
SysCalls::mmapCapturedExtendedPointers.clear();
@@ -6627,8 +6639,8 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingSharedUS
auto gpuAddress = reinterpret_cast<uint64_t>(SysCalls::mmapCapturedExtendedPointers[0]);
SysCalls::mmapCapturedExtendedPointers.clear();
auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex);
EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress));
EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress));
EXPECT_EQ(sharedUSM->getGpuAddress(), gpuAddress);
EXPECT_EQ(sharedUSM->getReservedAddressPtr(), reinterpret_cast<void *>(gpuAddress));
@@ -6639,8 +6651,11 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingShare
if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) {
GTEST_SKIP();
}
auto mockGfxPartition = std::make_unique<MockGfxPartition>();
mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k);
memoryManager->overrideGfxPartition(mockGfxPartition.release());
DebugManagerStateRestore restorer;
DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true);
DebugManager.flags.AllocateSharedAllocationsInHeapExtendedHost.set(true);
VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, false);
SysCalls::mmapCapturedExtendedPointers.clear();
@@ -6664,8 +6679,8 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingShare
auto gpuAddress = reinterpret_cast<uint64_t>(SysCalls::mmapCapturedExtendedPointers[0]);
SysCalls::mmapCapturedExtendedPointers.clear();
auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex);
EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress));
EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress));
EXPECT_NE(sharedUSM->getGpuAddress(), gpuAddress);
memoryManager->freeGraphicsMemory(sharedUSM);
@@ -6675,8 +6690,11 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocating48bResou
if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) {
GTEST_SKIP();
}
auto mockGfxPartition = std::make_unique<MockGfxPartition>();
mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k);
memoryManager->overrideGfxPartition(mockGfxPartition.release());
DebugManagerStateRestore restorer;
DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true);
DebugManager.flags.AllocateSharedAllocationsInHeapExtendedHost.set(true);
VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true);
SysCalls::mmapCapturedExtendedPointers.clear();