From 5a5c20f99cecffb3c4ab32d3331f8748136056d5 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Thu, 27 Apr 2023 17:43:49 +0200 Subject: [PATCH] fix: create separate heap for host and shared usm in 48-56b VA Related-To: NEO-7665 Signed-off-by: Mateusz Jablonski --- .../debug_settings/debug_variables_base.inl | 4 +- .../source/memory_manager/gfx_partition.cpp | 8 ++-- shared/source/memory_manager/gfx_partition.h | 1 + .../os_interface/linux/drm_memory_manager.cpp | 12 +++--- shared/test/common/mocks/mock_gfx_partition.h | 3 ++ shared/test/common/test_files/igdrcl.config | 4 +- .../memory_manager/gfx_partition_tests.cpp | 4 +- .../linux/drm_memory_manager_tests.cpp | 42 +++++++++++++------ 8 files changed, 49 insertions(+), 29 deletions(-) diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 6ab268a7de..709b138111 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -491,8 +491,8 @@ DECLARE_DEBUG_VARIABLE(bool, EnableDebuggerMmapMemoryAccess, false, "Mmap used t DECLARE_DEBUG_VARIABLE(bool, ForceDefaultGrfCompilationMode, false, "Adds build option -cl-intel-128-GRF-per-thread to force kernel compilation in Default-GRF mode") DECLARE_DEBUG_VARIABLE(bool, ForceLargeGrfCompilationMode, false, "Adds build option -cl-intel-256-GRF-per-thread to force kernel compilation in Large-GRF mode") DECLARE_DEBUG_VARIABLE(bool, EnableConcurrentSharedCrossP2PDeviceAccess, false, "Enables the concurrent use between host and peer devices of shared-allocations ") -DECLARE_DEBUG_VARIABLE(bool, AllocateSharedAllocationsInHeapExtended, false, "When enabled driver can allocate shared unified memory allocation in heap extended. (0 - disable, 1 - enable)") -DECLARE_DEBUG_VARIABLE(bool, AllocateHostAllocationsInHeapExtended, true, "When enabled driver can allocate host unified memory allocation in heap extended. (0 - disable, 1 - enable)") +DECLARE_DEBUG_VARIABLE(bool, AllocateSharedAllocationsInHeapExtendedHost, false, "When enabled driver can allocate shared unified memory allocation in heap extended host. (0 - disable, 1 - enable)") +DECLARE_DEBUG_VARIABLE(bool, AllocateHostAllocationsInHeapExtendedHost, true, "When enabled driver can allocate host unified memory allocation in heap extended host. (0 - disable, 1 - enable)") DECLARE_DEBUG_VARIABLE(int32_t, ForceAutoGrfCompilationMode, -1, "Adds build option -*-intel-enable-auto-large-GRF-mode to force kernel compilation") DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version") DECLARE_DEBUG_VARIABLE(int32_t, ForceOCL21FeaturesSupport, -1, "-1: default, 0: disable, 1:enable. Force support of OpenCL 2.0 and OpenCL 2.1 API features") diff --git a/shared/source/memory_manager/gfx_partition.cpp b/shared/source/memory_manager/gfx_partition.cpp index f1ecd8a040..d3e8be3464 100644 --- a/shared/source/memory_manager/gfx_partition.cpp +++ b/shared/source/memory_manager/gfx_partition.cpp @@ -90,7 +90,7 @@ static void reserveHigh48BitRangeWithMemoryMapsParse(OSMemory *osMemory, OSMemor static void reserve57BitRangeWithMemoryMapsParse(OSMemory *osMemory, OSMemory::ReservedCpuAddressRange &reservedCpuAddressRange, uint64_t reservationSize) { constexpr uint64_t areaBase = maxNBitValue(48) + 1; - constexpr uint64_t areaTop = maxNBitValue(57); + constexpr uint64_t areaTop = maxNBitValue(56); reserveRangeWithMemoryMapsParse(osMemory, reservedCpuAddressRange, areaBase, areaTop, reservationSize); } @@ -343,7 +343,6 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t return false; } - bool isExtendedHeapInitialized = false; if (cpuVirtualAddressSize == 57 && CpuInfo::getInstance().isCpuFlagPresent("la57")) { // Always reserve 48 bit window on 57 bit CPU if (reservedCpuAddressRangeForHeapSvm.alignedPtr == nullptr) { @@ -370,8 +369,7 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t uint64_t heapExtendedSize = MemoryConstants::teraByte; reserve57BitRangeWithMemoryMapsParse(osMemory.get(), reservedCpuAddressRangeForHeapExtended, heapExtendedSize); if (reservedCpuAddressRangeForHeapExtended.alignedPtr) { - heapInit(HeapIndex::HEAP_EXTENDED, castToUint64(reservedCpuAddressRangeForHeapExtended.alignedPtr), heapExtendedSize); - isExtendedHeapInitialized = true; + heapInit(HeapIndex::HEAP_EXTENDED_HOST, castToUint64(reservedCpuAddressRangeForHeapExtended.alignedPtr), heapExtendedSize); } } } else { @@ -382,7 +380,7 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t } // Init HEAP_EXTENDED only for 57 bit GPU - if (gpuAddressSpace == maxNBitValue(57) && !isExtendedHeapInitialized) { + if (gpuAddressSpace == maxNBitValue(57)) { // Split HEAP_EXTENDED among root devices (like HEAP_STANDARD64K) auto heapExtendedSize = alignDown((maxNBitValue(48) + 1) / numRootDevices, GfxPartition::heapGranularity); heapInit(HeapIndex::HEAP_EXTENDED, maxNBitValue(57 - 1) + 1 + rootDeviceIndex * heapExtendedSize, heapExtendedSize); diff --git a/shared/source/memory_manager/gfx_partition.h b/shared/source/memory_manager/gfx_partition.h index e6d0fcb1b6..386415097d 100644 --- a/shared/source/memory_manager/gfx_partition.h +++ b/shared/source/memory_manager/gfx_partition.h @@ -28,6 +28,7 @@ enum class HeapIndex : uint32_t { HEAP_EXTERNAL_DEVICE_FRONT_WINDOW, HEAP_INTERNAL_FRONT_WINDOW, HEAP_INTERNAL_DEVICE_FRONT_WINDOW, + HEAP_EXTENDED_HOST, // Please put new heap indexes above this line TOTAL_HEAPS diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index be354a3ae1..c45b17fa6f 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -1977,10 +1977,10 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData & auto totalSizeToAlloc = alignedSize + alignment; uint64_t preferredAddress = 0; auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex); - auto canAllocateInHeapExtended = DebugManager.flags.AllocateHostAllocationsInHeapExtended.get(); - if (canAllocateInHeapExtended && allocationData.flags.isUSMHostAllocation && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u) { + auto canAllocateInHeapExtended = DebugManager.flags.AllocateHostAllocationsInHeapExtendedHost.get(); + if (canAllocateInHeapExtended && allocationData.flags.isUSMHostAllocation && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST) > 0u) { - preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED); + preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED_HOST); } auto cpuPointer = this->mmapFunction(reinterpret_cast(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); @@ -2130,9 +2130,9 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const uint64_t preferredAddress = 0; auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex); - auto canAllocateInHeapExtended = DebugManager.flags.AllocateSharedAllocationsInHeapExtended.get(); - if (canAllocateInHeapExtended && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u && !allocationData.flags.resource48Bit) { - preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED); + auto canAllocateInHeapExtended = DebugManager.flags.AllocateSharedAllocationsInHeapExtendedHost.get(); + if (canAllocateInHeapExtended && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST) > 0u && !allocationData.flags.resource48Bit) { + preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED_HOST); } auto cpuPointer = this->mmapFunction(reinterpret_cast(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); diff --git a/shared/test/common/mocks/mock_gfx_partition.h b/shared/test/common/mocks/mock_gfx_partition.h index 83871798aa..938a7ad7a5 100644 --- a/shared/test/common/mocks/mock_gfx_partition.h +++ b/shared/test/common/mocks/mock_gfx_partition.h @@ -66,6 +66,9 @@ class MockGfxPartition : public GfxPartition { GfxPartition::freeGpuAddressRange(gpuAddress, size); } } + void initHeap(HeapIndex heapIndex, uint64_t base, uint64_t size, size_t allocationAlignment) { + getHeap(heapIndex).init(base, size, allocationAlignment); + } uint32_t freeGpuAddressRangeCalled = 0u; bool callBasefreeGpuAddressRange = false; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index d66a9b4df5..015e01d50b 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -513,8 +513,8 @@ ExperimentalCopyThroughLockWaitlistSizeThreshold= -1 ForceDummyBlitWa = -1 DetectIndirectAccessInKernel = -1 OptimizeIoqBarriersHandling = -1 -AllocateSharedAllocationsInHeapExtended = 0 -AllocateHostAllocationsInHeapExtended = 1 +AllocateSharedAllocationsInHeapExtendedHost = 0 +AllocateHostAllocationsInHeapExtendedHost = 1 DirectSubmissionControllerMaxTimeout = -1 ExitOnSubmissionNumber = -1 ExitOnSubmissionMode = 0 diff --git a/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp b/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp index b253f69b4c..158289112d 100644 --- a/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp +++ b/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp @@ -869,8 +869,8 @@ TEST(GfxPartitionTest, givenGpuAddressSpaceIs57BitAndSeveralRootDevicesThenHeapE auto heapExtendedSize = MemoryConstants::teraByte; - EXPECT_EQ(heapExtendedSize, gfxPartition.getHeapSize(HeapIndex::HEAP_EXTENDED)); - EXPECT_LT(maxNBitValue(48), gfxPartition.getHeapBase(HeapIndex::HEAP_EXTENDED)); + EXPECT_EQ(heapExtendedSize, gfxPartition.getHeapSize(HeapIndex::HEAP_EXTENDED_HOST)); + EXPECT_LT(maxNBitValue(48), gfxPartition.getHeapBase(HeapIndex::HEAP_EXTENDED_HOST)); } { diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index c3526da174..be7ef5588f 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -6520,6 +6520,9 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingHostUSMT if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) { GTEST_SKIP(); } + auto mockGfxPartition = std::make_unique(); + mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k); + memoryManager->overrideGfxPartition(mockGfxPartition.release()); VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); VariableBackup backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true); SysCalls::mmapCapturedExtendedPointers.clear(); @@ -6537,8 +6540,8 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingHostUSMT auto gpuAddress = reinterpret_cast(SysCalls::mmapCapturedExtendedPointers[0]); SysCalls::mmapCapturedExtendedPointers.clear(); auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex); - EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); - EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); + EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress)); + EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress)); EXPECT_EQ(hostUSM->getGpuAddress(), gpuAddress); EXPECT_EQ(hostUSM->getReservedAddressPtr(), reinterpret_cast(gpuAddress)); @@ -6549,6 +6552,9 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingHostU if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) { GTEST_SKIP(); } + auto mockGfxPartition = std::make_unique(); + mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k); + memoryManager->overrideGfxPartition(mockGfxPartition.release()); VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); VariableBackup backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, false); SysCalls::mmapCapturedExtendedPointers.clear(); @@ -6566,8 +6572,8 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingHostU auto gpuAddress = reinterpret_cast(SysCalls::mmapCapturedExtendedPointers[0]); SysCalls::mmapCapturedExtendedPointers.clear(); auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex); - EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); - EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); + EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress)); + EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress)); EXPECT_NE(hostUSM->getGpuAddress(), gpuAddress); memoryManager->freeGraphicsMemory(hostUSM); @@ -6578,8 +6584,11 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuAndDisabledHeapExtende GTEST_SKIP(); } + auto mockGfxPartition = std::make_unique(); + mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k); + memoryManager->overrideGfxPartition(mockGfxPartition.release()); DebugManagerStateRestore restorer; - DebugManager.flags.AllocateHostAllocationsInHeapExtended.set(false); + DebugManager.flags.AllocateHostAllocationsInHeapExtendedHost.set(false); VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); VariableBackup backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true); SysCalls::mmapCapturedExtendedPointers.clear(); @@ -6602,8 +6611,11 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingSharedUS GTEST_SKIP(); } + auto mockGfxPartition = std::make_unique(); + mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k); + memoryManager->overrideGfxPartition(mockGfxPartition.release()); DebugManagerStateRestore restorer; - DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true); + DebugManager.flags.AllocateSharedAllocationsInHeapExtendedHost.set(true); VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); VariableBackup backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true); SysCalls::mmapCapturedExtendedPointers.clear(); @@ -6627,8 +6639,8 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingSharedUS auto gpuAddress = reinterpret_cast(SysCalls::mmapCapturedExtendedPointers[0]); SysCalls::mmapCapturedExtendedPointers.clear(); auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex); - EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); - EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); + EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress)); + EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress)); EXPECT_EQ(sharedUSM->getGpuAddress(), gpuAddress); EXPECT_EQ(sharedUSM->getReservedAddressPtr(), reinterpret_cast(gpuAddress)); @@ -6639,8 +6651,11 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingShare if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) { GTEST_SKIP(); } + auto mockGfxPartition = std::make_unique(); + mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k); + memoryManager->overrideGfxPartition(mockGfxPartition.release()); DebugManagerStateRestore restorer; - DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true); + DebugManager.flags.AllocateSharedAllocationsInHeapExtendedHost.set(true); VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); VariableBackup backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, false); SysCalls::mmapCapturedExtendedPointers.clear(); @@ -6664,8 +6679,8 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingShare auto gpuAddress = reinterpret_cast(SysCalls::mmapCapturedExtendedPointers[0]); SysCalls::mmapCapturedExtendedPointers.clear(); auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex); - EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); - EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); + EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress)); + EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED_HOST), gmmHelper->decanonize(gpuAddress)); EXPECT_NE(sharedUSM->getGpuAddress(), gpuAddress); memoryManager->freeGraphicsMemory(sharedUSM); @@ -6675,8 +6690,11 @@ TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocating48bResou if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) { GTEST_SKIP(); } + auto mockGfxPartition = std::make_unique(); + mockGfxPartition->initHeap(HeapIndex::HEAP_EXTENDED_HOST, maxNBitValue(48) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k); + memoryManager->overrideGfxPartition(mockGfxPartition.release()); DebugManagerStateRestore restorer; - DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true); + DebugManager.flags.AllocateSharedAllocationsInHeapExtendedHost.set(true); VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); VariableBackup backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true); SysCalls::mmapCapturedExtendedPointers.clear();