From 363aa8cdbd8a5186d8cbad375927258648ccd70d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Zwoli=C5=84ski?= Date: Tue, 8 Apr 2025 13:22:21 +0000 Subject: [PATCH] performance: align local memory Tag allocations to 2MB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align local memory allocations of tag types to 2MB when 2MB alignment is enabled via the product helper (is2MBLocalMemAlignmentEnabled flag). Refactored the allocateGraphicsMemoryInDevicePool function to improve readability and maintainability. Simplified the logic for determining base size and final alignment by reducing redundant code. Related-To: NEO-12287 Signed-off-by: Fabian Zwoliński --- .../memory_manager/graphics_allocation.h | 6 +++ .../os_interface/linux/drm_memory_manager.cpp | 35 ++++++++--------- ...m_memory_manager_localmem_prelim_tests.cpp | 8 ++-- .../linux/drm_memory_manager_tests.cpp | 38 +++++++++++++++++++ 4 files changed, 63 insertions(+), 24 deletions(-) diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index 8d928e24ad..c751a08500 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -253,6 +253,12 @@ class GraphicsAllocation : public IDNode, NEO::NonCopyableAn type == AllocationType::globalSurface; } + static bool is2MBPageAllocationType(AllocationType type) { + return type == AllocationType::timestampPacketTagBuffer || + type == AllocationType::gpuTimestampDeviceBuffer || + type == AllocationType::profilingTagBuffer; + } + static uint32_t getNumHandlesForKmdSharedAllocation(uint32_t numBanks); void *getReservedAddressPtr() const { diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 129b489ba4..d40677afa9 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ 
b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -2121,40 +2121,35 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo(); std::unique_ptr gmm; - size_t sizeAligned = 0; - size_t finalAlignment = MemoryConstants::pageSize64k; auto gmmHelper = getGmmHelper(allocationData.rootDeviceIndex); auto &productHelper = gmmHelper->getRootDeviceEnvironment().getHelper(); + size_t baseSize = 0u; if (allocationData.type == AllocationType::image) { allocationData.imgInfo->useLocalMemory = true; gmm = std::make_unique(gmmHelper, *allocationData.imgInfo, allocationData.storageInfo, allocationData.flags.preferCompressed); - if (productHelper.is2MBLocalMemAlignmentEnabled() && - allocationData.imgInfo->size >= MemoryConstants::pageSize2M) { - finalAlignment = MemoryConstants::pageSize2M; - } - - sizeAligned = alignUp(allocationData.imgInfo->size, finalAlignment); - + baseSize = allocationData.imgInfo->size; + } else if (allocationData.type == AllocationType::writeCombined) { + baseSize = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte; } else { - if (allocationData.type == AllocationType::writeCombined) { - sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte; - } else { - sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k); - } + baseSize = allocationData.size; + } - if (productHelper.is2MBLocalMemAlignmentEnabled() && - allocationData.size >= MemoryConstants::pageSize2M) { + size_t finalAlignment = MemoryConstants::pageSize64k; + if (debugManager.flags.ExperimentalAlignLocalMemorySizeTo2MB.get()) { + finalAlignment = MemoryConstants::pageSize2M; + } else if (productHelper.is2MBLocalMemAlignmentEnabled()) { + if (baseSize >= MemoryConstants::pageSize2M || + 
GraphicsAllocation::is2MBPageAllocationType(allocationData.type)) { finalAlignment = MemoryConstants::pageSize2M; } + } - if (debugManager.flags.ExperimentalAlignLocalMemorySizeTo2MB.get()) { - finalAlignment = MemoryConstants::pageSize2M; - } + size_t sizeAligned = alignUp(baseSize, finalAlignment); - sizeAligned = alignUp(sizeAligned, finalAlignment); + if (allocationData.type != AllocationType::image) { gmm = this->makeGmmIfSingleHandle(allocationData, sizeAligned); } diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_localmem_prelim_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_localmem_prelim_tests.cpp index 1f3f5256f8..590b378120 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_localmem_prelim_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_localmem_prelim_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1993,10 +1993,10 @@ TEST_F(DrmMemoryManagerLocalMemoryPrelimTest, givenSupportedTypeWhenAllocatingIn const bool prefer57bitAddressing = memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapExtended) > 0 && !allocData.flags.resource48Bit; auto heap = HeapIndex::heapStandard64KB; - if (prefer2MBAlignment) { - heap = HeapIndex::heapStandard2MB; - } else if (prefer57bitAddressing) { + if (prefer57bitAddressing) { heap = HeapIndex::heapExtended; + } else if (prefer2MBAlignment) { + heap = HeapIndex::heapStandard2MB; } EXPECT_LT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapBase(heap)), gpuAddress); diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index f95d6e287d..e2007bda6e 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ 
b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -7517,6 +7517,44 @@ TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, givenEnabled2MBSizeAlignmentWhe memoryManager->freeGraphicsMemory(allocation); } +TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, Given2MBLocalMemAlignmentEnabledWhenAllocating2MBPageTypeInDevicePoolThenAllocationIs2MBAligned) { + auto mockProductHelper = new MockProductHelper; + executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->productHelper.reset(mockProductHelper); + mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true; + + ASSERT_TRUE(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->productHelper->is2MBLocalMemAlignmentEnabled()); + + for (uint32_t i = 0; i < static_cast(AllocationType::count); i++) { + auto allocType = static_cast(i); + + if (!GraphicsAllocation::is2MBPageAllocationType(allocType)) { + continue; + } + + const auto requestedSize = MemoryConstants::pageSize; + const auto expectedAlignedSize = alignUp(requestedSize, MemoryConstants::pageSize2M); + + AllocationData allocationData; + allocationData.allFlags = 0; + allocationData.flags.allocateMemory = true; + allocationData.rootDeviceIndex = rootDeviceIndex; + allocationData.type = allocType; + allocationData.flags.resource48Bit = true; + allocationData.size = requestedSize; + + MemoryManager::AllocationStatus allocationStatus; + auto memoryManager = createMemoryManager(); + auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); + ASSERT_NE(nullptr, allocation); + EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); + EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::heapStandard2MB)); + EXPECT_EQ(expectedAlignedSize, allocation->getUnderlyingBufferSize()); + EXPECT_EQ(expectedAlignedSize, allocation->getReservedAddressSize()); + EXPECT_TRUE(isAligned(allocation->getGpuAddress())); + memoryManager->freeGraphicsMemory(allocation); + 
} +} + TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenNotSetUseSystemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedForBufferThenLocalMemoryAllocationIsReturnedFromStandard64KbHeap) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData;