From 7e9c358a734c783def5a70597bb865413b376046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Zwoli=C5=84ski?= Date: Fri, 31 Jan 2025 11:20:34 +0000 Subject: [PATCH] fix: apply 2MB alignment to large local memory image allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this patch, we align up the allocation size to 2MB for all image allocations >= 2MB located in local memory. 2MB alignment support is defined by function: `is2MBLocalMemAlignmentEnabled` Related-To: NEO-12287 Signed-off-by: Fabian Zwoliński --- .../os_interface/linux/drm_memory_manager.cpp | 26 ++++++--- .../os_interface/linux/drm_memory_manager.h | 4 +- .../linux/drm_memory_manager_tests.cpp | 54 +++++++++++++++++++ 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 3bfb79fcf6..8d9e2d5e6a 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -1984,7 +1984,7 @@ GraphicsAllocation *DrmMemoryManager::allocatePhysicalLocalDeviceMemory(const Al auto allocation = this->makeDrmAllocation(allocationData, std::move(gmm), 0u, sizeAligned); auto *drmAllocation = static_cast(allocation.get()); - if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), 0u, maxOsContextCount)) { + if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), 0u, maxOsContextCount, MemoryConstants::pageSize64k)) { for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) { delete allocation->getGmm(handleId); } @@ -2060,13 +2060,22 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A std::unique_ptr gmm; size_t sizeAligned = 0; + size_t finalAlignment = MemoryConstants::pageSize64k; auto gmmHelper = getGmmHelper(allocationData.rootDeviceIndex); + auto 
&productHelper = gmmHelper->getRootDeviceEnvironment().getHelper(); if (allocationData.type == AllocationType::image) { allocationData.imgInfo->useLocalMemory = true; gmm = std::make_unique(gmmHelper, *allocationData.imgInfo, allocationData.storageInfo, allocationData.flags.preferCompressed); - sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k); + + if (productHelper.is2MBLocalMemAlignmentEnabled() && + allocationData.imgInfo->size >= MemoryConstants::pageSize2M) { + finalAlignment = MemoryConstants::pageSize2M; + } + + sizeAligned = alignUp(allocationData.imgInfo->size, finalAlignment); + } else { if (allocationData.type == AllocationType::writeCombined) { sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte; @@ -2074,15 +2083,16 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k); } - auto &productHelper = gmmHelper->getRootDeviceEnvironment().getHelper(); if (productHelper.is2MBLocalMemAlignmentEnabled() && allocationData.size >= MemoryConstants::pageSize2M) { - sizeAligned = alignUp(sizeAligned, MemoryConstants::pageSize2M); + finalAlignment = MemoryConstants::pageSize2M; } if (debugManager.flags.ExperimentalAlignLocalMemorySizeTo2MB.get()) { - sizeAligned = alignUp(sizeAligned, MemoryConstants::pageSize2M); + finalAlignment = MemoryConstants::pageSize2M; } + + sizeAligned = alignUp(sizeAligned, finalAlignment); gmm = this->makeGmmIfSingleHandle(allocationData, sizeAligned); } @@ -2100,7 +2110,7 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A auto *drmAllocation = static_cast(allocation.get()); auto *graphicsAllocation = static_cast(allocation.get()); - if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount)) { + if 
(!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount, finalAlignment)) { for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) { delete allocation->getGmm(handleId); } @@ -2262,7 +2272,7 @@ bool DrmMemoryManager::createDrmChunkedAllocation(Drm *drm, DrmAllocation *alloc return true; } -bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount) { +bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount, size_t preferredAlignment) { BufferObjects bos{}; auto &storageInfo = allocation->storageInfo; auto boAddress = gpuAddress; @@ -2311,7 +2321,7 @@ bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, } } auto gmm = allocation->getGmm(handleId); - auto boSize = alignUp(gmm->gmmResourceInfo->getSizeAllocation(), MemoryConstants::pageSize64k); + auto boSize = alignUp(gmm->gmmResourceInfo->getSizeAllocation(), preferredAlignment); bos[handleId] = createBufferObjectInMemoryRegion(allocation->getRootDeviceIndex(), gmm, allocation->getAllocationType(), boAddress, boSize, memoryBanks, maxOsContextCount, pairHandle, !allocation->isAllocatedInLocalMemoryPool(), allocation->isUsmHostAllocation()); if (nullptr == bos[handleId]) { diff --git a/shared/source/os_interface/linux/drm_memory_manager.h b/shared/source/os_interface/linux/drm_memory_manager.h index 1e84d10338..7cc40a8994 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.h +++ b/shared/source/os_interface/linux/drm_memory_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -179,7 +179,7 @@ class DrmMemoryManager : public MemoryManager { void cleanupBeforeReturn(const AllocationData &allocationData, GfxPartition *gfxPartition, DrmAllocation 
*drmAllocation, GraphicsAllocation *graphicsAllocation, uint64_t &gpuAddress, size_t &sizeAllocated); GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override; bool createDrmChunkedAllocation(Drm *drm, DrmAllocation *allocation, uint64_t boAddress, size_t boSize, size_t maxOsContextCount); - bool createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount); + bool createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount, size_t preferredAlignment); void registerAllocationInOs(GraphicsAllocation *allocation) override; void waitOnCompletionFence(GraphicsAllocation *allocation); bool allocationTypeForCompletionFence(AllocationType allocationType); diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index f99fe6630e..03e2002602 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -7696,6 +7696,60 @@ TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDebugVar memoryManager->freeGraphicsMemory(allocation2); } +TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenEnabled2MBSizeAlignmentWhenAllocatingLargeImageAllocationThenUseProperAlignment) { + auto mockProductHelper = new MockProductHelper; + executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->productHelper.reset(mockProductHelper); + mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true; + + ASSERT_TRUE(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->productHelper->is2MBLocalMemAlignmentEnabled()); + + ImageDescriptor imgDesc = {}; + imgDesc.imageType = ImageType::image1D; + imgDesc.imageWidth = 2 * MemoryConstants::megaByte + 1; + auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, 
nullptr); + + MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; + AllocationData allocData; + allocData.allFlags = 0; + allocData.size = MemoryConstants::pageSize; + allocData.type = AllocationType::image; + allocData.flags.resource48Bit = true; + allocData.imgInfo = &imgInfo; + allocData.rootDeviceIndex = rootDeviceIndex; + + auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); + EXPECT_NE(nullptr, allocation); + EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); + + EXPECT_TRUE(allocData.imgInfo->useLocalMemory); + EXPECT_EQ(MemoryPool::localMemory, allocation->getMemoryPool()); + + auto gmm = allocation->getDefaultGmm(); + EXPECT_NE(nullptr, gmm); + EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); + + auto gpuAddress = allocation->getGpuAddress(); + auto sizeAligned = alignUp(allocData.imgInfo->size, MemoryConstants::pageSize2M); + + EXPECT_NE(0u, gpuAddress); + + auto gmmHelper = device->getGmmHelper(); + EXPECT_LT(gmmHelper->canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::heapStandard2MB)), gpuAddress); + EXPECT_GT(gmmHelper->canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::heapStandard2MB)), gpuAddress); + EXPECT_EQ(0u, allocation->getGpuBaseAddress()); + EXPECT_EQ(sizeAligned, allocation->getUnderlyingBufferSize()); + EXPECT_EQ(gpuAddress, reinterpret_cast(allocation->getReservedAddressPtr())); + EXPECT_EQ(sizeAligned, allocation->getReservedAddressSize()); + + auto drmAllocation = static_cast(allocation); + auto bo = drmAllocation->getBO(); + EXPECT_NE(nullptr, bo); + EXPECT_EQ(gpuAddress, bo->peekAddress()); + EXPECT_EQ(sizeAligned, bo->peekSize()); + + memoryManager->freeGraphicsMemory(allocation); +} + struct DrmMemoryManagerToTestCopyMemoryToAllocationBanks : public DrmMemoryManager { DrmMemoryManagerToTestCopyMemoryToAllocationBanks(ExecutionEnvironment &executionEnvironment, size_t 
lockableLocalMemorySize) : DrmMemoryManager(GemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) {