performance: align local memory Tag allocations to 2MB

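Align local memory allocations of tag types (timestampPacketTagBuffer,
gpuTimestampDeviceBuffer, profilingTagBuffer) to 2MB, regardless of
their requested size, when 2MB alignment is enabled via the product
helper (ProductHelper::is2MBLocalMemAlignmentEnabled()).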

Refactor the allocateGraphicsMemoryInDevicePool function to improve
readability and maintainability: compute the base size per allocation
type first, then determine the final alignment in a single place
instead of repeating the alignment logic in every branch.

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-04-08 13:22:21 +00:00
committed by Compute-Runtime-Automation
parent c7fe7dde90
commit 363aa8cdbd
4 changed files with 63 additions and 24 deletions

View File

@@ -253,6 +253,12 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation>, NEO::NonCopyableAn
type == AllocationType::globalSurface;
}
// Tells whether `type` belongs to the set of tag-buffer allocation types
// that this class classifies as 2MB-page allocations.
// Returns true for timestampPacketTagBuffer, gpuTimestampDeviceBuffer and
// profilingTagBuffer; false for every other allocation type.
static bool is2MBPageAllocationType(AllocationType type) {
    switch (type) {
    case AllocationType::timestampPacketTagBuffer:
    case AllocationType::gpuTimestampDeviceBuffer:
    case AllocationType::profilingTagBuffer:
        return true;
    default:
        return false;
    }
}
static uint32_t getNumHandlesForKmdSharedAllocation(uint32_t numBanks);
void *getReservedAddressPtr() const {

View File

@@ -2121,40 +2121,35 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo();
std::unique_ptr<Gmm> gmm;
size_t sizeAligned = 0;
size_t finalAlignment = MemoryConstants::pageSize64k;
auto gmmHelper = getGmmHelper(allocationData.rootDeviceIndex);
auto &productHelper = gmmHelper->getRootDeviceEnvironment().getHelper<ProductHelper>();
size_t baseSize = 0u;
if (allocationData.type == AllocationType::image) {
allocationData.imgInfo->useLocalMemory = true;
gmm = std::make_unique<Gmm>(gmmHelper, *allocationData.imgInfo,
allocationData.storageInfo, allocationData.flags.preferCompressed);
if (productHelper.is2MBLocalMemAlignmentEnabled() &&
allocationData.imgInfo->size >= MemoryConstants::pageSize2M) {
finalAlignment = MemoryConstants::pageSize2M;
}
sizeAligned = alignUp(allocationData.imgInfo->size, finalAlignment);
baseSize = allocationData.imgInfo->size;
} else if (allocationData.type == AllocationType::writeCombined) {
baseSize = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte;
} else {
if (allocationData.type == AllocationType::writeCombined) {
sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte;
} else {
sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k);
}
baseSize = allocationData.size;
}
if (productHelper.is2MBLocalMemAlignmentEnabled() &&
allocationData.size >= MemoryConstants::pageSize2M) {
size_t finalAlignment = MemoryConstants::pageSize64k;
if (debugManager.flags.ExperimentalAlignLocalMemorySizeTo2MB.get()) {
finalAlignment = MemoryConstants::pageSize2M;
} else if (productHelper.is2MBLocalMemAlignmentEnabled()) {
if (baseSize >= MemoryConstants::pageSize2M ||
GraphicsAllocation::is2MBPageAllocationType(allocationData.type)) {
finalAlignment = MemoryConstants::pageSize2M;
}
}
if (debugManager.flags.ExperimentalAlignLocalMemorySizeTo2MB.get()) {
finalAlignment = MemoryConstants::pageSize2M;
}
size_t sizeAligned = alignUp(baseSize, finalAlignment);
sizeAligned = alignUp(sizeAligned, finalAlignment);
if (allocationData.type != AllocationType::image) {
gmm = this->makeGmmIfSingleHandle(allocationData, sizeAligned);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -1993,10 +1993,10 @@ TEST_F(DrmMemoryManagerLocalMemoryPrelimTest, givenSupportedTypeWhenAllocatingIn
const bool prefer57bitAddressing = memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapExtended) > 0 && !allocData.flags.resource48Bit;
auto heap = HeapIndex::heapStandard64KB;
if (prefer2MBAlignment) {
heap = HeapIndex::heapStandard2MB;
} else if (prefer57bitAddressing) {
if (prefer57bitAddressing) {
heap = HeapIndex::heapExtended;
} else if (prefer2MBAlignment) {
heap = HeapIndex::heapStandard2MB;
}
EXPECT_LT(gmmHelper->canonize(memoryManager->getGfxPartition(0)->getHeapBase(heap)), gpuAddress);

View File

@@ -7517,6 +7517,44 @@ TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, givenEnabled2MBSizeAlignmentWhe
memoryManager->freeGraphicsMemory(allocation);
}
// With the product helper reporting 2MB local-memory alignment as enabled, every
// allocation type accepted by GraphicsAllocation::is2MBPageAllocationType must come
// back from the device pool with a size rounded up to 2MB and a 2MB-aligned GPU
// address inside the 2MB standard heap, even when only a single page is requested.
TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, Given2MBLocalMemAlignmentEnabledWhenAllocating2MBPageTypeInDevicePoolThenAllocationIs2MBAligned) {
    // Swap in a mock product helper that reports the 2MB alignment feature as enabled.
    auto productHelperMock = new MockProductHelper;
    executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->productHelper.reset(productHelperMock);
    productHelperMock->is2MBLocalMemAlignmentEnabledResult = true;
    ASSERT_TRUE(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->productHelper->is2MBLocalMemAlignmentEnabled());

    // The request is deliberately a single page: the expected size below is the
    // request rounded up to pageSize2M, so the checks fail without 2MB rounding.
    const auto pageSizedRequest = MemoryConstants::pageSize;
    const auto sizeRoundedTo2MB = alignUp(pageSizedRequest, MemoryConstants::pageSize2M);

    for (uint32_t typeIndex = 0; typeIndex < static_cast<uint32_t>(AllocationType::count); typeIndex++) {
        const auto currentType = static_cast<AllocationType>(typeIndex);

        // Only the allocation types classified as 2MB-page types are exercised.
        if (GraphicsAllocation::is2MBPageAllocationType(currentType)) {
            AllocationData request;
            request.allFlags = 0;
            request.flags.allocateMemory = true;
            request.flags.resource48Bit = true;
            request.rootDeviceIndex = rootDeviceIndex;
            request.type = currentType;
            request.size = pageSizedRequest;

            MemoryManager::AllocationStatus status;
            // A fresh memory manager is created for each tested allocation type.
            auto localMemoryManager = createMemoryManager();
            auto tagAllocation = localMemoryManager->allocateGraphicsMemoryInDevicePool(request, status);

            ASSERT_NE(nullptr, tagAllocation);
            EXPECT_EQ(MemoryManager::AllocationStatus::Success, status);

            // Placement: the allocation lives in the 2MB standard heap.
            EXPECT_TRUE(isAllocationWithinHeap(*localMemoryManager, *tagAllocation, HeapIndex::heapStandard2MB));
            // Size: both the buffer and the reserved address range are rounded up to 2MB.
            EXPECT_EQ(sizeRoundedTo2MB, tagAllocation->getUnderlyingBufferSize());
            EXPECT_EQ(sizeRoundedTo2MB, tagAllocation->getReservedAddressSize());
            // Address: the GPU virtual address starts on a 2MB boundary.
            EXPECT_TRUE(isAligned<MemoryConstants::pageSize2M>(tagAllocation->getGpuAddress()));

            localMemoryManager->freeGraphicsMemory(tagAllocation);
        }
    }
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenNotSetUseSystemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedForBufferThenLocalMemoryAllocationIsReturnedFromStandard64KbHeap) {
MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success;
AllocationData allocData;