fix: apply 2MB alignment to large local memory image allocations

This patch rounds the allocation size up to a multiple of 2MB for all
image allocations of size >= 2MB located in local memory.
Whether 2MB alignment is applied is determined by the function
`is2MBLocalMemAlignmentEnabled`.

Related-To: NEO-12287

Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-01-31 11:20:34 +00:00
committed by Compute-Runtime-Automation
parent 67136d14e7
commit 7e9c358a73
3 changed files with 74 additions and 10 deletions

View File

@@ -1984,7 +1984,7 @@ GraphicsAllocation *DrmMemoryManager::allocatePhysicalLocalDeviceMemory(const Al
auto allocation = this->makeDrmAllocation(allocationData, std::move(gmm), 0u, sizeAligned);
auto *drmAllocation = static_cast<DrmAllocation *>(allocation.get());
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), 0u, maxOsContextCount)) {
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), 0u, maxOsContextCount, MemoryConstants::pageSize64k)) {
for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
delete allocation->getGmm(handleId);
}
@@ -2060,13 +2060,22 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
std::unique_ptr<Gmm> gmm;
size_t sizeAligned = 0;
size_t finalAlignment = MemoryConstants::pageSize64k;
auto gmmHelper = getGmmHelper(allocationData.rootDeviceIndex);
auto &productHelper = gmmHelper->getRootDeviceEnvironment().getHelper<ProductHelper>();
if (allocationData.type == AllocationType::image) {
allocationData.imgInfo->useLocalMemory = true;
gmm = std::make_unique<Gmm>(gmmHelper, *allocationData.imgInfo,
allocationData.storageInfo, allocationData.flags.preferCompressed);
sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k);
if (productHelper.is2MBLocalMemAlignmentEnabled() &&
allocationData.imgInfo->size >= MemoryConstants::pageSize2M) {
finalAlignment = MemoryConstants::pageSize2M;
}
sizeAligned = alignUp(allocationData.imgInfo->size, finalAlignment);
} else {
if (allocationData.type == AllocationType::writeCombined) {
sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte;
@@ -2074,15 +2083,16 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k);
}
auto &productHelper = gmmHelper->getRootDeviceEnvironment().getHelper<ProductHelper>();
if (productHelper.is2MBLocalMemAlignmentEnabled() &&
allocationData.size >= MemoryConstants::pageSize2M) {
sizeAligned = alignUp(sizeAligned, MemoryConstants::pageSize2M);
finalAlignment = MemoryConstants::pageSize2M;
}
if (debugManager.flags.ExperimentalAlignLocalMemorySizeTo2MB.get()) {
sizeAligned = alignUp(sizeAligned, MemoryConstants::pageSize2M);
finalAlignment = MemoryConstants::pageSize2M;
}
sizeAligned = alignUp(sizeAligned, finalAlignment);
gmm = this->makeGmmIfSingleHandle(allocationData, sizeAligned);
}
@@ -2100,7 +2110,7 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
auto *drmAllocation = static_cast<DrmAllocation *>(allocation.get());
auto *graphicsAllocation = static_cast<GraphicsAllocation *>(allocation.get());
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount)) {
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount, finalAlignment)) {
for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
delete allocation->getGmm(handleId);
}
@@ -2262,7 +2272,7 @@ bool DrmMemoryManager::createDrmChunkedAllocation(Drm *drm, DrmAllocation *alloc
return true;
}
bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount) {
bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount, size_t preferredAlignment) {
BufferObjects bos{};
auto &storageInfo = allocation->storageInfo;
auto boAddress = gpuAddress;
@@ -2311,7 +2321,7 @@ bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation,
}
}
auto gmm = allocation->getGmm(handleId);
auto boSize = alignUp(gmm->gmmResourceInfo->getSizeAllocation(), MemoryConstants::pageSize64k);
auto boSize = alignUp(gmm->gmmResourceInfo->getSizeAllocation(), preferredAlignment);
bos[handleId] = createBufferObjectInMemoryRegion(allocation->getRootDeviceIndex(), gmm, allocation->getAllocationType(), boAddress, boSize, memoryBanks, maxOsContextCount, pairHandle,
!allocation->isAllocatedInLocalMemoryPool(), allocation->isUsmHostAllocation());
if (nullptr == bos[handleId]) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -179,7 +179,7 @@ class DrmMemoryManager : public MemoryManager {
void cleanupBeforeReturn(const AllocationData &allocationData, GfxPartition *gfxPartition, DrmAllocation *drmAllocation, GraphicsAllocation *graphicsAllocation, uint64_t &gpuAddress, size_t &sizeAllocated);
GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override;
bool createDrmChunkedAllocation(Drm *drm, DrmAllocation *allocation, uint64_t boAddress, size_t boSize, size_t maxOsContextCount);
bool createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount);
bool createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount, size_t preferredAlignment);
void registerAllocationInOs(GraphicsAllocation *allocation) override;
void waitOnCompletionFence(GraphicsAllocation *allocation);
bool allocationTypeForCompletionFence(AllocationType allocationType);