fix: apply 2MB alignment to large local memory image allocations

This patch aligns the allocation size up to a 2MB boundary for all
image allocations >= 2MB located in local memory.
Whether 2MB alignment is enabled is determined by the function
`is2MBLocalMemAlignmentEnabled`.

Related-To: NEO-12287

Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-01-31 11:20:34 +00:00
committed by Compute-Runtime-Automation
parent 67136d14e7
commit 7e9c358a73
3 changed files with 74 additions and 10 deletions

View File

@@ -1984,7 +1984,7 @@ GraphicsAllocation *DrmMemoryManager::allocatePhysicalLocalDeviceMemory(const Al
auto allocation = this->makeDrmAllocation(allocationData, std::move(gmm), 0u, sizeAligned);
auto *drmAllocation = static_cast<DrmAllocation *>(allocation.get());
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), 0u, maxOsContextCount)) {
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), 0u, maxOsContextCount, MemoryConstants::pageSize64k)) {
for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
delete allocation->getGmm(handleId);
}
@@ -2060,13 +2060,22 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
std::unique_ptr<Gmm> gmm;
size_t sizeAligned = 0;
size_t finalAlignment = MemoryConstants::pageSize64k;
auto gmmHelper = getGmmHelper(allocationData.rootDeviceIndex);
auto &productHelper = gmmHelper->getRootDeviceEnvironment().getHelper<ProductHelper>();
if (allocationData.type == AllocationType::image) {
allocationData.imgInfo->useLocalMemory = true;
gmm = std::make_unique<Gmm>(gmmHelper, *allocationData.imgInfo,
allocationData.storageInfo, allocationData.flags.preferCompressed);
sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k);
if (productHelper.is2MBLocalMemAlignmentEnabled() &&
allocationData.imgInfo->size >= MemoryConstants::pageSize2M) {
finalAlignment = MemoryConstants::pageSize2M;
}
sizeAligned = alignUp(allocationData.imgInfo->size, finalAlignment);
} else {
if (allocationData.type == AllocationType::writeCombined) {
sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte;
@@ -2074,15 +2083,16 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k);
}
auto &productHelper = gmmHelper->getRootDeviceEnvironment().getHelper<ProductHelper>();
if (productHelper.is2MBLocalMemAlignmentEnabled() &&
allocationData.size >= MemoryConstants::pageSize2M) {
sizeAligned = alignUp(sizeAligned, MemoryConstants::pageSize2M);
finalAlignment = MemoryConstants::pageSize2M;
}
if (debugManager.flags.ExperimentalAlignLocalMemorySizeTo2MB.get()) {
sizeAligned = alignUp(sizeAligned, MemoryConstants::pageSize2M);
finalAlignment = MemoryConstants::pageSize2M;
}
sizeAligned = alignUp(sizeAligned, finalAlignment);
gmm = this->makeGmmIfSingleHandle(allocationData, sizeAligned);
}
@@ -2100,7 +2110,7 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
auto *drmAllocation = static_cast<DrmAllocation *>(allocation.get());
auto *graphicsAllocation = static_cast<GraphicsAllocation *>(allocation.get());
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount)) {
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount, finalAlignment)) {
for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
delete allocation->getGmm(handleId);
}
@@ -2262,7 +2272,7 @@ bool DrmMemoryManager::createDrmChunkedAllocation(Drm *drm, DrmAllocation *alloc
return true;
}
bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount) {
bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount, size_t preferredAlignment) {
BufferObjects bos{};
auto &storageInfo = allocation->storageInfo;
auto boAddress = gpuAddress;
@@ -2311,7 +2321,7 @@ bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation,
}
}
auto gmm = allocation->getGmm(handleId);
auto boSize = alignUp(gmm->gmmResourceInfo->getSizeAllocation(), MemoryConstants::pageSize64k);
auto boSize = alignUp(gmm->gmmResourceInfo->getSizeAllocation(), preferredAlignment);
bos[handleId] = createBufferObjectInMemoryRegion(allocation->getRootDeviceIndex(), gmm, allocation->getAllocationType(), boAddress, boSize, memoryBanks, maxOsContextCount, pairHandle,
!allocation->isAllocatedInLocalMemoryPool(), allocation->isUsmHostAllocation());
if (nullptr == bos[handleId]) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -179,7 +179,7 @@ class DrmMemoryManager : public MemoryManager {
void cleanupBeforeReturn(const AllocationData &allocationData, GfxPartition *gfxPartition, DrmAllocation *drmAllocation, GraphicsAllocation *graphicsAllocation, uint64_t &gpuAddress, size_t &sizeAllocated);
GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override;
bool createDrmChunkedAllocation(Drm *drm, DrmAllocation *allocation, uint64_t boAddress, size_t boSize, size_t maxOsContextCount);
bool createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount);
bool createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount, size_t preferredAlignment);
void registerAllocationInOs(GraphicsAllocation *allocation) override;
void waitOnCompletionFence(GraphicsAllocation *allocation);
bool allocationTypeForCompletionFence(AllocationType allocationType);

View File

@@ -7696,6 +7696,60 @@ TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDebugVar
memoryManager->freeGraphicsMemory(allocation2);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenEnabled2MBSizeAlignmentWhenAllocatingLargeImageAllocationThenUseProperAlignment) {
auto mockProductHelper = new MockProductHelper;
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->productHelper.reset(mockProductHelper);
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
ASSERT_TRUE(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->productHelper->is2MBLocalMemAlignmentEnabled());
ImageDescriptor imgDesc = {};
imgDesc.imageType = ImageType::image1D;
imgDesc.imageWidth = 2 * MemoryConstants::megaByte + 1;
auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success;
AllocationData allocData;
allocData.allFlags = 0;
allocData.size = MemoryConstants::pageSize;
allocData.type = AllocationType::image;
allocData.flags.resource48Bit = true;
allocData.imgInfo = &imgInfo;
allocData.rootDeviceIndex = rootDeviceIndex;
auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status);
EXPECT_NE(nullptr, allocation);
EXPECT_EQ(MemoryManager::AllocationStatus::Success, status);
EXPECT_TRUE(allocData.imgInfo->useLocalMemory);
EXPECT_EQ(MemoryPool::localMemory, allocation->getMemoryPool());
auto gmm = allocation->getDefaultGmm();
EXPECT_NE(nullptr, gmm);
EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly);
auto gpuAddress = allocation->getGpuAddress();
auto sizeAligned = alignUp(allocData.imgInfo->size, MemoryConstants::pageSize2M);
EXPECT_NE(0u, gpuAddress);
auto gmmHelper = device->getGmmHelper();
EXPECT_LT(gmmHelper->canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::heapStandard2MB)), gpuAddress);
EXPECT_GT(gmmHelper->canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::heapStandard2MB)), gpuAddress);
EXPECT_EQ(0u, allocation->getGpuBaseAddress());
EXPECT_EQ(sizeAligned, allocation->getUnderlyingBufferSize());
EXPECT_EQ(gpuAddress, reinterpret_cast<uint64_t>(allocation->getReservedAddressPtr()));
EXPECT_EQ(sizeAligned, allocation->getReservedAddressSize());
auto drmAllocation = static_cast<DrmAllocation *>(allocation);
auto bo = drmAllocation->getBO();
EXPECT_NE(nullptr, bo);
EXPECT_EQ(gpuAddress, bo->peekAddress());
EXPECT_EQ(sizeAligned, bo->peekSize());
memoryManager->freeGraphicsMemory(allocation);
}
struct DrmMemoryManagerToTestCopyMemoryToAllocationBanks : public DrmMemoryManager {
DrmMemoryManagerToTestCopyMemoryToAllocationBanks(ExecutionEnvironment &executionEnvironment, size_t lockableLocalMemorySize)
: DrmMemoryManager(GemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) {