fix: Avoid redundant padding in ISA allocations

- Add isaPaddingIncluded flag to AllocationProperties to control ISA
padding
- Modify MemoryManager to skip automatic padding when flag is set
- Update ISAPoolAllocator to manage padding explicitly and set the
flag
- Use actual buffer size for HeapAllocator instead of requested size
- Add test covering different padding scenarios

This prevents padding from being added twice, which wasted memory when
ISAPoolAllocator managed its own padding on top of the padding that
MemoryManager adds automatically.

Related-To: NEO-12287, HSD-18042276431
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-05-30 10:21:30 +00:00
committed by Compute-Runtime-Automation
parent a188c29e8c
commit e2228201ce
5 changed files with 50 additions and 16 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2024 Intel Corporation
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -50,6 +50,7 @@ struct AllocationProperties {
bool makeGPUVaDifferentThanCPUPtr = false;
uint32_t cacheRegion = 0;
bool makeDeviceBufferLockable = false;
bool isaPaddingIncluded = false;
AllocationProperties(uint32_t rootDeviceIndex, size_t size,
AllocationType allocationType, DeviceBitfield subDevicesBitfieldParam)

View File

@@ -668,7 +668,8 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo
}
allocationData.hostPtr = hostPtr;
if (GraphicsAllocation::isKernelIsaAllocationType(properties.allocationType)) {
if (GraphicsAllocation::isKernelIsaAllocationType(properties.allocationType) &&
!properties.isaPaddingIncluded) {
allocationData.size = properties.size + helper.getPaddingForISAAllocation();
} else {
allocationData.size = properties.size;

View File

@@ -16,15 +16,18 @@ namespace NEO {
// Builds a pool backed by a single kernel-ISA graphics allocation requested with storageSize,
// plus a chunk allocator that hands out sub-ranges of it.
ISAPool::ISAPool(Device *device, bool isBuiltin, size_t storageSize)
: BaseType(device->getMemoryManager(), nullptr), device(device), isBuiltin(isBuiltin) {
this->chunkAllocator.reset(new NEO::HeapAllocator(params.startingOffset, storageSize, MemoryConstants::pageSize, 0u));
// Builtin kernels use the internal ISA allocation type; everything else uses the regular one.
auto allocationType = isBuiltin ? NEO::AllocationType::kernelIsaInternal : NEO::AllocationType::kernelIsa;
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(),
storageSize,
allocationType,
device->getDeviceBitfield()});
AllocationProperties allocProperties = {device->getRootDeviceIndex(),
storageSize,
allocationType,
device->getDeviceBitfield()};
// Marks the requested size as already containing ISA padding so MemoryManager does not add its own.
allocProperties.isaPaddingIncluded = true;
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
// Size the chunk allocator from the buffer that was actually obtained; use 0 when the allocation failed.
this->chunkAllocator.reset(new NEO::HeapAllocator(params.startingOffset,
graphicsAllocation ? graphicsAllocation->getUnderlyingBufferSize() : 0u,
MemoryConstants::pageSize,
0u));
this->mainStorage.reset(graphicsAllocation);
this->mtx = std::make_unique<std::mutex>();
this->stackVec.push_back(graphicsAllocation);
}
@@ -70,29 +73,29 @@ ISAPoolAllocator::ISAPoolAllocator(Device *device) : device(device) {
*
* @return SharedIsaAllocation, or nullptr if the allocation did not succeed
*/
SharedIsaAllocation *ISAPoolAllocator::requestGraphicsAllocationForIsa(bool isBuiltin, size_t size) {
SharedIsaAllocation *ISAPoolAllocator::requestGraphicsAllocationForIsa(bool isBuiltin, size_t sizeWithPadding) {
// Hold allocatorMtx for the duration of the request.
std::unique_lock lock(allocatorMtx);
auto maxAllocationSize = getAllocationSize(isBuiltin);
// A request larger than the default pool size gets a dedicated pool sized to fit it.
if (size > maxAllocationSize) {
addNewBufferPool(ISAPool(device, isBuiltin, size));
if (sizeWithPadding > maxAllocationSize) {
addNewBufferPool(ISAPool(device, isBuiltin, sizeWithPadding));
}
auto sharedIsaAllocation = tryAllocateISA(isBuiltin, size);
auto sharedIsaAllocation = tryAllocateISA(isBuiltin, sizeWithPadding);
if (sharedIsaAllocation) {
return sharedIsaAllocation;
}
// First attempt failed: call drain() (presumably reclaims chunks queued for release - confirm) and retry.
drain();
sharedIsaAllocation = tryAllocateISA(isBuiltin, size);
sharedIsaAllocation = tryAllocateISA(isBuiltin, sizeWithPadding);
if (sharedIsaAllocation) {
return sharedIsaAllocation;
}
// Still no room: add a new pool of the default size and allocate from it.
addNewBufferPool(ISAPool(device, isBuiltin, getAllocationSize(isBuiltin)));
return tryAllocateISA(isBuiltin, size);
return tryAllocateISA(isBuiltin, sizeWithPadding);
}
/**

View File

@@ -44,7 +44,7 @@ class ISAPool : public AbstractBuffersPool<ISAPool, GraphicsAllocation> {
class ISAPoolAllocator : public AbstractBuffersAllocator<ISAPool, GraphicsAllocation> {
public:
ISAPoolAllocator(Device *device);
SharedIsaAllocation *requestGraphicsAllocationForIsa(bool isBuiltin, size_t size);
SharedIsaAllocation *requestGraphicsAllocationForIsa(bool isBuiltin, size_t sizeWithPadding);
void freeSharedIsaAllocation(SharedIsaAllocation *sharedIsaAllocation);
private:

View File

@@ -1588,6 +1588,35 @@ TEST(OsAgnosticMemoryManager, givenForcedSystemMemoryForIsaAndEnabledLocalMemory
memoryManager.freeGraphicsMemory(allocation);
}
// Checks both values of AllocationProperties::isaPaddingIncluded for a kernelIsa allocation:
// with the flag cleared the underlying buffer is expected to grow by the ISA padding,
// with the flag set it is expected to match the requested size exactly.
TEST(OsAgnosticMemoryManager, givenDifferentIsaPaddingIncludedFlagValuesWhenAllocatingGraphicsMemoryForIsaThenUnderlyingBufferSizeMatchesExpectation) {
    DebugManagerStateRestore dbgRestore;
    debugManager.flags.ForceSystemMemoryPlacement.set(1 << (static_cast<uint32_t>(AllocationType::kernelIsa) - 1));

    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
    rootDeviceEnvironment.getMutableHardwareInfo()->featureTable.flags.ftrLocalMemory = true;
    const auto isaPadding = rootDeviceEnvironment.getHelper<GfxCoreHelper>().getPaddingForISAAllocation();

    MockMemoryManager memoryManager(false, true, executionEnvironment);

    size_t kernelIsaSize = 4096;
    AllocationProperties allocProperties = {0, kernelIsaSize, AllocationType::kernelIsa, 1};

    for (const bool paddingAlreadyIncluded : {false, true}) {
        allocProperties.isaPaddingIncluded = paddingAlreadyIncluded;
        // Padding is only expected on top of the request when the caller has not included it.
        const size_t expectedSize = paddingAlreadyIncluded ? kernelIsaSize : kernelIsaSize + isaPadding;

        auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(allocProperties);
        ASSERT_NE(nullptr, allocation);
        EXPECT_EQ(expectedSize, allocation->getUnderlyingBufferSize());

        memoryManager.freeGraphicsMemory(allocation);
    }
}
class MemoryManagerWithAsyncDeleterTest : public ::testing::Test {
public:
MemoryManagerWithAsyncDeleterTest() : memoryManager(false, false){};