feature: add pooling of USM global/constant surface

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-09-19 14:53:48 +00:00
committed by Compute-Runtime-Automation
parent 0b6b0e3954
commit a1c5fa1a13
15 changed files with 635 additions and 42 deletions

View File

@@ -19,6 +19,24 @@ class MockUsmMemAllocPool : public UsmMemAllocPool {
using UsmMemAllocPool::poolMemoryType;
using UsmMemAllocPool::poolSize;
// Test hook for pool initialization: while callBaseInitialize is set the call is
// forwarded to UsmMemAllocPool::initialize(); otherwise the canned initializeResult
// is returned and the base implementation is not invoked.
bool initialize(SVMAllocsManager *svmMemoryManager, const UnifiedMemoryProperties &memoryProperties, size_t poolSize, size_t minServicedSize, size_t maxServicedSize) override {
    if (!callBaseInitialize) {
        return initializeResult;
    }
    return UsmMemAllocPool::initialize(svmMemoryManager, memoryProperties, poolSize, minServicedSize, maxServicedSize);
}
// When true (default), initialize() forwards to the base class.
bool callBaseInitialize = true;
// Value reported by initialize() when the base call is bypassed.
bool initializeResult = false;
// Test hook for pool allocations: while callBaseCreateUnifiedMemoryAllocation is set the
// request is forwarded to UsmMemAllocPool::createUnifiedMemoryAllocation(); otherwise the
// canned createUnifiedMemoryAllocationResult is returned as-is.
void *createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties) override {
    if (!callBaseCreateUnifiedMemoryAllocation) {
        return createUnifiedMemoryAllocationResult;
    }
    return UsmMemAllocPool::createUnifiedMemoryAllocation(size, memoryProperties);
}
// When true (default), createUnifiedMemoryAllocation() forwards to the base class.
bool callBaseCreateUnifiedMemoryAllocation = true;
// Pointer reported when the base call is bypassed; nullptr by default.
void *createUnifiedMemoryAllocationResult = nullptr;
void cleanup() override {
++cleanupCalled;
if (callBaseCleanup) {
@@ -27,6 +45,12 @@ class MockUsmMemAllocPool : public UsmMemAllocPool {
}
uint32_t cleanupCalled = 0u;
bool callBaseCleanup = true;
// Counts every call in freeSVMAllocCalled and then delegates to
// UsmMemAllocPool::freeSVMAlloc(), returning its result unchanged.
bool freeSVMAlloc(const void *ptr, bool blocking) override {
    freeSVMAllocCalled += 1u;
    const bool baseResult = UsmMemAllocPool::freeSVMAlloc(ptr, blocking);
    return baseResult;
}
// Number of freeSVMAlloc() invocations observed by this mock.
uint32_t freeSVMAllocCalled = 0u;
};
class MockUsmMemAllocPoolsManager : public UsmMemAllocPoolsManager {

View File

@@ -10,12 +10,15 @@
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/blit_helper.h"
#include "shared/source/helpers/local_memory_access_modes.h"
#include "shared/source/memory_manager/unified_memory_pooling.h"
#include "shared/source/program/program_initialization.h"
#include "shared/test/common/compiler_interface/linker_mock.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_product_helper.h"
#include "shared/test/common/mocks/mock_svm_manager.h"
#include "shared/test/common/mocks/mock_usm_memory_pool.h"
#include "gtest/gtest.h"
@@ -313,3 +316,175 @@ TEST(AllocateGlobalSurfaceTest, whenAllocatingGlobalSurfaceWithZeroInitSizeGreat
device.getMemoryManager()->freeGraphicsMemory(alloc);
}
// Fixture for allocateGlobalsSurface() tests that involve the device's USM pools for
// constant and global surfaces.
struct AllocateGlobalSurfaceWithUsmPoolTest : public ::testing::Test {
    // Installs a mock memory manager, fresh constant/global surface pools and a mock
    // product helper on the device, then creates the SVM manager on top of the device's
    // memory manager.
    void SetUp() override {
        device.injectMemoryManager(new MockMemoryManager());
        device.resetUsmConstantSurfaceAllocPool(new UsmMemAllocPool);
        device.resetUsmGlobalSurfaceAllocPool(new UsmMemAllocPool);

        mockProductHelper = new MockProductHelper;
        auto &rootDeviceEnvironment = device.getRootDeviceEnvironmentRef();
        rootDeviceEnvironment.productHelper.reset(mockProductHelper);

        auto *memoryManager = device.getMemoryManager();
        svmAllocsManager.reset(new MockSVMAllocsManager(memoryManager));
    }

    // Raw observer; the pointer is handed to the root device environment's productHelper
    // via reset() in SetUp(), so tests only use it to tweak mock results.
    MockProductHelper *mockProductHelper{nullptr};
    std::unique_ptr<MockSVMAllocsManager> svmAllocsManager;
    // Linker inputs whose export traits are set by the individual tests.
    WhiteBox<LinkerInput> linkerInputExportGlobalVariables;
    WhiteBox<LinkerInput> linkerInputExportGlobalConstants;
    // NOTE(review): members are destroyed in reverse declaration order, so `device` is
    // destroyed before `svmAllocsManager` - presumably intentional; confirm before reordering.
    MockDevice device{};
    DebugManagerStateRestore restore;
};
// With 2MB local memory alignment reported as disabled, both the constant and the global
// surface must be allocated outside the device's USM pools: the surface spans its whole
// graphics allocation (sizes match, offset is zero) and is released through the SVM manager.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, GivenUsmAllocPoolAnd2MBLocalMemAlignmentDisabledThenGlobalSurfaceAllocationNotTakenFromUsmPool) {
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;

    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;
    linkerInputExportGlobalConstants.traits.exportsGlobalConstants = true;
    std::vector<uint8_t> initData;
    initData.resize(64, 7U);

    // Single place for the 64-bit GPU address -> host pointer conversion, so that the
    // isInPool() and freeSVMAlloc() calls convert the address the same way (via uintptr_t).
    auto toPtr = [](uint64_t gpuAddress) { return reinterpret_cast<void *>(static_cast<uintptr_t>(gpuAddress)); };

    std::unique_ptr<SharedPoolAllocation> globalSurface;

    // Constant surface.
    globalSurface.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()));
    ASSERT_NE(nullptr, globalSurface);
    EXPECT_FALSE(device.getUsmConstantSurfaceAllocPool()->isInPool(toPtr(globalSurface->getGpuAddress())));
    EXPECT_EQ(globalSurface->getGraphicsAllocation()->getUnderlyingBufferSize(), globalSurface->getSize());
    EXPECT_EQ(0u, globalSurface->getOffset());
    svmAllocsManager->freeSVMAlloc(toPtr(globalSurface->getGpuAddress()));

    // Global (non-constant) surface.
    globalSurface.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()));
    ASSERT_NE(nullptr, globalSurface);
    EXPECT_FALSE(device.getUsmGlobalSurfaceAllocPool()->isInPool(toPtr(globalSurface->getGpuAddress())));
    EXPECT_EQ(globalSurface->getGraphicsAllocation()->getUnderlyingBufferSize(), globalSurface->getSize());
    EXPECT_EQ(0u, globalSurface->getOffset());
    svmAllocsManager->freeSVMAlloc(toPtr(globalSurface->getGpuAddress()));
}
// With 2MB local memory alignment reported as enabled, constant and global surfaces must be
// carved out of the matching device USM pool: each surface is smaller than its backing
// graphics allocation, carries the init data, and two surfaces of the same kind share one
// graphics allocation at different offsets/GPU addresses.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, GivenUsmAllocPoolAnd2MBLocalMemAlignmentEnabledThenGlobalSurfaceAllocationTakenFromUsmPool) {
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;

    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;
    linkerInputExportGlobalConstants.traits.exportsGlobalConstants = true;
    std::vector<uint8_t> initData;
    initData.resize(64, 7U);

    // GPU addresses are 64-bit integers; go through uintptr_t before forming a pointer,
    // matching the conversion used by the sibling tests of this fixture.
    auto toPtr = [](uint64_t gpuAddress) { return reinterpret_cast<void *>(static_cast<uintptr_t>(gpuAddress)); };

    // Non-fatal expectations for one surface that should have been served by `pool`.
    auto expectServedFromPool = [&](auto &surface, auto *pool, AllocationType expectedType) {
        EXPECT_TRUE(pool->isInPool(toPtr(surface.getGpuAddress())));
        EXPECT_NE(surface.getGraphicsAllocation()->getUnderlyingBufferSize(), surface.getSize());
        EXPECT_EQ(0, memcmp(surface.getUnderlyingBuffer(), initData.data(), initData.size()));
        EXPECT_TRUE(surface.getGraphicsAllocation()->isMemObjectsAllocationWithWritableFlags());
        EXPECT_EQ(expectedType, surface.getGraphicsAllocation()->getAllocationType());
    };

    // Non-fatal expectations for two surfaces that should be distinct chunks of one allocation.
    auto expectDistinctChunksOfSameAllocation = [](auto &first, auto &second) {
        EXPECT_EQ(first.getGraphicsAllocation(), second.getGraphicsAllocation());
        EXPECT_EQ(first.getSize(), second.getSize());
        EXPECT_NE(first.getGpuAddress(), second.getGpuAddress());
        EXPECT_NE(first.getOffset(), second.getOffset());
    };

    {
        SCOPED_TRACE("constant surface");
        std::unique_ptr<SharedPoolAllocation> constantSurface1;
        std::unique_ptr<SharedPoolAllocation> constantSurface2;

        constantSurface1.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()));
        // Fatal checks stay at test scope so a null surface aborts the whole test.
        ASSERT_NE(nullptr, constantSurface1);
        expectServedFromPool(*constantSurface1, device.getUsmConstantSurfaceAllocPool(), AllocationType::constantSurface);

        constantSurface2.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()));
        ASSERT_NE(nullptr, constantSurface2);
        expectServedFromPool(*constantSurface2, device.getUsmConstantSurfaceAllocPool(), AllocationType::constantSurface);

        expectDistinctChunksOfSameAllocation(*constantSurface1, *constantSurface2);
    }

    {
        SCOPED_TRACE("global surface");
        std::unique_ptr<SharedPoolAllocation> globalSurface1;
        std::unique_ptr<SharedPoolAllocation> globalSurface2;

        globalSurface1.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()));
        ASSERT_NE(nullptr, globalSurface1);
        expectServedFromPool(*globalSurface1, device.getUsmGlobalSurfaceAllocPool(), AllocationType::globalSurface);

        globalSurface2.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()));
        ASSERT_NE(nullptr, globalSurface2);
        expectServedFromPool(*globalSurface2, device.getUsmGlobalSurfaceAllocPool(), AllocationType::globalSurface);

        expectDistinctChunksOfSameAllocation(*globalSurface1, *globalSurface2);
    }
}
// Even with 2MB local memory alignment enabled, a pool whose initialize() reports failure
// must not serve the surface: allocation still succeeds, but the returned address is not
// inside the corresponding pool.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, Given2MBLocalMemAlignmentEnabledButUsmPoolInitializeFailsThenDoNotUseUsmPool) {
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;

    auto usmConstantSurfaceAllocPool = new MockUsmMemAllocPool;
    auto usmGlobalSurfaceAllocPool = new MockUsmMemAllocPool;

    device.resetUsmConstantSurfaceAllocPool(usmConstantSurfaceAllocPool);
    device.resetUsmGlobalSurfaceAllocPool(usmGlobalSurfaceAllocPool);

    // Bypass the base initialize() and make both pools report failure.
    usmConstantSurfaceAllocPool->callBaseInitialize = false;
    usmConstantSurfaceAllocPool->initializeResult = false;
    usmGlobalSurfaceAllocPool->callBaseInitialize = false;
    usmGlobalSurfaceAllocPool->initializeResult = false;

    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;
    linkerInputExportGlobalConstants.traits.exportsGlobalConstants = true;
    std::vector<uint8_t> initData;
    initData.resize(64, 7U);

    // Single place for the 64-bit GPU address -> host pointer conversion, so that the
    // isInPool() and freeSVMAlloc() calls convert the address the same way (via uintptr_t).
    auto toPtr = [](uint64_t gpuAddress) { return reinterpret_cast<void *>(static_cast<uintptr_t>(gpuAddress)); };

    std::unique_ptr<SharedPoolAllocation> globalSurface;

    // Constant surface.
    globalSurface.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()));
    ASSERT_NE(nullptr, globalSurface);
    EXPECT_FALSE(device.getUsmConstantSurfaceAllocPool()->isInPool(toPtr(globalSurface->getGpuAddress())));
    svmAllocsManager->freeSVMAlloc(toPtr(globalSurface->getGpuAddress()));

    // Global (non-constant) surface.
    globalSurface.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()));
    ASSERT_NE(nullptr, globalSurface);
    EXPECT_FALSE(device.getUsmGlobalSurfaceAllocPool()->isInPool(toPtr(globalSurface->getGpuAddress())));
    svmAllocsManager->freeSVMAlloc(toPtr(globalSurface->getGpuAddress()));
}
// Even with 2MB local memory alignment enabled, when the pool's
// createUnifiedMemoryAllocation() yields nullptr the surface must still be allocated,
// just not from the pool: the returned address is not inside the corresponding pool.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, Given2MBLocalMemAlignmentEnabledButAllocatingFromUsmPoolFailsThenDoNotUseUsmPool) {
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;

    auto usmConstantSurfaceAllocPool = new MockUsmMemAllocPool;
    auto usmGlobalSurfaceAllocPool = new MockUsmMemAllocPool;

    device.resetUsmConstantSurfaceAllocPool(usmConstantSurfaceAllocPool);
    device.resetUsmGlobalSurfaceAllocPool(usmGlobalSurfaceAllocPool);

    // Bypass the base createUnifiedMemoryAllocation() and make both pools return nullptr.
    usmConstantSurfaceAllocPool->callBaseCreateUnifiedMemoryAllocation = false;
    usmConstantSurfaceAllocPool->createUnifiedMemoryAllocationResult = nullptr;
    usmGlobalSurfaceAllocPool->callBaseCreateUnifiedMemoryAllocation = false;
    usmGlobalSurfaceAllocPool->createUnifiedMemoryAllocationResult = nullptr;

    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;
    linkerInputExportGlobalConstants.traits.exportsGlobalConstants = true;
    std::vector<uint8_t> initData;
    initData.resize(64, 7U);

    // Single place for the 64-bit GPU address -> host pointer conversion, so that the
    // isInPool() and freeSVMAlloc() calls convert the address the same way (via uintptr_t).
    auto toPtr = [](uint64_t gpuAddress) { return reinterpret_cast<void *>(static_cast<uintptr_t>(gpuAddress)); };

    std::unique_ptr<SharedPoolAllocation> globalSurface;

    // Constant surface.
    globalSurface.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalConstants, initData.data()));
    ASSERT_NE(nullptr, globalSurface);
    EXPECT_FALSE(device.getUsmConstantSurfaceAllocPool()->isInPool(toPtr(globalSurface->getGpuAddress())));
    svmAllocsManager->freeSVMAlloc(toPtr(globalSurface->getGpuAddress()));

    // Global (non-constant) surface.
    globalSurface.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()));
    ASSERT_NE(nullptr, globalSurface);
    EXPECT_FALSE(device.getUsmGlobalSurfaceAllocPool()->isInPool(toPtr(globalSurface->getGpuAddress())));
    svmAllocsManager->freeSVMAlloc(toPtr(globalSurface->getGpuAddress()));
}