feature: prepare for l0 usm device pooling

Related-To: NEO-6893

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2025-04-25 17:35:55 +00:00
committed by Compute-Runtime-Automation
parent c2266fc69e
commit 75e313ce28
23 changed files with 459 additions and 35 deletions

View File

@@ -274,12 +274,17 @@ ze_result_t ContextImp::allocDeviceMem(ze_device_handle_t hDevice,
unifiedMemoryProperties.allocationFlags.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
}
if (false == lookupTable.exportMemory &&
neoDevice->getUsmMemAllocPoolsManager()) {
neoDevice->getUsmMemAllocPoolsManager()->ensureInitialized(this->driverHandle->svmAllocsManager);
if (auto usmPtrFromPool = neoDevice->getUsmMemAllocPoolsManager()->createUnifiedMemoryAllocation(size, unifiedMemoryProperties)) {
*ptr = usmPtrFromPool;
return ZE_RESULT_SUCCESS;
if (false == lookupTable.exportMemory) {
if (neoDevice->getUsmMemAllocPoolsManager()) {
if (auto usmPtrFromPool = neoDevice->getUsmMemAllocPoolsManager()->createUnifiedMemoryAllocation(size, unifiedMemoryProperties)) {
*ptr = usmPtrFromPool;
return ZE_RESULT_SUCCESS;
}
} else if (neoDevice->getUsmMemAllocPool()) {
if (auto usmPtrFromPool = neoDevice->getUsmMemAllocPool()->createUnifiedMemoryAllocation(size, unifiedMemoryProperties)) {
*ptr = usmPtrFromPool;
return ZE_RESULT_SUCCESS;
}
}
}
@@ -464,6 +469,10 @@ ze_result_t ContextImp::freeMem(const void *ptr, bool blocking) {
blocking)) {
return ZE_RESULT_SUCCESS;
}
} else if (auto deviceUsmPool = allocation->device->getUsmMemAllocPool()) {
if (deviceUsmPool->freeSVMAlloc(ptr, blocking)) {
return ZE_RESULT_SUCCESS;
}
}
}
this->driverHandle->svmAllocsManager->freeSVMAlloc(const_cast<void *>(ptr), blocking);
@@ -487,6 +496,18 @@ ze_result_t ContextImp::freeMemExt(const ze_memory_free_ext_desc_t *pMemFreeDesc
this->freePeerAllocations(ptr, false, Device::fromHandle(pairDevice.second));
}
if (InternalMemoryType::hostUnifiedMemory == allocation->memoryType) {
if (this->driverHandle->usmHostMemAllocPool.freeSVMAlloc(ptr, false)) {
return ZE_RESULT_SUCCESS;
}
} else if (InternalMemoryType::deviceUnifiedMemory == allocation->memoryType) {
if (auto deviceUsmPool = allocation->device->getUsmMemAllocPool()) {
if (deviceUsmPool->freeSVMAlloc(ptr, false)) {
return ZE_RESULT_SUCCESS;
}
}
}
this->driverHandle->svmAllocsManager->freeSVMAllocDefer(const_cast<void *>(ptr));
return ZE_RESULT_SUCCESS;
}
@@ -641,6 +662,11 @@ void ContextImp::setIPCHandleData(NEO::GraphicsAllocation *graphicsAllocation, u
ipcData.poolOffset = poolOffset;
break;
}
} else if (auto deviceUsmMemAllocPool = neoDevice->getUsmMemAllocPool()) {
if (auto poolOffset = deviceUsmMemAllocPool->getOffsetInPool(addrToPtr(ptrAddress))) {
ipcData.poolOffset = poolOffset;
break;
}
}
}
}

View File

@@ -1625,6 +1625,8 @@ void DeviceImp::releaseResources() {
getNEODevice()->getMemoryManager()->freeGraphicsMemory(syncDispatchTokenAllocation);
getNEODevice()->cleanupUsmAllocationPool();
this->bcsSplit.releaseResources();
if (neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.get()) {

View File

@@ -300,7 +300,15 @@ ze_result_t DriverHandleImp::initialize(std::vector<std::unique_ptr<NEO::Device>
}
this->svmAllocsManager->initUsmAllocationsCaches(*this->devices[0]->getNEODevice());
this->initHostUsmAllocPool();
for (auto &device : this->devices) {
this->initDeviceUsmAllocPool(*device->getNEODevice());
if (auto deviceUsmAllocPool = device->getNEODevice()->getUsmMemAllocPool()) {
deviceUsmAllocPool->ensureInitialized(this->svmAllocsManager);
}
if (auto deviceUsmAllocPoolsManager = device->getNEODevice()->getUsmMemAllocPoolsManager()) {
deviceUsmAllocPoolsManager->ensureInitialized(this->svmAllocsManager);
}
}
this->numDevices = static_cast<uint32_t>(this->devices.size());
uuidTimestamp = static_cast<uint64_t>(std::chrono::system_clock::now().time_since_epoch().count());
@@ -352,6 +360,23 @@ void DriverHandleImp::initHostUsmAllocPool() {
}
}
// Creates the single USM allocation pool for `device` and hands it to the device,
// provided device USM pooling is enabled.
//
// Defaults: pooling is on only when both the API-specific config and the product helper
// allow it, and the pool is 2 MB. The EnableDeviceUsmAllocationPool debug flag, when set
// to anything other than -1, overrides both: a positive value enables pooling and is
// used as the pool size in megabytes; any other value disables pooling.
// Requests from 0 bytes up to 1 MB are passed to the pool as its serviced size range.
void DriverHandleImp::initDeviceUsmAllocPool(NEO::Device &device) {
    const uint64_t minServicedSize = 0u;
    const uint64_t maxServicedSize = 1 * MemoryConstants::megaByte;

    uint64_t poolSize = 2 * MemoryConstants::megaByte;
    bool enabled = NEO::ApiSpecificConfig::isDeviceUsmPoolingEnabled() && device.getProductHelper().isDeviceUsmPoolAllocatorSupported();

    // -1 means "flag not set": keep the defaults computed above.
    const auto poolFlag = NEO::debugManager.flags.EnableDeviceUsmAllocationPool.get();
    if (poolFlag != -1) {
        poolSize = poolFlag * MemoryConstants::megaByte;
        enabled = poolFlag > 0;
    }

    if (!enabled) {
        return;
    }

    // The raw pointer is passed to the device via resetUsmAllocationPool().
    device.resetUsmAllocationPool(new NEO::UsmMemAllocPool(rootDeviceIndices, deviceBitfields, &device, InternalMemoryType::deviceUnifiedMemory,
                                                           poolSize, minServicedSize, maxServicedSize));
}
ze_result_t DriverHandleImp::getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) {
// If the user has requested FLAT or COMBINED device hierarchy model, then report all the sub devices as devices.

View File

@@ -124,6 +124,7 @@ struct DriverHandleImp : public DriverHandle {
std::map<uint64_t, IpcHandleTracking *> &getIPCHandleMap() { return this->ipcHandles; };
[[nodiscard]] std::unique_lock<std::mutex> lockIPCHandleMap() { return std::unique_lock<std::mutex>(this->ipcHandleMapMutex); };
void initHostUsmAllocPool();
void initDeviceUsmAllocPool(NEO::Device &device);
std::unique_ptr<HostPointerManager> hostPointerManager;

View File

@@ -60,7 +60,7 @@ bool ApiSpecificConfig::isHostAllocationCacheEnabled() {
}
// Reports whether device USM pooling is enabled by default for this API.
// Returns false: pooling stays off unless the EnableDeviceUsmAllocationPool debug flag
// overrides it (see DriverHandleImp::initDeviceUsmAllocPool and Device::initializeCommonResources).
// The previous body had a stale `return true;` ahead of this return, which made the intended
// value unreachable and contradicted the unit tests that expect no pool by default.
bool ApiSpecificConfig::isDeviceUsmPoolingEnabled() {
    return false;
}
bool ApiSpecificConfig::isHostUsmPoolingEnabled() {

View File

@@ -59,6 +59,7 @@ struct DeviceFixture {
template <typename T>
struct DeviceFixtureWithCustomMemoryManager : public DeviceFixture {
void setUp() {
debugManager.flags.EnableDeviceUsmAllocationPool.set(0);
auto executionEnvironment = NEO::MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
memoryManager = new T(*executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
@@ -68,7 +69,7 @@ struct DeviceFixtureWithCustomMemoryManager : public DeviceFixture {
// Tears the fixture down by delegating to the base DeviceFixture implementation.
void tearDown() {
DeviceFixture::tearDown();
}
DebugManagerStateRestore restorer;
T *memoryManager = nullptr;
};

View File

@@ -536,6 +536,11 @@ void MemoryExportImportImplicitScalingTest::SetUp() {
}
void MemoryExportImportImplicitScalingTest::TearDown() {
// cleanup pool before restoring svm manager
for (auto device : driverHandle->devices) {
device->getNEODevice()->cleanupUsmAllocationPool();
device->getNEODevice()->resetUsmAllocationPool(nullptr);
}
driverHandle->svmAllocsManager = prevSvmAllocsManager;
delete currSvmAllocsManager;
driverHandle->setMemoryManager(prevMemoryManager);

View File

@@ -5488,8 +5488,9 @@ HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCreatingCounterBasedEv
EXPECT_EQ(counterValue, inOrderExecInfo->getCounterValue());
EXPECT_EQ(castToUint64(externalStorageAllocProperties.deviceAddress), inOrderExecInfo->getBaseDeviceAddress());
EXPECT_NE(nullptr, inOrderExecInfo->getDeviceCounterAllocation());
auto lockedPtr = reinterpret_cast<uint64_t *>(ptrOffset(inOrderExecInfo->getDeviceCounterAllocation()->getLockedPtr(), sizeof(uint64_t)));
SvmAllocationData *deviceAlloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(devAddress));
auto offset = ptrDiff(devAddress, deviceAlloc->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress());
auto lockedPtr = reinterpret_cast<uint64_t *>(ptrOffset(inOrderExecInfo->getDeviceCounterAllocation()->getLockedPtr(), sizeof(uint64_t) + offset));
EXPECT_EQ(inOrderExecInfo->getBaseHostAddress(), lockedPtr);
EXPECT_EQ(inOrderExecInfo->getExternalHostAllocation(), inOrderExecInfo->getDeviceCounterAllocation());

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -34,6 +34,7 @@ namespace ult {
struct L0DebuggerWindowsFixture {
void setUp() {
debugManager.flags.ForcePreferredAllocationMethod.set(static_cast<int32_t>(GfxMemoryAllocationMethod::useUmdSystemPtr));
debugManager.flags.EnableDeviceUsmAllocationPool.set(0);
executionEnvironment = new NEO::ExecutionEnvironment;
executionEnvironment->setDebuggingMode(NEO::DebuggingMode::online);
executionEnvironment->prepareRootDeviceEnvironments(1);

View File

@@ -198,6 +198,7 @@ TEST_F(DriverVersionTest, givenExternalAllocatorWhenCallingGetExtensionPropertie
DebugManagerStateRestore restorer;
NEO::debugManager.flags.UseBindlessMode.set(1);
NEO::debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
NEO::debugManager.flags.EnableDeviceUsmAllocationPool.set(0);
auto hwInfo = *NEO::defaultHwInfo;
NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);

View File

@@ -50,7 +50,7 @@ TEST(ApiSpecificConfigL0Tests, WhenCheckingIfHostDeviceAllocationCacheIsEnabledT
// Both host and device USM pooling must be reported as disabled by default for this API.
// The block previously asserted EXPECT_TRUE and EXPECT_FALSE on the same
// isDeviceUsmPoolingEnabled() call, which can never pass; only the expectation that matches
// the test name ("ThenReturnFalse") is kept.
TEST(ApiSpecificConfigL0Tests, WhenCheckingIfUsmAllocPoolingIsEnabledThenReturnFalse) {
    EXPECT_FALSE(ApiSpecificConfig::isHostUsmPoolingEnabled());
    EXPECT_FALSE(ApiSpecificConfig::isDeviceUsmPoolingEnabled());
}
TEST(ApiSpecificConfigL0Tests, GivenDebugFlagCombinationsGetCorrectSharedAllocPrefetchEnabled) {

View File

@@ -171,6 +171,7 @@ struct MemoryExportImportObtainFdTest : public ::testing::Test {
void SetUp() override {
DebugManagerStateRestore restorer;
debugManager.flags.EnableImplicitScaling.set(1);
debugManager.flags.EnableDeviceUsmAllocationPool.set(0); // not compatible with MemoryManagerIpcImplicitScalingObtainFdMock
executionEnvironment = new NEO::ExecutionEnvironment();
executionEnvironment->prepareRootDeviceEnvironments(numRootDevices);
@@ -604,7 +605,7 @@ struct DriverHandleObtaindFdMock : public L0::DriverHandleImp {
struct MemoryObtainFdTest : public ::testing::Test {
void SetUp() override {
DebugManagerStateRestore restorer;
debugManager.flags.EnableDeviceUsmAllocationPool.set(0); // not compatible with MemoryManagerIpcObtainFdMock
executionEnvironment = new NEO::ExecutionEnvironment();
executionEnvironment->prepareRootDeviceEnvironments(numRootDevices);
NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();

View File

@@ -324,6 +324,11 @@ TEST_F(MemoryExportImportImplicitScalingTest,
using MemoryTest = Test<DeviceFixture>;
struct CompressionMemoryTest : public MemoryTest {
// Disables both the host and the device USM allocation pools through their debug flags,
// then runs the base MemoryTest set-up.
// NOTE(review): the flags are set before MemoryTest::SetUp() presumably so that driver
// initialization already sees pooling disabled — confirm against the base fixture.
void SetUp() override {
    const int poolingDisabled = 0;
    debugManager.flags.EnableDeviceUsmAllocationPool.set(poolingDisabled);
    debugManager.flags.EnableHostUsmAllocationPool.set(poolingDisabled);
    MemoryTest::SetUp();
}
GraphicsAllocation *allocDeviceMem(size_t size) {
ptr = nullptr;
ze_result_t result = context->allocDeviceMem(device->toHandle(),
@@ -1331,11 +1336,16 @@ TEST_F(MemoryTest, givenProductWithNon48bForRTWhenAllocatingSharedMemoryAsRayTra
TEST_F(MemoryTest, givenProductWith48bForRTWhenAllocatingDeviceMemoryAsRayTracingAllocationAddressIsIn48Bits) {
size_t size = 10;
size_t alignment = 1u;
void *ptr = reinterpret_cast<void *>(0x1234);
ze_device_mem_alloc_desc_t deviceDesc = {};
// do warmup alloc to make sure usm device pool is allocated before validation in mem mgr
{
void *ptr;
EXPECT_EQ(ZE_RESULT_SUCCESS, context->allocDeviceMem(device->toHandle(), &deviceDesc, size, 0u, &ptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, context->freeMem(ptr));
}
void *ptr = reinterpret_cast<void *>(0x1234);
ze_raytracing_mem_alloc_ext_desc_t rtDesc = {};
rtDesc.stype = ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC;
deviceDesc.pNext = &rtDesc;
@@ -1515,6 +1525,13 @@ TEST_F(MemoryTest, whenAllocatingDeviceMemoryThenAlignmentIsPassedCorrectlyAndMe
auto memoryManager = static_cast<MockMemoryManager *>(neoDevice->getMemoryManager());
// do warmup alloc to make sure usm device pool is allocated before validation in mem mgr
{
void *ptr;
EXPECT_EQ(ZE_RESULT_SUCCESS, context->allocDeviceMem(device->toHandle(), &deviceDesc, size, 0u, &ptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, context->freeMem(ptr));
}
size_t alignment = 8 * MemoryConstants::megaByte;
do {
alignment >>= 1;
@@ -1665,7 +1682,6 @@ struct SVMAllocsManagerFreeExtMock : public NEO::SVMAllocsManager {
struct FreeExtTests : public ::testing::Test {
void SetUp() override {
neoDevice =
NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get());
auto mockBuiltIns = new MockBuiltins();
@@ -2010,6 +2026,13 @@ TEST_F(FreeExtTests,
TEST_F(FreeExtTests,
whenAllocMemFailsWithDeferredFreeAllocationThenMemoryFreedAndRetrySucceeds) {
// does not make sense for usm pooling, disable for test
driverHandle->usmHostMemAllocPool.cleanup();
if (auto deviceUsmPool = neoDevice->getUsmMemAllocPool()) {
deviceUsmPool->cleanup();
neoDevice->usmMemAllocPool.reset(nullptr);
}
size_t size = 1024;
size_t alignment = 1u;
void *ptr = nullptr;
@@ -2242,7 +2265,8 @@ struct ContextRelaxedSizeMock : public ContextImp {
struct MemoryRelaxedSizeTests : public ::testing::Test {
void SetUp() override {
// disable usm device pooling, used svm manager mock does not make svmData
debugManager.flags.EnableDeviceUsmAllocationPool.set(0);
neoDevice =
NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get());
auto mockBuiltIns = new MockBuiltins();
@@ -2274,6 +2298,7 @@ struct MemoryRelaxedSizeTests : public ::testing::Test {
NEO::MockDevice *neoDevice = nullptr;
L0::Device *device = nullptr;
std::unique_ptr<ContextRelaxedSizeMock> context;
DebugManagerStateRestore restorer;
};
TEST_F(MemoryRelaxedSizeTests,
@@ -3729,6 +3754,11 @@ struct MultipleDevicePeerAllocationTest : public ::testing::Test {
}
void TearDown() override {
// cleanup pool before restoring svm manager
for (auto device : driverHandle->devices) {
device->getNEODevice()->cleanupUsmAllocationPool();
device->getNEODevice()->resetUsmAllocationPool(nullptr);
}
driverHandle->svmAllocsManager = prevSvmAllocsManager;
delete currSvmAllocsManager;
driverHandle->setMemoryManager(prevMemoryManager);
@@ -3858,6 +3888,12 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
auto ret = deviceImp1->getCsrForOrdinalAndIndex(&csr, 0u, 0u, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, false);
ASSERT_EQ(ret, ZE_RESULT_SUCCESS);
// disable device usm pooling - allocation will not be pooled but pool will be initialized
for (auto device : driverHandle->devices) {
device->getNEODevice()->cleanupUsmAllocationPool();
device->getNEODevice()->resetUsmAllocationPool(nullptr);
}
size_t size = 1024;
size_t alignment = 1u;
ze_device_mem_alloc_desc_t deviceDesc = {};
@@ -3899,6 +3935,12 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
auto ret = deviceImp1->getCsrForOrdinalAndIndex(&csr, 0u, 0u, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, false);
ASSERT_EQ(ret, ZE_RESULT_SUCCESS);
// disable device usm pooling - allocation will not be pooled but pool will be initialized
for (auto device : driverHandle->devices) {
device->getNEODevice()->cleanupUsmAllocationPool();
device->getNEODevice()->resetUsmAllocationPool(nullptr);
}
size_t size = 1024;
size_t alignment = 1u;
ze_device_mem_alloc_desc_t deviceDesc = {};
@@ -5567,6 +5609,11 @@ struct MemAllocMultiSubDeviceTests : public ::testing::Test {
}
// Releases per-device USM pools, then puts the original SVM allocations manager back
// into the driver handle and deletes the one installed for the test.
void TearDown() override {
    // Pools must be cleaned up and dropped while the test's SVM manager is still installed,
    // i.e. before the previous manager is restored below.
    for (auto l0Dev : driverHandle->devices) {
        auto neoDev = l0Dev->getNEODevice();
        neoDev->cleanupUsmAllocationPool();
        neoDev->resetUsmAllocationPool(nullptr);
    }

    driverHandle->svmAllocsManager = prevSvmAllocsManager;
    delete currSvmAllocsManager;
}

View File

@@ -21,6 +21,7 @@
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/test/unit_tests/mock.h"
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
namespace L0 {
namespace ult {
template <int hostUsmPoolFlag = -1, int deviceUsmPoolFlag = -1, int poolingVersionFlag = -1>
@@ -53,6 +54,8 @@ struct AllocUsmPoolMemoryTest : public ::testing::Test {
driverHandle = std::make_unique<Mock<L0::DriverHandleImp>>();
driverHandle->initialize(std::move(devices));
l0Devices[0] = driverHandle->devices[0];
l0Devices[1] = driverHandle->devices[1];
ze_context_handle_t hContext;
ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
@@ -72,6 +75,7 @@ struct AllocUsmPoolMemoryTest : public ::testing::Test {
std::vector<std::unique_ptr<NEO::Device>> devices;
std::vector<MockProductHelper *> mockProductHelpers;
NEO::ExecutionEnvironment *executionEnvironment;
L0::Device *l0Devices[2];
constexpr static auto poolAllocationThreshold = 1 * MemoryConstants::megaByte;
};
@@ -116,27 +120,46 @@ TEST_F(AllocUsmHostEnabledMemoryTest, givenDriverHandleWhenCallingAllocHostMemWi
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr1Byte);
EXPECT_TRUE(driverHandle->usmHostMemAllocPool.isInPool(ptr1Byte));
EXPECT_EQ(1u, mockHostMemAllocPool->allocations.getNumAllocs());
EXPECT_EQ(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptr1Byte));
result = context->freeMem(ptr1Byte);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockHostMemAllocPool->allocations.getNumAllocs());
void *ptrThreshold = nullptr;
result = context->allocHostMem(&hostDesc, poolAllocationThreshold, 0u, &ptrThreshold);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptrThreshold);
EXPECT_TRUE(driverHandle->usmHostMemAllocPool.isInPool(ptrThreshold));
EXPECT_EQ(1u, mockHostMemAllocPool->allocations.getNumAllocs());
EXPECT_EQ(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptrThreshold));
result = context->freeMem(ptrThreshold);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockHostMemAllocPool->allocations.getNumAllocs());
void *ptrOverThreshold = nullptr;
result = context->allocHostMem(&hostDesc, poolAllocationThreshold + 1u, 0u, &ptrOverThreshold);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptrOverThreshold);
EXPECT_FALSE(driverHandle->usmHostMemAllocPool.isInPool(ptrOverThreshold));
EXPECT_EQ(0u, mockHostMemAllocPool->allocations.getNumAllocs());
EXPECT_NE(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptrOverThreshold));
result = context->freeMem(ptrOverThreshold);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockHostMemAllocPool->allocations.getNumAllocs());
void *ptrFreeMemExt = nullptr;
result = context->allocHostMem(&hostDesc, poolAllocationThreshold, 0u, &ptrFreeMemExt);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptrFreeMemExt);
EXPECT_TRUE(driverHandle->usmHostMemAllocPool.isInPool(ptrFreeMemExt));
EXPECT_EQ(1u, mockHostMemAllocPool->allocations.getNumAllocs());
EXPECT_EQ(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptrFreeMemExt));
ze_memory_free_ext_desc_t memFreeDesc = {};
memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE;
result = context->freeMemExt(&memFreeDesc, ptrFreeMemExt);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockHostMemAllocPool->allocations.getNumAllocs());
void *ptrExportMemory = nullptr;
ze_external_memory_export_desc_t externalMemoryDesc{};
@@ -147,9 +170,11 @@ TEST_F(AllocUsmHostEnabledMemoryTest, givenDriverHandleWhenCallingAllocHostMemWi
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptrExportMemory);
EXPECT_FALSE(driverHandle->usmHostMemAllocPool.isInPool(ptrExportMemory));
EXPECT_EQ(0u, mockHostMemAllocPool->allocations.getNumAllocs());
EXPECT_NE(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptrExportMemory));
result = context->freeMem(ptrExportMemory);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockHostMemAllocPool->allocations.getNumAllocs());
}
TEST_F(AllocUsmHostEnabledMemoryTest, givenDrmDriverModelWhenOpeningIpcHandleFromPooledAllocationThenOffsetIsApplied) {
@@ -187,6 +212,176 @@ TEST_F(AllocUsmHostEnabledMemoryTest, givenDrmDriverModelWhenOpeningIpcHandleFro
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
using AllocUsmDeviceDefaultMemoryTest = AllocUsmPoolMemoryTest<-1, -1>;
// With both pooling debug flags left at their defaults (-1), the device must not own a
// USM pool, neither before nor after a device allocation is made and freed.
TEST_F(AllocUsmDeviceDefaultMemoryTest, givenDeviceWhenCallingAllocDeviceMemThenDoNotUsePool) {
    auto neoDev = l0Devices[0]->getNEODevice();
    EXPECT_EQ(nullptr, neoDev->getUsmMemAllocPool());

    ze_device_mem_alloc_desc_t allocDesc = {};
    void *deviceMem = nullptr;
    ze_result_t status = context->allocDeviceMem(l0Devices[0], &allocDesc, 1u, 0u, &deviceMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, deviceMem);
    // Allocating must not lazily create a pool either.
    EXPECT_EQ(nullptr, neoDev->getUsmMemAllocPool());

    status = context->freeMem(deviceMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
using AllocUsmDeviceDisabledMemoryTest = AllocUsmPoolMemoryTest<-1, 0>;
// With the device pooling debug flag explicitly set to 0, the device must not own a
// USM pool, neither before nor after a device allocation is made and freed.
TEST_F(AllocUsmDeviceDisabledMemoryTest, givenDeviceWhenCallingAllocDeviceMemThenDoNotUsePool) {
    auto neoDev = l0Devices[0]->getNEODevice();
    EXPECT_EQ(nullptr, neoDev->getUsmMemAllocPool());

    ze_device_mem_alloc_desc_t allocDesc = {};
    void *deviceMem = nullptr;
    ze_result_t status = context->allocDeviceMem(l0Devices[0], &allocDesc, 1u, 0u, &deviceMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, deviceMem);
    // Allocating must not lazily create a pool either.
    EXPECT_EQ(nullptr, neoDev->getUsmMemAllocPool());

    status = context->freeMem(deviceMem);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
using AllocUsmDeviceEnabledMemoryTest = AllocUsmPoolMemoryTest<0, 1>;
// Walks device allocations of several sizes and kinds through an enabled device USM pool and
// checks, after every alloc and free, whether the pointer came from the pool and how many
// allocations the pool currently tracks. The steps share pool state, so their order matters.
TEST_F(AllocUsmDeviceEnabledMemoryTest, givenDeviceWhenCallingAllocDeviceMemWithVariousParametersThenUsePoolIfAllowed) {
// The cast gives the test access to pool internals (pool, allocations) through the mock type.
auto mockDeviceMemAllocPool = reinterpret_cast<MockUsmMemAllocPool *>(l0Devices[0]->getNEODevice()->getUsmMemAllocPool());
ASSERT_NE(nullptr, mockDeviceMemAllocPool);
// The pool is expected to be initialized already, before the first allocation request.
EXPECT_TRUE(mockDeviceMemAllocPool->isInitialized());
// Case 1: 1-byte request — expected to be served from the pool.
void *ptr1Byte = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
ze_result_t result = context->allocDeviceMem(l0Devices[0], &deviceDesc, 1u, 0u, &ptr1Byte);
EXPECT_TRUE(mockDeviceMemAllocPool->isInitialized());
// SVM data of the pool's backing allocation; pooled pointers must resolve to the same data.
auto poolAllocationData = driverHandle->svmAllocsManager->getSVMAlloc(mockDeviceMemAllocPool->pool);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr1Byte);
EXPECT_TRUE(mockDeviceMemAllocPool->isInPool(ptr1Byte));
EXPECT_EQ(1u, mockDeviceMemAllocPool->allocations.getNumAllocs());
EXPECT_EQ(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptr1Byte));
result = context->freeMem(ptr1Byte);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockDeviceMemAllocPool->allocations.getNumAllocs());
// Case 2: request of exactly poolAllocationThreshold bytes — still expected to be pooled.
void *ptrThreshold = nullptr;
result = context->allocDeviceMem(l0Devices[0], &deviceDesc, poolAllocationThreshold, 0u, &ptrThreshold);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptrThreshold);
EXPECT_TRUE(mockDeviceMemAllocPool->isInPool(ptrThreshold));
EXPECT_EQ(1u, mockDeviceMemAllocPool->allocations.getNumAllocs());
EXPECT_EQ(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptrThreshold));
result = context->freeMem(ptrThreshold);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockDeviceMemAllocPool->allocations.getNumAllocs());
// Case 3: one byte over the threshold — expected to bypass the pool.
void *ptrOverThreshold = nullptr;
result = context->allocDeviceMem(l0Devices[0], &deviceDesc, poolAllocationThreshold + 1u, 0u, &ptrOverThreshold);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptrOverThreshold);
EXPECT_FALSE(mockDeviceMemAllocPool->isInPool(ptrOverThreshold));
EXPECT_EQ(0u, mockDeviceMemAllocPool->allocations.getNumAllocs());
EXPECT_NE(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptrOverThreshold));
result = context->freeMem(ptrOverThreshold);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockDeviceMemAllocPool->allocations.getNumAllocs());
// Case 4: pooled allocation released through freeMemExt with the deferred-free policy —
// the pool's allocation count must drop back to zero.
void *ptrFreeMemExt = nullptr;
result = context->allocDeviceMem(l0Devices[0], &deviceDesc, poolAllocationThreshold, 0u, &ptrFreeMemExt);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptrFreeMemExt);
EXPECT_TRUE(mockDeviceMemAllocPool->isInPool(ptrFreeMemExt));
EXPECT_EQ(1u, mockDeviceMemAllocPool->allocations.getNumAllocs());
EXPECT_EQ(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptrFreeMemExt));
ze_memory_free_ext_desc_t memFreeDesc = {};
memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE;
result = context->freeMemExt(&memFreeDesc, ptrFreeMemExt);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockDeviceMemAllocPool->allocations.getNumAllocs());
// Case 5: allocation requesting external-memory export (DMA-BUF) — expected to bypass the
// pool even though its size is within the threshold.
void *ptrExportMemory = nullptr;
ze_external_memory_export_desc_t externalMemoryDesc{};
externalMemoryDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC;
externalMemoryDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;
deviceDesc.pNext = &externalMemoryDesc;
result = context->allocDeviceMem(l0Devices[0], &deviceDesc, poolAllocationThreshold, 0u, &ptrExportMemory);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptrExportMemory);
EXPECT_FALSE(mockDeviceMemAllocPool->isInPool(ptrExportMemory));
EXPECT_EQ(0u, mockDeviceMemAllocPool->allocations.getNumAllocs());
EXPECT_NE(poolAllocationData, driverHandle->svmAllocsManager->getSVMAlloc(ptrExportMemory));
// The non-pooled pointer is released through the same freeMemExt path as case 4.
result = context->freeMemExt(&memFreeDesc, ptrExportMemory);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, mockDeviceMemAllocPool->allocations.getNumAllocs());
}
// For a pooled device allocation, the IPC handle must record the allocation's offset inside
// the pool, and the pointer obtained by opening that handle must be shifted by the same offset.
TEST_F(AllocUsmDeviceEnabledMemoryTest, givenDrmDriverModelWhenOpeningIpcHandleFromPooledAllocationThenOffsetIsApplied) {
    auto devicePool = reinterpret_cast<MockUsmMemAllocPool *>(l0Devices[0]->getNEODevice()->getUsmMemAllocPool());
    ASSERT_NE(nullptr, devicePool);
    EXPECT_TRUE(devicePool->isInitialized());

    // Install a DRM driver model on root device 0.
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    rootDeviceEnvironment.osInterface.reset(new NEO::OSInterface());
    rootDeviceEnvironment.osInterface->setDriverModel(std::make_unique<NEO::MockDriverModelDRM>());

    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *pooledPtr = nullptr;
    ze_result_t status = context->allocDeviceMem(l0Devices[0], &deviceDesc, 1u, 0u, &pooledPtr);
    auto poolSvmData = driverHandle->svmAllocsManager->getSVMAlloc(devicePool->pool);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, pooledPtr);
    EXPECT_TRUE(devicePool->isInPool(pooledPtr));
    EXPECT_EQ(poolSvmData, driverHandle->svmAllocsManager->getSVMAlloc(pooledPtr));

    // Distance of the pooled chunk from the start of the pool; must be non-zero for the
    // offset checks below to be meaningful.
    const auto expectedOffset = ptrDiff(devicePool->allocations.get(pooledPtr)->address, castToUint64(devicePool->pool));
    EXPECT_NE(0u, expectedOffset);

    ze_ipc_mem_handle_t ipcHandle{};
    status = context->getIpcMemHandle(pooledPtr, &ipcHandle);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    IpcMemoryData &ipcData = *reinterpret_cast<IpcMemoryData *>(ipcHandle.data);
    EXPECT_EQ(expectedOffset, ipcData.poolOffset);

    ze_ipc_memory_flags_t ipcFlags{};
    void *importedPtr = nullptr;
    status = context->openIpcMemHandle(driverHandle->devices[0]->toHandle(), ipcHandle, ipcFlags, &importedPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    // NOTE(review): 0x1 is presumably the base address the mocked import path returns — confirm.
    EXPECT_EQ(ptrOffset(addrToPtr(0x1u), expectedOffset), importedPtr);
    context->closeIpcMemHandle(addrToPtr(0x1u));

    status = context->freeMem(pooledPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
// For a device allocation that does not come from the pool, the IPC handle must carry a zero
// pool offset and the pointer obtained by opening that handle must not be shifted.
TEST_F(AllocUsmDeviceEnabledMemoryTest, givenDrmDriverModelWhenOpeningIpcHandleFromNotPooledAllocationThenOffsetIsNotApplied) {
    auto devicePool = reinterpret_cast<MockUsmMemAllocPool *>(l0Devices[0]->getNEODevice()->getUsmMemAllocPool());
    ASSERT_NE(nullptr, devicePool);
    EXPECT_TRUE(devicePool->isInitialized());

    // Install a DRM driver model on root device 0.
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    rootDeviceEnvironment.osInterface.reset(new NEO::OSInterface());
    rootDeviceEnvironment.osInterface->setDriverModel(std::make_unique<NEO::MockDriverModelDRM>());

    // NOTE(review): the explicit alignment argument (1u) is what distinguishes this request
    // from the pooled case; presumably it makes the request ineligible for the pool — confirm.
    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *nonPooledPtr = nullptr;
    ze_result_t status = context->allocDeviceMem(l0Devices[0], &deviceDesc, 1u, 1u, &nonPooledPtr);
    // SVM data of the pool's backing allocation; the non-pooled pointer must not resolve to it.
    auto poolSvmData = driverHandle->svmAllocsManager->getSVMAlloc(devicePool->pool);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_NE(nullptr, nonPooledPtr);
    EXPECT_FALSE(devicePool->isInPool(nonPooledPtr));
    EXPECT_NE(poolSvmData, driverHandle->svmAllocsManager->getSVMAlloc(nonPooledPtr));

    ze_ipc_mem_handle_t ipcHandle{};
    status = context->getIpcMemHandle(nonPooledPtr, &ipcHandle);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    IpcMemoryData &ipcData = *reinterpret_cast<IpcMemoryData *>(ipcHandle.data);
    EXPECT_EQ(0u, ipcData.poolOffset);

    ze_ipc_memory_flags_t ipcFlags{};
    void *importedPtr = nullptr;
    status = context->openIpcMemHandle(driverHandle->devices[0]->toHandle(), ipcHandle, ipcFlags, &importedPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    // NOTE(review): 0x1 is presumably the base address the mocked import path returns — confirm.
    EXPECT_EQ(addrToPtr(0x1u), importedPtr);
    context->closeIpcMemHandle(addrToPtr(0x1u));

    status = context->freeMem(nonPooledPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
using AllocUsmDeviceEnabledMemoryNewVersionTest = AllocUsmPoolMemoryTest<-1, 1, 2>;
TEST_F(AllocUsmDeviceEnabledMemoryNewVersionTest, givenContextWhenAllocatingAndFreeingDeviceUsmThenPoolingIsUsed) {

View File

@@ -374,6 +374,7 @@ HWTEST_F(ModuleTest, givenStatefulBufferWhenOffsetIsPatchedThenAllocBaseAddressI
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, gpuAlloc);
const auto gpuAllocAddress = gpuAlloc->getGpuAddress();
uint32_t argIndex = 0u;
uint32_t offset = 0x1234;
@@ -386,7 +387,7 @@ HWTEST_F(ModuleTest, givenStatefulBufferWhenOffsetIsPatchedThenAllocBaseAddressI
auto argInfo = kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful);
auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
EXPECT_EQ(devicePtr, reinterpret_cast<void *>(surfaceStateAddress->getSurfaceBaseAddress()));
EXPECT_EQ(gpuAllocAddress, surfaceStateAddress->getSurfaceBaseAddress());
// Bindless arg
surfaceStateAddress->setSurfaceBaseAddress(0);
@@ -397,7 +398,7 @@ HWTEST_F(ModuleTest, givenStatefulBufferWhenOffsetIsPatchedThenAllocBaseAddressI
kernelImp->setBufferSurfaceState(argIndex, ptrOffset(devicePtr, offset), gpuAlloc);
surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(kernelImp->getSurfaceStateHeapData()));
EXPECT_EQ(devicePtr, reinterpret_cast<void *>(surfaceStateAddress->getSurfaceBaseAddress()));
EXPECT_EQ(gpuAllocAddress, surfaceStateAddress->getSurfaceBaseAddress());
Kernel::fromHandle(kernelHandle)->destroy();
@@ -446,7 +447,7 @@ HWTEST_F(ModuleTest, givenBufferWhenOffsetIsNotPatchedThenPassedPtrIsSetAsBaseAd
context->freeMem(devicePtr);
}
HWTEST_F(ModuleTest, givenBufferWhenOffsetIsNotPatchedThenSizeIsDecereasedByOffset) {
HWTEST_F(ModuleTest, givenBufferWhenOffsetIsNotPatchedThenSizeIsDecreasedByOffset) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
ze_kernel_handle_t kernelHandle;
@@ -470,6 +471,7 @@ HWTEST_F(ModuleTest, givenBufferWhenOffsetIsNotPatchedThenSizeIsDecereasedByOffs
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, gpuAlloc);
const auto devicePtrOffsetInAlloc = ptrDiff(devicePtr, gpuAlloc->getGpuAddress());
uint32_t argIndex = 0u;
uint32_t offset = 0x1234;
@@ -482,7 +484,8 @@ HWTEST_F(ModuleTest, givenBufferWhenOffsetIsNotPatchedThenSizeIsDecereasedByOffs
auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful);
auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
SurfaceStateBufferLength length = {0};
length.length = static_cast<uint32_t>((gpuAlloc->getUnderlyingBufferSize() - offset) - 1);
const auto totalOffset = offset + devicePtrOffsetInAlloc;
length.length = static_cast<uint32_t>((gpuAlloc->getUnderlyingBufferSize() - totalOffset) - 1);
EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));

View File

@@ -206,9 +206,14 @@ bool Device::initializeCommonResources() {
allocateDebugSurface(debugSurfaceSize);
}
if (ApiSpecificConfig::isDeviceUsmPoolingEnabled() &&
getProductHelper().isDeviceUsmPoolAllocatorSupported() &&
NEO::debugManager.flags.ExperimentalUSMAllocationReuseVersion.get() == 2) {
bool usmPoolManagerEnabled = ApiSpecificConfig::isDeviceUsmPoolingEnabled() &&
getProductHelper().isDeviceUsmPoolAllocatorSupported();
if (NEO::debugManager.flags.EnableDeviceUsmAllocationPool.get() != -1) {
usmPoolManagerEnabled = NEO::debugManager.flags.EnableDeviceUsmAllocationPool.get() > 0;
}
if (usmPoolManagerEnabled && NEO::debugManager.flags.ExperimentalUSMAllocationReuseVersion.get() == 2) {
RootDeviceIndicesContainer rootDeviceIndices;
rootDeviceIndices.pushUnique(getRootDeviceIndex());
@@ -251,6 +256,16 @@ bool Device::shouldLimitAllocationsReuse() const {
return getMemoryManager()->getUsedLocalMemorySize(getRootDeviceIndex()) >= this->usmReuseInfo.getLimitAllocationsReuseThreshold();
}
// Replaces the device's USM allocation pool with the given one.
// The member is a std::unique_ptr, so it takes ownership of the passed raw pointer
// and destroys whatever pool it held before. Passing nullptr just drops the current pool.
void Device::resetUsmAllocationPool(UsmMemAllocPool *usmMemAllocPool) {
// `this->` is required: the parameter has the same name as the member and shadows it.
this->usmMemAllocPool.reset(usmMemAllocPool);
}
// Calls cleanup() on the device's USM allocation pool.
// Does nothing when no pool has been set on the device.
void Device::cleanupUsmAllocationPool() {
    if (!usmMemAllocPool) {
        return;
    }
    usmMemAllocPool->cleanup();
}
bool Device::initDeviceFully() {
if (!getRootDeviceEnvironment().isExposeSingleDeviceMode()) {

View File

@@ -42,6 +42,7 @@ class SipExternalLib;
class SubDevice;
class SyncBufferHandler;
class UsmMemAllocPoolsManager;
class UsmMemAllocPool;
enum class EngineGroupType : uint32_t;
struct PhysicalDevicePciBusInfo;
@@ -208,6 +209,9 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
UsmMemAllocPoolsManager *getUsmMemAllocPoolsManager() {
return deviceUsmMemAllocPoolsManager.get();
}
// Returns a non-owning pointer to the device's USM allocation pool,
// or nullptr when no pool has been set.
UsmMemAllocPool *getUsmMemAllocPool() {
return usmMemAllocPool.get();
}
MOCKABLE_VIRTUAL void stopDirectSubmissionAndWaitForCompletion();
bool isAnyDirectSubmissionEnabled() const;
bool isAnyDirectSubmissionLightEnabled() const;
@@ -253,6 +257,9 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
UsmReuseInfo usmReuseInfo;
void resetUsmAllocationPool(UsmMemAllocPool *usmMemAllocPool);
void cleanupUsmAllocationPool();
protected:
Device() = delete;
Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex);
@@ -331,6 +338,7 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
ISAPoolAllocator isaPoolAllocator;
TimestampPoolAllocator deviceTimestampPoolAllocator;
std::unique_ptr<UsmMemAllocPoolsManager> deviceUsmMemAllocPoolsManager;
std::unique_ptr<UsmMemAllocPool> usmMemAllocPool;
std::atomic_uint32_t bufferPoolCount = 0u;
uint32_t maxBufferPoolCount = 0u;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -42,6 +42,20 @@ bool UsmMemAllocPool::initialize(SVMAllocsManager *svmMemoryManager, void *ptr,
return true;
}
// Creates the pool on first use from the configuration stored in this object
// (memory type, root device indices, device bitfields, device and sizes).
//
// Returns true when the pool is already initialized; otherwise returns the result of
// initialize().
//
// The "already initialized" check is made only while holding mtx. isInitialized() reads the
// plain (non-atomic) `pool` pointer, so checking it before taking the lock is an unsynchronized
// read that can race with another thread that is initializing the pool at the same time.
bool UsmMemAllocPool::ensureInitialized(SVMAllocsManager *svmMemoryManager) {
    std::unique_lock<std::mutex> lock(mtx);
    if (isInitialized()) {
        return true;
    }
    // Flags a pool that was never given its configuration
    // (presumably a default-constructed one - confirm against callers).
    DEBUG_BREAK_IF(0u == poolSize || 0u == maxServicedSize || 0u == rootDeviceIndices.size() || deviceBitFields.empty());
    SVMAllocsManager::UnifiedMemoryProperties poolMemoryProperties(poolMemoryType, poolAlignment, rootDeviceIndices, deviceBitFields);
    poolMemoryProperties.device = device;
    return initialize(svmMemoryManager, poolMemoryProperties, poolSize, minServicedSize, maxServicedSize);
}
// Reports whether the pool pointer is currently set (non-null).
bool UsmMemAllocPool::isInitialized() const {
    return nullptr != this->pool;
}
@@ -52,7 +66,8 @@ size_t UsmMemAllocPool::getPoolSize() const {
void UsmMemAllocPool::cleanup() {
if (isInitialized()) {
this->svmMemoryManager->freeSVMAlloc(this->pool, true);
[[maybe_unused]] const auto status = this->svmMemoryManager->freeSVMAlloc(this->pool, true);
DEBUG_BREAK_IF(false == status);
this->svmMemoryManager = nullptr;
this->pool = nullptr;
this->poolEnd = nullptr;
@@ -62,7 +77,7 @@ void UsmMemAllocPool::cleanup() {
}
bool UsmMemAllocPool::alignmentIsAllowed(size_t alignment) {
return alignment % chunkAlignment == 0;
return alignment % chunkAlignment == 0 && alignment <= poolAlignment;
}
bool UsmMemAllocPool::sizeIsAllowed(size_t size) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -26,11 +26,26 @@ class UsmMemAllocPool {
using AllocationsInfoStorage = BaseSortedPointerWithValueVector<AllocationInfo>;
UsmMemAllocPool() = default;
// Stores the configuration of a pool without creating it: the constructor body is empty,
// so nothing is allocated here. ensureInitialized() reads these stored values when it
// builds the pool's memory properties and calls initialize().
// The two containers are copied into the object; `device` is kept as a raw pointer.
UsmMemAllocPool(const RootDeviceIndicesContainer &rootDeviceIndices,
                const std::map<uint32_t, NEO::DeviceBitfield> &deviceBitFields,
                Device *device,
                InternalMemoryType poolMemoryType,
                size_t poolSize,
                size_t minServicedSize,
                size_t maxServicedSize)
    : rootDeviceIndices(rootDeviceIndices),
      deviceBitFields(deviceBitFields),
      device(device),
      poolMemoryType(poolMemoryType),
      poolSize(poolSize),
      minServicedSize(minServicedSize),
      maxServicedSize(maxServicedSize) {
}
virtual ~UsmMemAllocPool() = default;
bool initialize(SVMAllocsManager *svmMemoryManager, const UnifiedMemoryProperties &memoryProperties, size_t poolSize, size_t minServicedSize, size_t maxServicedSize);
bool initialize(SVMAllocsManager *svmMemoryManager, void *ptr, SvmAllocationData *svmData, size_t minServicedSize, size_t maxServicedSize);
bool ensureInitialized(SVMAllocsManager *svmMemoryManager);
bool isInitialized() const;
size_t getPoolSize() const;
void cleanup();
MOCKABLE_VIRTUAL void cleanup();
static bool alignmentIsAllowed(size_t alignment);
static bool flagsAreAllowed(const UnifiedMemoryProperties &memoryProperties);
static double getPercentOfFreeMemoryForRecycling(InternalMemoryType memoryType);
@@ -45,16 +60,20 @@ class UsmMemAllocPool {
size_t getOffsetInPool(const void *ptr) const;
static constexpr auto chunkAlignment = 512u;
static constexpr auto poolAlignment = MemoryConstants::pageSize2M;
protected:
size_t poolSize{};
std::unique_ptr<HeapAllocator> chunkAllocator;
void *pool{};
void *poolEnd{};
SVMAllocsManager *svmMemoryManager{};
AllocationsInfoStorage allocations;
std::mutex mtx;
RootDeviceIndicesContainer rootDeviceIndices;
std::map<uint32_t, NEO::DeviceBitfield> deviceBitFields;
Device *device;
InternalMemoryType poolMemoryType;
size_t poolSize{};
size_t minServicedSize;
size_t maxServicedSize;
};

View File

@@ -73,6 +73,7 @@ class MockDevice : public RootDevice {
using Device::rtMemoryBackedBuffer;
using Device::secondaryCsrs;
using Device::secondaryEngines;
using Device::usmMemAllocPool;
using Device::uuid;
using RootDevice::createEngines;
using RootDevice::defaultEngineIndex;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -18,6 +18,15 @@ class MockUsmMemAllocPool : public UsmMemAllocPool {
using UsmMemAllocPool::poolEnd;
using UsmMemAllocPool::poolMemoryType;
using UsmMemAllocPool::poolSize;
// Counts every call in cleanupCalled and forwards to UsmMemAllocPool::cleanup()
// only while callBaseCleanup is true.
void cleanup() override {
    cleanupCalled += 1u;
    if (!callBaseCleanup) {
        return;
    }
    UsmMemAllocPool::cleanup();
}
uint32_t cleanupCalled = 0u;
bool callBaseCleanup = true;
};
class MockUsmMemAllocPoolsManager : public UsmMemAllocPoolsManager {

View File

@@ -38,10 +38,10 @@
#include "shared/test/common/mocks/mock_os_interface.h"
#include "shared/test/common/mocks/mock_product_helper.h"
#include "shared/test/common/mocks/mock_release_helper.h"
#include "shared/test/common/mocks/mock_usm_memory_pool.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
using namespace NEO;
extern ApiSpecificConfig::ApiType apiTypeForUlts;
namespace NEO {
@@ -2330,6 +2330,20 @@ TEST_F(DeviceTests, givenNewUsmPoolingEnabledWhenDeviceInitializedThenUsmMemAllo
auto usmMemAllocPoolsManager = device->getUsmMemAllocPoolsManager();
EXPECT_EQ(nullptr, usmMemAllocPoolsManager);
}
{
DebugManagerStateRestore restorer;
debugManager.flags.ExperimentalUSMAllocationReuseVersion.set(2);
debugManager.flags.EnableDeviceUsmAllocationPool.set(1);
auto executionEnvironment = MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
auto mockProductHelper = new MockProductHelper;
executionEnvironment->rootDeviceEnvironments[0]->productHelper.reset(mockProductHelper);
mockProductHelper->isDeviceUsmPoolAllocatorSupportedResult = true;
UltDeviceFactory deviceFactory{1, 1, *executionEnvironment};
auto device = deviceFactory.rootDevices[0];
auto usmMemAllocPoolsManager = device->getUsmMemAllocPoolsManager();
ASSERT_NE(nullptr, usmMemAllocPoolsManager);
EXPECT_FALSE(usmMemAllocPoolsManager->isInitialized());
}
}
TEST(DeviceWithoutAILTest, givenNoAILWhenCreateDeviceThenDeviceIsCreated) {
DebugManagerStateRestore dbgRestorer;
@@ -2377,6 +2391,21 @@ TEST(Device, givenDeviceWhenGettingMicrosecondResolutionThenCorrectValueReturned
EXPECT_EQ(device->getMicrosecondResolution(), expectedMicrosecondResolution);
}
// Exercises getUsmMemAllocPool / resetUsmAllocationPool / cleanupUsmAllocationPool on a device.
TEST(Device, givenDeviceWhenCallingUsmAllocationPoolMethodsThenCorrectValueReturned) {
    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

    // A freshly created device is expected to have no pool, and cleanup must tolerate that.
    EXPECT_EQ(nullptr, device->getUsmMemAllocPool());
    device->cleanupUsmAllocationPool();

    // The device takes ownership of the mock; base cleanup is disabled so only the call is counted.
    auto *mockPool = new MockUsmMemAllocPool;
    mockPool->callBaseCleanup = false;
    device->resetUsmAllocationPool(mockPool);
    EXPECT_EQ(mockPool, device->getUsmMemAllocPool());

    // Cleanup on the device must reach the pool exactly once.
    EXPECT_EQ(0u, mockPool->cleanupCalled);
    device->cleanupUsmAllocationPool();
    EXPECT_EQ(1u, mockPool->cleanupCalled);
}
TEST(GroupDevicesTest, whenMultipleDevicesAreCreatedThenGroupDevicesCreatesVectorPerEachProductFamilySortedOverGpuTypeAndProductFamily) {
DebugManagerStateRestore restorer;
const size_t numRootDevices = 5u;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@ TEST_F(UnifiedMemoryPoolingStaticTest, givenUsmAllocPoolWhenCallingStaticMethods
EXPECT_TRUE(UsmMemAllocPool::alignmentIsAllowed(UsmMemAllocPool::chunkAlignment));
EXPECT_TRUE(UsmMemAllocPool::alignmentIsAllowed(UsmMemAllocPool::chunkAlignment * 2));
EXPECT_FALSE(UsmMemAllocPool::alignmentIsAllowed(UsmMemAllocPool::chunkAlignment / 2));
EXPECT_FALSE(UsmMemAllocPool::alignmentIsAllowed(UsmMemAllocPool::poolAlignment + UsmMemAllocPool::chunkAlignment));
const RootDeviceIndicesContainer rootDeviceIndices;
const std::map<uint32_t, DeviceBitfield> deviceBitfields;
@@ -53,7 +54,6 @@ TEST_F(UnifiedMemoryPoolingTest, givenUsmAllocPoolWhenCallingIsInitializedThenRe
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, MemoryConstants::pageSize2M, rootDeviceIndices, deviceBitfields);
unifiedMemoryProperties.device = device;
EXPECT_TRUE(usmMemAllocPool.initialize(svmManager.get(), unifiedMemoryProperties, 1 * MemoryConstants::megaByte, 0u, 1 * MemoryConstants::megaByte));
EXPECT_TRUE(usmMemAllocPool.isInitialized());
@@ -63,6 +63,25 @@ TEST_F(UnifiedMemoryPoolingTest, givenUsmAllocPoolWhenCallingIsInitializedThenRe
EXPECT_FALSE(usmMemAllocPool.freeSVMAlloc(reinterpret_cast<void *>(0x1), true));
}
// Verifies that ensureInitialized() creates the pool on the first call, is a no-op returning
// true on the second call, and that cleanup() returns the pool to the uninitialized state.
TEST_F(UnifiedMemoryPoolingTest, givenUsmAllocPoolWhenCallingEnsureInitializedThenReturnCorrectValue) {
    auto deviceFactory = std::make_unique<UltDeviceFactory>(1u, 1u);
    auto device = deviceFactory->rootDevices[0];
    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);

    const auto oneMegaByte = 1 * MemoryConstants::megaByte;
    UsmMemAllocPool usmMemAllocPool(rootDeviceIndices, deviceBitfields, device, InternalMemoryType::deviceUnifiedMemory, oneMegaByte, 0u, oneMegaByte);

    // Constructing the pool must not initialize it.
    EXPECT_FALSE(usmMemAllocPool.isInitialized());

    // First call initializes, second call finds the pool already initialized.
    EXPECT_TRUE(usmMemAllocPool.ensureInitialized(svmManager.get()));
    EXPECT_TRUE(usmMemAllocPool.isInitialized());
    EXPECT_TRUE(usmMemAllocPool.ensureInitialized(svmManager.get()));

    // After cleanup the pool is uninitialized again and rejects frees.
    usmMemAllocPool.cleanup();
    EXPECT_FALSE(usmMemAllocPool.isInitialized());
    EXPECT_FALSE(usmMemAllocPool.freeSVMAlloc(reinterpret_cast<void *>(0x1), true));
}
template <InternalMemoryType poolMemoryType, bool failAllocation>
class InitializedUnifiedMemoryPoolingTest : public UnifiedMemoryPoolingTest {
public: