performance: limit usm host allocation recycle

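Query the total device memory size and the total shared system memory size,
and limit USM device and host allocation recycling to use at most x% of the
respective total.
x is read from ExperimentalEnableDeviceAllocationCache for device and
ExperimentalEnableHostAllocationCache for host. Values above 100 are treated
as 100; when the flag is left at its default (-1) and the cache is enabled by
the API-specific configuration, 2% is used.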

Related-To: GSD-7497

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2024-02-05 15:53:23 +00:00
committed by Compute-Runtime-Automation
parent 87990b8cfc
commit 371788210d
6 changed files with 254 additions and 31 deletions

View File

@@ -250,6 +250,7 @@ ze_result_t DriverHandleImp::initialize(std::vector<std::unique_ptr<NEO::Device>
if (this->svmAllocsManager == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
this->svmAllocsManager->initUsmAllocationsCaches(*this->devices[0]->getNEODevice());
this->numDevices = static_cast<uint32_t>(this->devices.size());

View File

@@ -277,6 +277,7 @@ bool Context::createImpl(const cl_context_properties *properties,
if (anySvmSupport) {
this->svmAllocsManager = new SVMAllocsManager(this->memoryManager,
this->areMultiStorageAllocationsPreferred());
this->svmAllocsManager->initUsmAllocationsCaches(device->getDevice());
}
}

View File

@@ -517,8 +517,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, UsePipeControlAfterPartitionedWalker, -1, "-1: d
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionCount, 0, "Experimental implementation: Set number of COMPUTE_WALKERs for a given Partition Type, 0 - do not set the feature.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionType, -1, "Experimental implementation: Set COMPUTE_WALKER Partition Type. Valid values for types from 1 to 3")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0, "Align local memory allocations to a given value. Works only with allocations at least as big as the value. 0: no effect, 2097152: 2 megabytes, 1073741824: 1 gigabyte")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable device usm allocation cache.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableHostAllocationCache, -1, "Experimentally enable host usm allocation cache.")
// Value semantics shared by both allocation cache flags: -1 keeps the API-specific default,
// 0 disables the cache, any other value enables it and is used as the percentage of the
// respective total memory (values above 100 are treated as 100) that the cache may hold.
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable device usm allocation cache. Use X% of device memory.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableHostAllocationCache, -1, "Experimentally enable host usm allocation cache. Use X% of shared system memory.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalH2DCpuCopyThreshold, -1, "Override default threshold (in bytes) for H2D CPU copy.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalD2HCpuCopyThreshold, -1, "Override default threshold (in bytes) for D2H CPU copy.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalCopyThroughLock, -1, "Experimentally copy memory through locked ptr. -1: default 0: disable 1: enable ")

View File

@@ -42,9 +42,14 @@ void SVMAllocsManager::MapBasedAllocationTracker::remove(const SvmAllocationData
allocations.erase(iter);
}
void SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr) {
// Tries to store a freed allocation in the cache.
// size - size of the allocation being cached, counted against maxSize.
// ptr  - pointer identifying the allocation.
// Returns true when the allocation was cached; false when caching it would make the
// cached total exceed maxSize, in which case the cache is left unchanged.
bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr) {
    std::lock_guard<std::mutex> lock(this->mtx);
    // Compare against the remaining budget instead of evaluating size + totalSize:
    // that sum can wrap around for very large sizes and wrongly pass the limit check.
    // The first condition keeps the subtraction from underflowing if maxSize was
    // lowered below the amount already cached.
    if (this->totalSize > this->maxSize || size > this->maxSize - this->totalSize) {
        return false;
    }
    // Insert at the lower_bound position for size (presumably keeps the vector ordered
    // by allocation size; the comparison is defined outside this block).
    allocations.emplace(std::lower_bound(allocations.begin(), allocations.end(), size), size, ptr);
    this->totalSize += size;
    return true;
}
void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties, SVMAllocsManager *svmAllocsManager) {
@@ -58,6 +63,7 @@ void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemory
if (svmAllocData->device == unifiedMemoryProperties.device &&
svmAllocData->allocationFlagsProperty.allFlags == unifiedMemoryProperties.allocationFlags.allFlags &&
svmAllocData->allocationFlagsProperty.allAllocFlags == unifiedMemoryProperties.allocationFlags.allAllocFlags) {
totalSize -= allocationIter->allocationSize;
allocations.erase(allocationIter);
return allocationPtr;
}
@@ -73,6 +79,7 @@ void SVMAllocsManager::SvmAllocationCache::trim(SVMAllocsManager *svmAllocsManag
svmAllocsManager->freeSVMAllocImpl(cachedAllocationInfo.allocation, FreePolicyType::none, svmData);
}
this->allocations.clear();
this->totalSize = 0u;
}
SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) {
@@ -166,21 +173,6 @@ void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &co
// Stores the memory manager and multi-OS-context setting, then decides per cache type
// whether USM allocation recycling is on: the API-specific default is taken first and a
// debug flag value other than -1 overrides it (zero disables, non-zero enables).
// Each cache that ends up enabled is initialized right away.
SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager, bool multiOsContextSupport)
: memoryManager(memoryManager), multiOsContextSupport(multiOsContextSupport) {
// Device cache: API default, optionally overridden by ExperimentalEnableDeviceAllocationCache.
this->usmDeviceAllocationsCacheEnabled = NEO::ApiSpecificConfig::isDeviceAllocationCacheEnabled();
if (debugManager.flags.ExperimentalEnableDeviceAllocationCache.get() != -1) {
this->usmDeviceAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableDeviceAllocationCache.get();
}
if (this->usmDeviceAllocationsCacheEnabled) {
this->initUsmDeviceAllocationsCache();
}
// Host cache: API default, optionally overridden by ExperimentalEnableHostAllocationCache.
this->usmHostAllocationsCacheEnabled = NEO::ApiSpecificConfig::isHostAllocationCacheEnabled();
if (debugManager.flags.ExperimentalEnableHostAllocationCache.get() != -1) {
this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get();
}
if (this->usmHostAllocationsCacheEnabled) {
this->initUsmHostAllocationsCache();
}
}
SVMAllocsManager::~SVMAllocsManager() = default;
@@ -457,13 +449,15 @@ bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) {
if (svmData) {
if (InternalMemoryType::deviceUnifiedMemory == svmData->memoryType &&
this->usmDeviceAllocationsCacheEnabled) {
this->usmDeviceAllocationsCache.insert(svmData->size, ptr);
return true;
if (this->usmDeviceAllocationsCache.insert(svmData->size, ptr)) {
return true;
}
}
if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType &&
this->usmHostAllocationsCacheEnabled) {
this->usmHostAllocationsCache.insert(svmData->size, ptr);
return true;
if (this->usmHostAllocationsCache.insert(svmData->size, ptr)) {
return true;
}
}
if (blocking) {
this->freeSVMAllocImpl(ptr, FreePolicyType::blocking, svmData);
@@ -485,13 +479,15 @@ bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) {
if (svmData) {
if (InternalMemoryType::deviceUnifiedMemory == svmData->memoryType &&
this->usmDeviceAllocationsCacheEnabled) {
this->usmDeviceAllocationsCache.insert(svmData->size, ptr);
return true;
if (this->usmDeviceAllocationsCache.insert(svmData->size, ptr)) {
return true;
}
}
if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType &&
this->usmHostAllocationsCacheEnabled) {
this->usmHostAllocationsCache.insert(svmData->size, ptr);
return true;
if (this->usmHostAllocationsCache.insert(svmData->size, ptr)) {
return true;
}
}
this->freeSVMAllocImpl(ptr, FreePolicyType::defer, svmData);
return true;
@@ -684,12 +680,42 @@ void SVMAllocsManager::freeZeroCopySvmAllocation(SvmAllocationData *svmData) {
}
}
void SVMAllocsManager::initUsmDeviceAllocationsCache() {
// Prepares the device USM allocation cache: reserves room for 128 entries and sets
// maxSize to a fraction of the device's global memory size.
// device - device whose global memory size (queried with its device bitfield) is the
//          basis for the limit.
// The fraction is 2% by default. When ExperimentalEnableDeviceAllocationCache is not -1,
// its value is interpreted as a percentage.
void SVMAllocsManager::initUsmDeviceAllocationsCache(Device &device) {
    this->usmDeviceAllocationsCache.allocations.reserve(128u);
    const auto totalDeviceMemory = device.getGlobalMemorySize(static_cast<uint32_t>(device.getDeviceBitfield().to_ulong()));
    auto fractionOfTotalMemoryForRecycling = 0.02;
    const auto percentFromFlag = debugManager.flags.ExperimentalEnableDeviceAllocationCache.get();
    if (percentFromFlag != -1) {
        // Clamp to [0, 100]. Without the lower bound a negative flag value would give a
        // negative fraction, and converting a negative double to size_t below is undefined.
        fractionOfTotalMemoryForRecycling = 0.01 * std::max(0, std::min(100, percentFromFlag));
    }
    this->usmDeviceAllocationsCache.maxSize = static_cast<size_t>(fractionOfTotalMemoryForRecycling * totalDeviceMemory);
}
// Prepares the host USM allocation cache: reserves room for 128 entries and sets
// maxSize to a fraction of the shared system memory reported by the memory manager.
// The fraction is 2% by default. When ExperimentalEnableHostAllocationCache is not -1,
// its value is interpreted as a percentage.
void SVMAllocsManager::initUsmHostAllocationsCache() {
    this->usmHostAllocationsCache.allocations.reserve(128u);
    // NOTE(review): the query always uses index 0 - presumably a root device index;
    // confirm this is intended when several root devices are present.
    const auto totalSystemMemory = this->memoryManager->getSystemSharedMemory(0u);
    auto fractionOfTotalMemoryForRecycling = 0.02;
    const auto percentFromFlag = debugManager.flags.ExperimentalEnableHostAllocationCache.get();
    if (percentFromFlag != -1) {
        // Clamp to [0, 100]. Without the lower bound a negative flag value would give a
        // negative fraction, and converting a negative double to size_t below is undefined.
        fractionOfTotalMemoryForRecycling = 0.01 * std::max(0, std::min(100, percentFromFlag));
    }
    this->usmHostAllocationsCache.maxSize = static_cast<size_t>(fractionOfTotalMemoryForRecycling * totalSystemMemory);
}
// Decides whether the device and host USM allocation caches are enabled and
// initializes the enabled ones.
// For each cache the API-specific default applies unless the matching debug flag is
// set to something other than -1; then zero disables and any non-zero value enables.
// device - forwarded to the device cache initialization.
void SVMAllocsManager::initUsmAllocationsCaches(Device &device) {
    // Device cache first: resolve the switch, then initialize if it is on.
    bool deviceCacheOn = NEO::ApiSpecificConfig::isDeviceAllocationCacheEnabled();
    const auto deviceCacheFlag = debugManager.flags.ExperimentalEnableDeviceAllocationCache.get();
    if (deviceCacheFlag != -1) {
        deviceCacheOn = (debugManager.flags.ExperimentalEnableDeviceAllocationCache.get() != 0);
    }
    this->usmDeviceAllocationsCacheEnabled = deviceCacheOn;
    if (deviceCacheOn) {
        this->initUsmDeviceAllocationsCache(device);
    }

    // Host cache second, following the same rules with its own default and flag.
    bool hostCacheOn = NEO::ApiSpecificConfig::isHostAllocationCacheEnabled();
    const auto hostCacheFlag = debugManager.flags.ExperimentalEnableHostAllocationCache.get();
    if (hostCacheFlag != -1) {
        hostCacheOn = (debugManager.flags.ExperimentalEnableHostAllocationCache.get() != 0);
    }
    this->usmHostAllocationsCacheEnabled = hostCacheOn;
    if (hostCacheOn) {
        this->initUsmHostAllocationsCache();
    }
}
void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) {

View File

@@ -153,11 +153,13 @@ class SVMAllocsManager {
};
struct SvmAllocationCache {
void insert(size_t size, void *);
// Tries to cache an allocation of the given size; returns false (and caches nothing)
// when adding it would push totalSize above maxSize.
bool insert(size_t size, void *);
// Looks up a cached allocation for the given size and properties; on a match the
// entry is removed from the cache and its size is subtracted from totalSize.
void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties, SVMAllocsManager *svmAllocsManager);
// Frees the cached allocations through svmAllocsManager, empties the cache and
// resets totalSize to zero.
void trim(SVMAllocsManager *svmAllocsManager);
// Cached entries; insert() places new entries at the lower_bound position for their size.
std::vector<SvmCacheAllocationInfo> allocations;
// Held by insert() while it checks the limit and updates the cache.
std::mutex mtx;
// Upper limit for totalSize, expressed in the same units as allocation sizes.
size_t maxSize = 0;
// Sum of the sizes of the allocations currently held in the cache.
size_t totalSize = 0;
};
enum class FreePolicyType : uint32_t {
@@ -234,6 +236,8 @@ class SVMAllocsManager {
using NonGpuDomainAllocsContainer = std::vector<void *>;
NonGpuDomainAllocsContainer nonGpuDomainAllocs;
void initUsmAllocationsCaches(Device &device);
protected:
void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties,
const RootDeviceIndicesContainer &rootDeviceIndices,
@@ -242,7 +246,7 @@ class SVMAllocsManager {
void freeZeroCopySvmAllocation(SvmAllocationData *svmData);
void initUsmDeviceAllocationsCache();
void initUsmDeviceAllocationsCache(Device &device);
void initUsmHostAllocationsCache();
void freeSVMData(SvmAllocationData *svmData);

View File

@@ -101,12 +101,12 @@ using SvmDeviceAllocationCacheTest = Test<SvmAllocationCacheTestFixture>;
// With ExperimentalEnableDeviceAllocationCache left at -1 the device cache is expected
// to be disabled, both right after construction and after the caches are initialized.
TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnabledThenItIsDisabled) {
    std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 1));
    auto device = deviceFactory->rootDevices[0];
    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
    // Precondition: the flag must not have been overridden by an earlier test.
    ASSERT_EQ(debugManager.flags.ExperimentalEnableDeviceAllocationCache.get(), -1);
    EXPECT_FALSE(svmManager->usmDeviceAllocationsCacheEnabled);
    // Initializing the caches with default settings must not turn the device cache on.
    svmManager->initUsmAllocationsCaches(*device);
    EXPECT_FALSE(svmManager->usmDeviceAllocationsCacheEnabled);
}
struct SvmDeviceAllocationCacheSimpleTestDataType {
@@ -122,7 +122,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic
debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1);
auto device = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);
svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k;
auto testDataset = std::vector<SvmDeviceAllocationCacheSimpleTestDataType>(
@@ -161,6 +163,91 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u);
}
// Setting the device cache flag to 2 enables the cache and requests 2% of the device's
// global memory; the test checks that maxSize matches that amount.
TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenInitializedThenMaxSizeIsSetCorrectly) {
    auto deviceFactory = std::make_unique<UltDeviceFactory>(1, 1);
    auto device = deviceFactory->rootDevices[0];

    DebugManagerStateRestore restore;
    debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(2);

    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
    svmManager->initUsmAllocationsCaches(*device);
    ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);

    // Recompute the limit the same way the manager is expected to: 2% of global memory.
    const auto deviceMask = static_cast<uint32_t>(mockDeviceBitfield.to_ulong());
    const auto totalDeviceMemory = device->getGlobalMemorySize(deviceMask);
    const auto expectedMaxSize = static_cast<size_t>(totalDeviceMemory * 0.02);
    EXPECT_EQ(expectedMaxSize, svmManager->usmDeviceAllocationsCache.maxSize);
}
// With maxSize equal to one allocation, the first freed device allocation is cached and a
// second one is rejected. The same scenario runs once with freeSVMAlloc and once with
// freeSVMAllocDefer.
TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCacheOnlyIfMaxSizeWillNotBeExceeded) {
    auto deviceFactory = std::make_unique<UltDeviceFactory>(1, 1);
    RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
    std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
    DebugManagerStateRestore restore;
    debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1);
    auto device = deviceFactory->rootDevices[0];
    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
    svmManager->initUsmAllocationsCaches(*device);
    ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);

    // Shrink the limit so that exactly one allocation of allocationSize fits.
    constexpr auto allocationSize = MemoryConstants::pageSize64k;
    auto &cache = svmManager->usmDeviceAllocationsCache;
    cache.maxSize = allocationSize;

    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
    unifiedMemoryProperties.device = device;

    const bool deferredFreeModes[] = {false, true};
    for (const bool useDeferredFree : deferredFreeModes) {
        const auto release = [&](void *ptr) {
            if (useDeferredFree) {
                svmManager->freeSVMAllocDefer(ptr);
            } else {
                svmManager->freeSVMAlloc(ptr);
            }
        };

        auto fittingAllocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties);
        ASSERT_NE(fittingAllocation, nullptr);
        auto overflowingAllocation = svmManager->createUnifiedMemoryAllocation(1u, unifiedMemoryProperties);
        ASSERT_NE(overflowingAllocation, nullptr);
        EXPECT_EQ(0u, cache.allocations.size());
        EXPECT_EQ(0u, cache.totalSize);

        // The first free fills the cache up to its limit.
        release(fittingAllocation);
        EXPECT_EQ(1u, cache.allocations.size());
        EXPECT_EQ(allocationSize, cache.totalSize);

        // The second free would exceed the limit, so the cache stays unchanged.
        release(overflowingAllocation);
        EXPECT_EQ(1u, cache.allocations.size());
        EXPECT_EQ(allocationSize, cache.totalSize);

        // A new request of the cached size is served from the cache.
        auto recycledAllocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties);
        EXPECT_EQ(recycledAllocation, fittingAllocation);
        EXPECT_EQ(cache.allocations.size(), 0u);
        EXPECT_EQ(0u, cache.totalSize);

        release(recycledAllocation);

        svmManager->trimUSMDeviceAllocCache();
        EXPECT_EQ(cache.allocations.size(), 0u);
        EXPECT_EQ(0u, cache.totalSize);
    }
}
TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenReturnCorrectCachedAllocation) {
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 1));
RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
@@ -169,7 +256,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc
debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1);
auto device = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);
svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k;
auto testDataset = std::vector<SvmDeviceAllocationCacheSimpleTestDataType>(
@@ -220,7 +309,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfter
debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1);
auto device = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);
svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
size_t allocationSizeBasis = MemoryConstants::pageSize64k;
auto testDataset = std::vector<SvmDeviceAllocationCacheSimpleTestDataType>(
@@ -297,7 +388,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAlloc
auto secondRootDevice = deviceFactory->rootDevices[1];
auto subDevice1 = deviceFactory->subDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(rootDevice->getMemoryManager(), false);
svmManager->initUsmAllocationsCaches(*rootDevice);
ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);
svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
constexpr auto allocationSizeBasis = MemoryConstants::kiloByte;
size_t defaultAllocSize = allocationSizeBasis << 0;
@@ -369,7 +462,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCac
device->injectMemoryManager(new MockMemoryManagerWithCapacity(*device->getExecutionEnvironment()));
MockMemoryManagerWithCapacity *memoryManager = static_cast<MockMemoryManagerWithCapacity *>(device->getMemoryManager());
auto svmManager = std::make_unique<MockSVMAllocsManager>(memoryManager, false);
svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);
svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
memoryManager->capacity = MemoryConstants::pageSize64k * 3;
@@ -407,6 +502,8 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnab
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
ASSERT_EQ(debugManager.flags.ExperimentalEnableHostAllocationCache.get(), -1);
EXPECT_FALSE(svmManager->usmHostAllocationsCacheEnabled);
svmManager->initUsmAllocationsCaches(*device);
EXPECT_FALSE(svmManager->usmHostAllocationsCacheEnabled);
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
auto allocation = svmManager->createHostUnifiedMemoryAllocation(1u, unifiedMemoryProperties);
@@ -437,7 +534,9 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceA
debugManager.flags.ExperimentalEnableHostAllocationCache.set(1);
auto device = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled);
svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k;
auto testDataset = std::vector<SvmHostAllocationCacheSimpleTestDataType>(
@@ -475,6 +574,90 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceA
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u);
}
// Setting the host cache flag to 2 enables the cache and requests 2% of the shared system
// memory; the test checks that maxSize matches that amount.
TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenInitializedThenMaxSizeIsSetCorrectly) {
    auto deviceFactory = std::make_unique<UltDeviceFactory>(1, 1);

    DebugManagerStateRestore restore;
    debugManager.flags.ExperimentalEnableHostAllocationCache.set(2);

    auto device = deviceFactory->rootDevices[0];
    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
    svmManager->initUsmAllocationsCaches(*device);
    ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled);

    // Recompute the limit from the memory manager's report: 2% of shared system memory.
    const auto totalSystemMemory = svmManager->memoryManager->getSystemSharedMemory(mockRootDeviceIndex);
    const auto expectedMaxSize = static_cast<size_t>(totalSystemMemory * 0.02);
    EXPECT_EQ(expectedMaxSize, svmManager->usmHostAllocationsCache.maxSize);
}
// With maxSize equal to one allocation, the first freed host allocation is cached and a
// second one is rejected. The same scenario runs once with freeSVMAlloc and once with
// freeSVMAllocDefer.
TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCacheOnlyIfMaxSizeWillNotBeExceeded) {
    auto deviceFactory = std::make_unique<UltDeviceFactory>(1, 1);
    RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
    std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
    DebugManagerStateRestore restore;
    debugManager.flags.ExperimentalEnableHostAllocationCache.set(1);
    auto device = deviceFactory->rootDevices[0];
    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
    svmManager->initUsmAllocationsCaches(*device);
    ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled);

    // Shrink the limit so that exactly one allocation of allocationSize fits.
    constexpr auto allocationSize = MemoryConstants::pageSize64k;
    auto &cache = svmManager->usmHostAllocationsCache;
    cache.maxSize = allocationSize;

    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);

    const bool deferredFreeModes[] = {false, true};
    for (const bool useDeferredFree : deferredFreeModes) {
        const auto release = [&](void *ptr) {
            if (useDeferredFree) {
                svmManager->freeSVMAllocDefer(ptr);
            } else {
                svmManager->freeSVMAlloc(ptr);
            }
        };

        auto fittingAllocation = svmManager->createHostUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties);
        ASSERT_NE(fittingAllocation, nullptr);
        auto overflowingAllocation = svmManager->createHostUnifiedMemoryAllocation(1u, unifiedMemoryProperties);
        ASSERT_NE(overflowingAllocation, nullptr);
        EXPECT_EQ(0u, cache.allocations.size());
        EXPECT_EQ(0u, cache.totalSize);

        // The first free fills the cache up to its limit.
        release(fittingAllocation);
        EXPECT_EQ(1u, cache.allocations.size());
        EXPECT_EQ(allocationSize, cache.totalSize);

        // The second free would exceed the limit, so the cache stays unchanged.
        release(overflowingAllocation);
        EXPECT_EQ(1u, cache.allocations.size());
        EXPECT_EQ(allocationSize, cache.totalSize);

        // A new request of the cached size is served from the cache.
        auto recycledAllocation = svmManager->createHostUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties);
        EXPECT_EQ(recycledAllocation, fittingAllocation);
        EXPECT_EQ(cache.allocations.size(), 0u);
        EXPECT_EQ(0u, cache.totalSize);

        release(recycledAllocation);

        svmManager->trimUSMHostAllocCache();
        EXPECT_EQ(cache.allocations.size(), 0u);
        EXPECT_EQ(0u, cache.totalSize);
    }
}
TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenReturnCorrectCachedAllocation) {
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 1));
RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
@@ -483,7 +666,9 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocat
debugManager.flags.ExperimentalEnableHostAllocationCache.set(1);
auto device = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled);
svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k;
auto testDataset = std::vector<SvmHostAllocationCacheSimpleTestDataType>(
@@ -533,7 +718,9 @@ TEST_F(SvmHostAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFr
debugManager.flags.ExperimentalEnableHostAllocationCache.set(1);
auto device = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled);
svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
size_t allocationSizeBasis = MemoryConstants::pageSize64k;
auto testDataset = std::vector<SvmHostAllocationCacheSimpleTestDataType>(
@@ -607,7 +794,9 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocat
debugManager.flags.ExperimentalEnableHostAllocationCache.set(1);
auto rootDevice = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(rootDevice->getMemoryManager(), false);
svmManager->initUsmAllocationsCaches(*rootDevice);
ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled);
svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
constexpr auto allocationSizeBasis = MemoryConstants::kiloByte;
size_t defaultAllocSize = allocationSizeBasis << 0;
@@ -673,7 +862,9 @@ TEST_F(SvmHostAllocationCacheTest, givenHostOutOfMemoryWhenAllocatingThenCacheIs
device->injectMemoryManager(new MockMemoryManagerWithCapacity(*device->getExecutionEnvironment()));
MockMemoryManagerWithCapacity *memoryManager = static_cast<MockMemoryManagerWithCapacity *>(device->getMemoryManager());
auto svmManager = std::make_unique<MockSVMAllocsManager>(memoryManager, false);
svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled);
svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
memoryManager->capacity = MemoryConstants::pageSize64k * 3;