fix: device usm alloc reuse

Do not put an allocation into the USM device allocation reuse cache if it is internal.
Set the new isInternalAllocation flag for globals surface allocations.

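Use the actual size of the allocation on the device (its underlying buffer size) when inserting into the reuse cache, so that memory usage is tracked accurately.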

Related-To: NEO-6893

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2024-05-29 10:52:58 +00:00
committed by Compute-Runtime-Automation
parent f2d56744e3
commit 79b9e73311
6 changed files with 16 additions and 11 deletions

View File

@@ -298,7 +298,7 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size,
if (memoryProperties.memoryType == InternalMemoryType::deviceUnifiedMemory) {
unifiedMemoryProperties.flags.isUSMDeviceAllocation = true;
if (this->usmDeviceAllocationsCacheEnabled &&
false == memoryProperties.needZeroedOutAllocation) {
false == memoryProperties.isInternalAllocation) {
void *allocationFromCache = this->usmDeviceAllocationsCache.get(size, memoryProperties, this);
if (allocationFromCache) {
return allocationFromCache;
@@ -332,6 +332,7 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size,
allocData.allocationFlagsProperty = memoryProperties.allocationFlags;
allocData.device = memoryProperties.device;
allocData.setAllocId(++this->allocationsCounter);
allocData.isInternalAllocation = memoryProperties.isInternalAllocation;
auto retPtr = reinterpret_cast<void *>(unifiedMemoryAllocation->getGpuAddress());
insertSVMAlloc(retPtr, allocData);
@@ -445,8 +446,9 @@ bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) {
SvmAllocationData *svmData = getSVMAlloc(ptr);
if (svmData) {
if (InternalMemoryType::deviceUnifiedMemory == svmData->memoryType &&
false == svmData->isInternalAllocation &&
this->usmDeviceAllocationsCacheEnabled) {
if (this->usmDeviceAllocationsCache.insert(svmData->size, ptr)) {
if (this->usmDeviceAllocationsCache.insert(svmData->gpuAllocations.getDefaultGraphicsAllocation()->getUnderlyingBufferSize(), ptr)) {
return true;
}
}

View File

@@ -41,6 +41,7 @@ struct SvmAllocationData {
this->allocId = svmAllocData.allocId;
this->pageSizeForAlignment = svmAllocData.pageSizeForAlignment;
this->isImportedAllocation = svmAllocData.isImportedAllocation;
this->isInternalAllocation = svmAllocData.isInternalAllocation;
for (auto allocation : svmAllocData.gpuAllocations.getGraphicsAllocations()) {
if (allocation) {
this->gpuAllocations.addAllocation(allocation);
@@ -63,6 +64,7 @@ struct SvmAllocationData {
allocId = id;
}
bool mappedAllocData = false;
bool isInternalAllocation = false;
uint32_t getAllocId() const {
return allocId;
@@ -138,7 +140,7 @@ class SVMAllocsManager {
const RootDeviceIndicesContainer &rootDeviceIndices;
const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields;
AllocationType requestedAllocationType = AllocationType::unknown;
bool needZeroedOutAllocation = false;
bool isInternalAllocation = false;
};
struct SvmCacheAllocationInfo {

View File

@@ -38,7 +38,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, subDeviceBitfields);
unifiedMemoryProperties.device = &device;
unifiedMemoryProperties.requestedAllocationType = allocationType;
unifiedMemoryProperties.needZeroedOutAllocation = true;
unifiedMemoryProperties.isInternalAllocation = true;
auto ptr = svmAllocManager->createUnifiedMemoryAllocation(totalSize, unifiedMemoryProperties);
DEBUG_BREAK_IF(ptr == nullptr);
if (ptr == nullptr) {

View File

@@ -34,7 +34,7 @@ struct MockSVMAllocsManager : public SVMAllocsManager {
bool prefetchMemoryCalled = false;
void *createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties) override {
requestedZeroedOutAllocation = memoryProperties.needZeroedOutAllocation;
requestedZeroedOutAllocation = memoryProperties.isInternalAllocation;
return SVMAllocsManager::createUnifiedMemoryAllocation(size, memoryProperties);
}
bool requestedZeroedOutAllocation = false;

View File

@@ -291,11 +291,8 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc
auto testDataset = std::vector<SvmDeviceAllocationCacheSimpleTestDataType>(
{
{(allocationSizeBasis << 0), nullptr},
{(allocationSizeBasis << 0) + 1, nullptr},
{(allocationSizeBasis << 1), nullptr},
{(allocationSizeBasis << 1) + 1, nullptr},
{(allocationSizeBasis << 2), nullptr},
{(allocationSizeBasis << 2) + 1, nullptr},
});
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
@@ -519,7 +516,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCac
ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u);
}
TEST_F(SvmDeviceAllocationCacheTest, givenAllocationWithNeedZeroedOutAllocationWhenAllocatingAfterFreeThenDoNotReuseAllocation) {
TEST_F(SvmDeviceAllocationCacheTest, givenAllocationWithIsInternalAllocationSetWhenAllocatingAfterFreeThenDoNotReuseAllocation) {
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 1));
RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
@@ -538,13 +535,15 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationWithNeedZeroedOutAllocationW
svmManager->freeSVMAlloc(allocation);
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 1u);
unifiedMemoryProperties.needZeroedOutAllocation = true;
unifiedMemoryProperties.isInternalAllocation = true;
auto testedAllocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 1u);
auto svmData = svmManager->getSVMAlloc(testedAllocation);
EXPECT_NE(nullptr, svmData);
EXPECT_TRUE(svmData->isInternalAllocation);
svmManager->freeSVMAlloc(testedAllocation);
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 1u);
svmManager->trimUSMDeviceAllocCache();
}

View File

@@ -90,7 +90,9 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM
EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size()));
ASSERT_NE(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast<void *>(static_cast<uintptr_t>(alloc->getGpuAddress()))));
EXPECT_TRUE(alloc->isMemObjectsAllocationWithWritableFlags());
EXPECT_EQ(InternalMemoryType::deviceUnifiedMemory, svmAllocsManager.getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()))->memoryType);
auto svmData = svmAllocsManager.getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
EXPECT_EQ(InternalMemoryType::deviceUnifiedMemory, svmData->memoryType);
EXPECT_TRUE(svmData->isInternalAllocation);
EXPECT_EQ(AllocationType::constantSurface, alloc->getAllocationType());
EXPECT_FALSE(alloc->getDefaultGmm()->resourceParams.Flags.Info.NotLockable);
EXPECT_TRUE(svmAllocsManager.requestedZeroedOutAllocation);