performance: Iterate over indirect allocations once

Related-To: NEO-11228

Iterate only on new allocations when making indirect
allocations resident.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-05-06 11:24:29 +00:00
committed by Compute-Runtime-Automation
parent 92344bfce5
commit 6df46aa062
4 changed files with 161 additions and 137 deletions

View File

@@ -441,3 +441,116 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentWhenLocalMemoryIsEnabledThenSh
svmManager->freeSVMAlloc(ptr);
} while (alignment != 0);
}
TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentThenNewTrackingEntryIsCreated) {
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
auto device = deviceFactory->rootDevices[0];
auto memoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
auto svmManager = std::make_unique<MockSVMAllocsManager>(memoryManager, false);
auto csr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
csr->setupContext(*device->getDefaultEngine().osContext);
void *cmdQ = reinterpret_cast<void *>(0x12345);
auto mockPageFaultManager = new MockPageFaultManager();
memoryManager->pageFaultManager.reset(mockPageFaultManager);
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::sharedUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
ASSERT_NE(nullptr, ptr);
auto graphicsAllocation = svmManager->getSVMAlloc(ptr);
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size());
svmManager->makeIndirectAllocationsResident(*csr, 1u);
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(csr->getOsContext().getContextId()));
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->peekEvictable());
EXPECT_EQ(1u, svmManager->indirectAllocationsResidency.size());
auto internalEntry = svmManager->indirectAllocationsResidency.find(csr.get())->second;
EXPECT_EQ(1u, internalEntry.latestSentTaskCount);
EXPECT_EQ(1u, internalEntry.latestResidentObjectId);
svmManager->freeSVMAlloc(ptr);
}
TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentThenSubsequentCallsDoNotCallResidency) {
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
auto device = deviceFactory->rootDevices[0];
auto memoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
auto svmManager = std::make_unique<MockSVMAllocsManager>(memoryManager, false);
auto csr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
csr->setupContext(*device->getDefaultEngine().osContext);
void *cmdQ = reinterpret_cast<void *>(0x12345);
auto mockPageFaultManager = new MockPageFaultManager();
memoryManager->pageFaultManager.reset(mockPageFaultManager);
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::sharedUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
ASSERT_NE(nullptr, ptr);
auto graphicsAllocation = svmManager->getSVMAlloc(ptr);
svmManager->makeIndirectAllocationsResident(*csr, 1u);
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
// now call with task count 2 , allocations shouldn't change
svmManager->makeIndirectAllocationsResident(*csr, 2u);
auto internalEntry = svmManager->indirectAllocationsResidency.find(csr.get())->second;
EXPECT_EQ(2u, internalEntry.latestSentTaskCount);
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
// force Graphics Allocation to be non resident
graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, csr->getOsContext().getContextId());
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
// now call with task count 3 , allocations shouldn't change
svmManager->makeIndirectAllocationsResident(*csr, 2u);
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
svmManager->freeSVMAlloc(ptr);
}
TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenNewAllocationIsCreatedThenItIsMadeResident) {
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
auto device = deviceFactory->rootDevices[0];
auto memoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
auto svmManager = std::make_unique<MockSVMAllocsManager>(memoryManager, false);
auto csr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
csr->setupContext(*device->getDefaultEngine().osContext);
void *cmdQ = reinterpret_cast<void *>(0x12345);
auto mockPageFaultManager = new MockPageFaultManager();
memoryManager->pageFaultManager.reset(mockPageFaultManager);
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::sharedUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
ASSERT_NE(nullptr, ptr);
auto graphicsAllocation = svmManager->getSVMAlloc(ptr);
svmManager->makeIndirectAllocationsResident(*csr, 1u);
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
// force to non resident
graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, csr->getOsContext().getContextId());
auto ptr2 = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
auto graphicsAllocation2 = svmManager->getSVMAlloc(ptr2);
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
EXPECT_FALSE(graphicsAllocation2->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
// now call with task count 2, first allocation shouldn't be modified
svmManager->makeIndirectAllocationsResident(*csr, 2u);
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
EXPECT_TRUE(graphicsAllocation2->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
svmManager->freeSVMAlloc(ptr);
svmManager->freeSVMAlloc(ptr2);
}