performance: iterate over each indirect allocation only once per CSR

Related-To: NEO-11921

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek 2024-07-09 08:05:59 +00:00 committed by Compute-Runtime-Automation
parent 43c840cb29
commit 457cb005de
7 changed files with 20 additions and 9 deletions

View File

@ -1194,7 +1194,7 @@ ze_result_t ContextImp::mapVirtualMem(const void *ptr,
allocData.device = allocationNode->device;
allocData.size = size;
allocData.pageSizeForAlignment = MemoryConstants::pageSize64k;
allocData.setAllocId(this->driverHandle->svmAllocsManager->allocationsCounter++);
allocData.setAllocId(++this->driverHandle->svmAllocsManager->allocationsCounter);
allocData.memoryType = InternalMemoryType::reservedDeviceMemory;
allocData.virtualReservationData = virtualMemoryReservation;
NEO::MemoryMappedRange *mappedRange = new NEO::MemoryMappedRange;

View File

@ -660,7 +660,7 @@ void *DriverHandleImp::importFdHandle(NEO::Device *neoDevice,
isHostIpcAllocation ? InternalMemoryType::hostUnifiedMemory : InternalMemoryType::deviceUnifiedMemory;
allocDataTmp->device = neoDevice;
allocDataTmp->isImportedAllocation = true;
allocDataTmp->setAllocId(this->getSvmAllocsManager()->allocationsCounter++);
allocDataTmp->setAllocId(++this->getSvmAllocsManager()->allocationsCounter);
if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
}
@ -714,7 +714,7 @@ void *DriverHandleImp::importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_fla
allocDataTmp->memoryType = InternalMemoryType::deviceUnifiedMemory;
allocDataTmp->device = neoDevice;
allocDataTmp->isImportedAllocation = true;
allocDataTmp->setAllocId(this->getSvmAllocsManager()->allocationsCounter++);
allocDataTmp->setAllocId(++this->getSvmAllocsManager()->allocationsCounter);
if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
@ -891,7 +891,7 @@ void *DriverHandleImp::importNTHandle(ze_device_handle_t hDevice, void *handle,
isHostIpcAllocation ? InternalMemoryType::hostUnifiedMemory : InternalMemoryType::deviceUnifiedMemory;
allocData.device = neoDevice;
allocData.isImportedAllocation = true;
allocData.setAllocId(this->getSvmAllocsManager()->allocationsCounter++);
allocData.setAllocId(++this->getSvmAllocsManager()->allocationsCounter);
this->getSvmAllocsManager()->insertSVMAlloc(allocData);

View File

@ -172,6 +172,7 @@ TEST_F(MultiDeviceContextTests,
uintptr_t peerGpuAddress = 0u;
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
EXPECT_NE(allocData, nullptr);
EXPECT_EQ(driverHandle->getSvmAllocsManager()->allocationsCounter.load(), allocData->getAllocId());
auto peerAlloc = driverHandle->getPeerAllocation(driverHandle->devices[1], allocData, ptr, &peerGpuAddress, nullptr);
EXPECT_NE(peerAlloc, nullptr);

View File

@ -215,7 +215,7 @@ TEST_F(MemoryExportImportImplicitScalingTest,
auto newAllocationCount = usmManager->allocationsCounter.load();
EXPECT_GT(newAllocationCount, currentAllocationCount);
EXPECT_EQ(usmManager->getSVMAlloc(ipcPtr)->getAllocId(), currentAllocationCount);
EXPECT_EQ(usmManager->getSVMAlloc(ipcPtr)->getAllocId(), newAllocationCount);
result = context->closeIpcMemHandle(ipcPtr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@ -258,7 +258,7 @@ TEST_F(MemoryExportImportImplicitScalingTest,
auto newAllocationCount = usmManager->allocationsCounter.load();
EXPECT_GT(newAllocationCount, currentAllocationCount);
EXPECT_EQ(usmManager->getSVMAlloc(ipcPtr)->getAllocId(), currentAllocationCount);
EXPECT_EQ(usmManager->getSVMAlloc(ipcPtr)->getAllocId(), newAllocationCount);
result = context->closeIpcMemHandle(ipcPtr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

View File

@ -1963,6 +1963,7 @@ HWTEST_P(UpdateResidencyContainerMultipleDevicesTest,
allocData.gpuAllocations.addAllocation(&gfxAllocation);
allocData.memoryType = InternalMemoryType::deviceUnifiedMemory;
allocData.device = &device->getDevice();
allocData.setAllocId(1u);
uint32_t pCmdBufferPeer[1024];
MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(),
@ -1971,7 +1972,7 @@ HWTEST_P(UpdateResidencyContainerMultipleDevicesTest,
allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer);
allocDataPeer.memoryType = InternalMemoryType::deviceUnifiedMemory;
allocDataPeer.device = &peerDevice->getDevice();
allocDataPeer.setAllocId(2u);
svmManager->insertSVMAlloc(allocData);
svmManager->insertSVMAlloc(allocDataPeer);
EXPECT_EQ(2u, svmManager->getNumAllocs());
@ -2007,6 +2008,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
allocData.gpuAllocations.addAllocation(&gfxAllocation);
allocData.memoryType = InternalMemoryType::deviceUnifiedMemory;
allocData.device = &device->getDevice();
allocData.setAllocId(1u);
uint32_t pCmdBufferPeer[1024];
MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(),
@ -2015,6 +2017,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer);
allocDataPeer.memoryType = InternalMemoryType::deviceUnifiedMemory;
allocDataPeer.device = &peerDevice->getDevice();
allocDataPeer.setAllocId(2u);
svmManager->insertSVMAlloc(allocData);
svmManager->insertSVMAlloc(allocDataPeer);
@ -2083,6 +2086,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
allocData0.gpuAllocations.addAllocation(&gfxAllocation);
allocData0.memoryType = InternalMemoryType::deviceUnifiedMemory;
allocData0.device = &subDevice0->getDevice();
allocData0.setAllocId(1u);
uint32_t pCmdBufferPeer[1024];
MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(),
@ -2091,6 +2095,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer);
allocData1.memoryType = InternalMemoryType::deviceUnifiedMemory;
allocData1.device = &subDevice1->getDevice();
allocData1.setAllocId(2u);
svmManager->insertSVMAlloc(allocData0);
svmManager->insertSVMAlloc(allocData1);
@ -2114,6 +2119,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
allocData0.gpuAllocations.addAllocation(&gfxAllocation);
allocData0.memoryType = InternalMemoryType::deviceUnifiedMemory;
allocData0.device = &subDevice0->getDevice();
allocData0.setAllocId(1u);
uint32_t pCmdBufferPeer[1024];
MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(),
@ -2122,6 +2128,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer);
allocData1.memoryType = InternalMemoryType::deviceUnifiedMemory;
allocData1.device = &subDevice1->getDevice();
allocData1.setAllocId(2u);
svmManager->insertSVMAlloc(allocData0);
svmManager->insertSVMAlloc(allocData1);

View File

@ -744,6 +744,7 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co
std::unique_lock<std::shared_mutex> lock(mtx);
bool parseAllAllocations = false;
auto entry = indirectAllocationsResidency.find(&commandStreamReceiver);
TaskCountType previousCounter = 0;
if (entry == indirectAllocationsResidency.end()) {
parseAllAllocations = true;
@ -755,13 +756,14 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co
} else {
if (this->allocationsCounter > entry->second.latestResidentObjectId) {
parseAllAllocations = true;
previousCounter = entry->second.latestResidentObjectId;
entry->second.latestResidentObjectId = this->allocationsCounter;
}
entry->second.latestSentTaskCount = taskCount;
}
if (parseAllAllocations) {
auto currentCounter = this->allocationsCounter.load();
for (auto allocationId = 1u; allocationId <= currentCounter; allocationId++) {
for (auto allocationId = static_cast<uint32_t>(previousCounter + 1); allocationId <= currentCounter; allocationId++) {
makeResidentForAllocationsWithId(allocationId, commandStreamReceiver);
}
}
@ -887,6 +889,7 @@ std::unique_lock<std::mutex> SVMAllocsManager::obtainOwnership() {
void SVMAllocsManager::insertSVMAlloc(void *svmPtr, const SvmAllocationData &allocData) {
std::unique_lock<std::shared_mutex> lock(mtx);
this->svmAllocs.insert(svmPtr, allocData);
UNRECOVERABLE_IF(internalAllocationsMap.count(allocData.getAllocId()) > 0);
for (auto alloc : allocData.gpuAllocations.getGraphicsAllocations()) {
if (alloc != nullptr) {
internalAllocationsMap.insert({allocData.getAllocId(), alloc});

View File

@ -581,7 +581,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenNewAllocationIsCr
// now call with task count 2, first allocation shouldn't be modified
svmManager->makeIndirectAllocationsResident(*csr, 2u);
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
EXPECT_TRUE(graphicsAllocation2->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
svmManager->freeSVMAlloc(ptr);