performance: iterate over indirect allocations once
Related-To: NEO-11921
Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
parent
43c840cb29
commit
457cb005de
|
@ -1194,7 +1194,7 @@ ze_result_t ContextImp::mapVirtualMem(const void *ptr,
|
|||
allocData.device = allocationNode->device;
|
||||
allocData.size = size;
|
||||
allocData.pageSizeForAlignment = MemoryConstants::pageSize64k;
|
||||
allocData.setAllocId(this->driverHandle->svmAllocsManager->allocationsCounter++);
|
||||
allocData.setAllocId(++this->driverHandle->svmAllocsManager->allocationsCounter);
|
||||
allocData.memoryType = InternalMemoryType::reservedDeviceMemory;
|
||||
allocData.virtualReservationData = virtualMemoryReservation;
|
||||
NEO::MemoryMappedRange *mappedRange = new NEO::MemoryMappedRange;
|
||||
|
|
|
@ -660,7 +660,7 @@ void *DriverHandleImp::importFdHandle(NEO::Device *neoDevice,
|
|||
isHostIpcAllocation ? InternalMemoryType::hostUnifiedMemory : InternalMemoryType::deviceUnifiedMemory;
|
||||
allocDataTmp->device = neoDevice;
|
||||
allocDataTmp->isImportedAllocation = true;
|
||||
allocDataTmp->setAllocId(this->getSvmAllocsManager()->allocationsCounter++);
|
||||
allocDataTmp->setAllocId(++this->getSvmAllocsManager()->allocationsCounter);
|
||||
if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
|
||||
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
}
|
||||
|
@ -714,7 +714,7 @@ void *DriverHandleImp::importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_fla
|
|||
allocDataTmp->memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocDataTmp->device = neoDevice;
|
||||
allocDataTmp->isImportedAllocation = true;
|
||||
allocDataTmp->setAllocId(this->getSvmAllocsManager()->allocationsCounter++);
|
||||
allocDataTmp->setAllocId(++this->getSvmAllocsManager()->allocationsCounter);
|
||||
|
||||
if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
|
||||
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
|
@ -891,7 +891,7 @@ void *DriverHandleImp::importNTHandle(ze_device_handle_t hDevice, void *handle,
|
|||
isHostIpcAllocation ? InternalMemoryType::hostUnifiedMemory : InternalMemoryType::deviceUnifiedMemory;
|
||||
allocData.device = neoDevice;
|
||||
allocData.isImportedAllocation = true;
|
||||
allocData.setAllocId(this->getSvmAllocsManager()->allocationsCounter++);
|
||||
allocData.setAllocId(++this->getSvmAllocsManager()->allocationsCounter);
|
||||
|
||||
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
|
||||
|
||||
|
|
|
@ -172,6 +172,7 @@ TEST_F(MultiDeviceContextTests,
|
|||
uintptr_t peerGpuAddress = 0u;
|
||||
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
|
||||
EXPECT_NE(allocData, nullptr);
|
||||
EXPECT_EQ(driverHandle->getSvmAllocsManager()->allocationsCounter.load(), allocData->getAllocId());
|
||||
auto peerAlloc = driverHandle->getPeerAllocation(driverHandle->devices[1], allocData, ptr, &peerGpuAddress, nullptr);
|
||||
EXPECT_NE(peerAlloc, nullptr);
|
||||
|
||||
|
|
|
@ -215,7 +215,7 @@ TEST_F(MemoryExportImportImplicitScalingTest,
|
|||
|
||||
auto newAllocationCount = usmManager->allocationsCounter.load();
|
||||
EXPECT_GT(newAllocationCount, currentAllocationCount);
|
||||
EXPECT_EQ(usmManager->getSVMAlloc(ipcPtr)->getAllocId(), currentAllocationCount);
|
||||
EXPECT_EQ(usmManager->getSVMAlloc(ipcPtr)->getAllocId(), newAllocationCount);
|
||||
|
||||
result = context->closeIpcMemHandle(ipcPtr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
@ -258,7 +258,7 @@ TEST_F(MemoryExportImportImplicitScalingTest,
|
|||
|
||||
auto newAllocationCount = usmManager->allocationsCounter.load();
|
||||
EXPECT_GT(newAllocationCount, currentAllocationCount);
|
||||
EXPECT_EQ(usmManager->getSVMAlloc(ipcPtr)->getAllocId(), currentAllocationCount);
|
||||
EXPECT_EQ(usmManager->getSVMAlloc(ipcPtr)->getAllocId(), newAllocationCount);
|
||||
|
||||
result = context->closeIpcMemHandle(ipcPtr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
|
|
@ -1963,6 +1963,7 @@ HWTEST_P(UpdateResidencyContainerMultipleDevicesTest,
|
|||
allocData.gpuAllocations.addAllocation(&gfxAllocation);
|
||||
allocData.memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocData.device = &device->getDevice();
|
||||
allocData.setAllocId(1u);
|
||||
|
||||
uint32_t pCmdBufferPeer[1024];
|
||||
MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(),
|
||||
|
@ -1971,7 +1972,7 @@ HWTEST_P(UpdateResidencyContainerMultipleDevicesTest,
|
|||
allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer);
|
||||
allocDataPeer.memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocDataPeer.device = &peerDevice->getDevice();
|
||||
|
||||
allocDataPeer.setAllocId(2u);
|
||||
svmManager->insertSVMAlloc(allocData);
|
||||
svmManager->insertSVMAlloc(allocDataPeer);
|
||||
EXPECT_EQ(2u, svmManager->getNumAllocs());
|
||||
|
@ -2007,6 +2008,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
|
|||
allocData.gpuAllocations.addAllocation(&gfxAllocation);
|
||||
allocData.memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocData.device = &device->getDevice();
|
||||
allocData.setAllocId(1u);
|
||||
|
||||
uint32_t pCmdBufferPeer[1024];
|
||||
MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(),
|
||||
|
@ -2015,6 +2017,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
|
|||
allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer);
|
||||
allocDataPeer.memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocDataPeer.device = &peerDevice->getDevice();
|
||||
allocDataPeer.setAllocId(2u);
|
||||
|
||||
svmManager->insertSVMAlloc(allocData);
|
||||
svmManager->insertSVMAlloc(allocDataPeer);
|
||||
|
@ -2083,6 +2086,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
|
|||
allocData0.gpuAllocations.addAllocation(&gfxAllocation);
|
||||
allocData0.memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocData0.device = &subDevice0->getDevice();
|
||||
allocData0.setAllocId(1u);
|
||||
|
||||
uint32_t pCmdBufferPeer[1024];
|
||||
MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(),
|
||||
|
@ -2091,6 +2095,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
|
|||
allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer);
|
||||
allocData1.memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocData1.device = &subDevice1->getDevice();
|
||||
allocData1.setAllocId(2u);
|
||||
|
||||
svmManager->insertSVMAlloc(allocData0);
|
||||
svmManager->insertSVMAlloc(allocData1);
|
||||
|
@ -2114,6 +2119,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
|
|||
allocData0.gpuAllocations.addAllocation(&gfxAllocation);
|
||||
allocData0.memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocData0.device = &subDevice0->getDevice();
|
||||
allocData0.setAllocId(1u);
|
||||
|
||||
uint32_t pCmdBufferPeer[1024];
|
||||
MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(),
|
||||
|
@ -2122,6 +2128,7 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
|
|||
allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer);
|
||||
allocData1.memoryType = InternalMemoryType::deviceUnifiedMemory;
|
||||
allocData1.device = &subDevice1->getDevice();
|
||||
allocData1.setAllocId(2u);
|
||||
|
||||
svmManager->insertSVMAlloc(allocData0);
|
||||
svmManager->insertSVMAlloc(allocData1);
|
||||
|
|
|
@ -744,6 +744,7 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co
|
|||
std::unique_lock<std::shared_mutex> lock(mtx);
|
||||
bool parseAllAllocations = false;
|
||||
auto entry = indirectAllocationsResidency.find(&commandStreamReceiver);
|
||||
TaskCountType previousCounter = 0;
|
||||
if (entry == indirectAllocationsResidency.end()) {
|
||||
parseAllAllocations = true;
|
||||
|
||||
|
@ -755,13 +756,14 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co
|
|||
} else {
|
||||
if (this->allocationsCounter > entry->second.latestResidentObjectId) {
|
||||
parseAllAllocations = true;
|
||||
previousCounter = entry->second.latestResidentObjectId;
|
||||
entry->second.latestResidentObjectId = this->allocationsCounter;
|
||||
}
|
||||
entry->second.latestSentTaskCount = taskCount;
|
||||
}
|
||||
if (parseAllAllocations) {
|
||||
auto currentCounter = this->allocationsCounter.load();
|
||||
for (auto allocationId = 1u; allocationId <= currentCounter; allocationId++) {
|
||||
for (auto allocationId = static_cast<uint32_t>(previousCounter + 1); allocationId <= currentCounter; allocationId++) {
|
||||
makeResidentForAllocationsWithId(allocationId, commandStreamReceiver);
|
||||
}
|
||||
}
|
||||
|
@ -887,6 +889,7 @@ std::unique_lock<std::mutex> SVMAllocsManager::obtainOwnership() {
|
|||
void SVMAllocsManager::insertSVMAlloc(void *svmPtr, const SvmAllocationData &allocData) {
|
||||
std::unique_lock<std::shared_mutex> lock(mtx);
|
||||
this->svmAllocs.insert(svmPtr, allocData);
|
||||
UNRECOVERABLE_IF(internalAllocationsMap.count(allocData.getAllocId()) > 0);
|
||||
for (auto alloc : allocData.gpuAllocations.getGraphicsAllocations()) {
|
||||
if (alloc != nullptr) {
|
||||
internalAllocationsMap.insert({allocData.getAllocId(), alloc});
|
||||
|
|
|
@ -581,7 +581,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenNewAllocationIsCr
|
|||
// now call with task count 2, first allocation shouldn't be modified
|
||||
svmManager->makeIndirectAllocationsResident(*csr, 2u);
|
||||
|
||||
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
|
||||
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
|
||||
EXPECT_TRUE(graphicsAllocation2->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
|
||||
|
||||
svmManager->freeSVMAlloc(ptr);
|
||||
|
|
Loading…
Reference in New Issue