Optimize Level Zero indirect allocations handling.

Make them resident directly instead of populating the residency container.
Remove the finds; they are not needed because the CSR resolves duplicates at makeResident calls.
Observed gain is 32x for 10k indirect allocations.


Co-authored-by: Michal Mrozek <michal.mrozek@intel.com>

Co-authored-by: Dominik Dabek <dominik.dabek@intel.com>

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Michal Mrozek
2022-01-14 13:56:53 +00:00
committed by Compute-Runtime-Automation
parent 513e6a2ee4
commit 3ecbc55ba9
9 changed files with 192 additions and 79 deletions

View File

@@ -173,13 +173,14 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
UnifiedMemoryControls unifiedMemoryControls = commandList->getUnifiedMemoryControls();
auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
commandList->commandContainer.getResidencyContainer(),
unifiedMemoryControls.generateMask());
svmAllocsManager->makeInternalAllocationsResidentAndMigrateIfNeeded(neoDevice->getRootDeviceIndex(),
unifiedMemoryControls.generateMask(),
*csr, performMigration);
spaceForResidency += svmAllocsManager->getNumAllocs();
}
totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size();
spaceForResidency += commandList->commandContainer.getResidencyContainer().size();
auto commandListPreemption = commandList->getCommandListPreemptionMode();
if (statePreemption != commandListPreemption) {
if (preemptionCmdSyncProgramming) {