From 3ecbc55ba92bfc761d6d3c20cf96b67534ea6d99 Mon Sep 17 00:00:00 2001
From: Michal Mrozek <michal.mrozek@intel.com>
Date: Fri, 14 Jan 2022 13:56:53 +0000
Subject: [PATCH] Optimize Level Zero indirect allocations handling.

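Make indirect allocations resident directly through the CSR instead of
first adding them to the command list's residency container.
Remove the std::find duplicate checks: they are not needed because the
CSR resolves duplicates in its makeResident calls.
The observed gain is 32x for 10k indirect allocations.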


Co-authored-by: Michal Mrozek <michal.mrozek@intel.com>
Co-authored-by: Dominik Dabek <dominik.dabek@intel.com>
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
---
 level_zero/core/source/cmdlist/cmdlist.cpp    |  15 +--
 .../core/source/cmdqueue/cmdqueue_hw.inl      |   9 +-
 .../sources/cmdqueue/test_cmdqueue_1.cpp      |  16 +--
 .../unit_test/command_queue/CMakeLists.txt    |   3 +-
 .../command_queue_hw_svm_tests.cpp            |  87 ++++++++++++++
 .../command_queue/enqueue_svm_tests.cpp       | 106 ++++++++++--------
 .../memory_manager/unified_memory_manager.cpp |  19 +++-
 .../memory_manager/unified_memory_manager.h   |   9 +-
 .../mocks/mock_command_stream_receiver.h      |   7 ++
 9 files changed, 192 insertions(+), 79 deletions(-)
 create mode 100644 opencl/test/unit_test/command_queue/command_queue_hw_svm_tests.cpp
diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp
index 41db9e53bd..4b0b49ebe1 100644
--- a/level_zero/core/source/cmdlist/cmdlist.cpp
+++ b/level_zero/core/source/cmdlist/cmdlist.cpp
@@ -138,16 +138,13 @@ NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
 
 void CommandList::makeResidentAndMigrate(bool performMigration) {
     for (auto alloc : commandContainer.getResidencyContainer()) {
-        if (csr->getResidencyAllocations().end() ==
-            std::find(csr->getResidencyAllocations().begin(), csr->getResidencyAllocations().end(), alloc)) {
-            csr->makeResident(*alloc);
+        csr->makeResident(*alloc);
 
-            if (performMigration &&
-                (alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU ||
-                 alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) {
-                auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
-                pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
-            }
+        if (performMigration &&
+            (alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU ||
+             alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) {
+            auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
+            pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
         }
     }
 }
diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
index 42bf29f0df..20033b2c92 100644
--- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
+++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
@@ -173,13 +173,14 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
             UnifiedMemoryControls unifiedMemoryControls = commandList->getUnifiedMemoryControls();
 
             auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
-            svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
-                                                                         commandList->commandContainer.getResidencyContainer(),
-                                                                         unifiedMemoryControls.generateMask());
+            svmAllocsManager->makeInternalAllocationsResidentAndMigrateIfNeeded(neoDevice->getRootDeviceIndex(),
+
+                                                                                unifiedMemoryControls.generateMask(),
+                                                                                *csr, performMigration);
+            spaceForResidency += svmAllocsManager->getNumAllocs();
         }
 
         totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size();
-        spaceForResidency += commandList->commandContainer.getResidencyContainer().size();
         auto commandListPreemption = commandList->getCommandListPreemptionMode();
         if (statePreemption != commandListPreemption) {
             if (preemptionCmdSyncProgramming) {
diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp
index b858b27d8c..8f0a9d5cb6 100644
--- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp
@@ -747,7 +747,7 @@ HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerP
 }
 
 using CommandQueueIndirectAllocations = Test<ModuleFixture>;
-HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandListsThenExpectedIndirectAllocationsAddedToResidencyContainer) {
+HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandListsThenExpectedIndirectAllocationsAreMadeResident) {
     const ze_command_queue_desc_t desc = {};
 
     MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
@@ -786,19 +786,21 @@ HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandL
                                              nullptr);
     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
 
-    auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
-                               std::end(commandList->commandContainer.getResidencyContainer()),
+    auto &residencyContainer = csr.rememberedResidencies;
+
+    auto itorEvent = std::find(std::begin(residencyContainer),
+                               std::end(residencyContainer),
                                gpuAlloc);
-    EXPECT_EQ(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
+    EXPECT_EQ(itorEvent, std::end(residencyContainer));
 
     auto commandListHandle = commandList->toHandle();
     result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
     ASSERT_EQ(ZE_RESULT_SUCCESS, result);
 
-    itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
-                          std::end(commandList->commandContainer.getResidencyContainer()),
+    itorEvent = std::find(std::begin(residencyContainer),
+                          std::end(residencyContainer),
                           gpuAlloc);
-    EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
+    EXPECT_NE(itorEvent, std::end(residencyContainer));
 
     device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);
     commandQueue->destroy();
diff --git a/opencl/test/unit_test/command_queue/CMakeLists.txt b/opencl/test/unit_test/command_queue/CMakeLists.txt
index 09d59c1fdf..a477064fad 100644
--- a/opencl/test/unit_test/command_queue/CMakeLists.txt
+++ b/opencl/test/unit_test/command_queue/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2018-2021 Intel Corporation
+# Copyright (C) 2018-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -9,6 +9,7 @@ set(IGDRCL_SRCS_tests_command_queue
     ${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h
     ${CMAKE_CURRENT_SOURCE_DIR}/command_enqueue_fixture.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_svm_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args_tests.cpp
diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_svm_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_svm_tests.cpp
new file mode 100644
index 0000000000..2ba4963e64
--- /dev/null
+++ b/opencl/test/unit_test/command_queue/command_queue_hw_svm_tests.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2018-2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include "shared/test/common/helpers/debug_manager_state_restore.h"
+#include "shared/test/common/mocks/mock_memory_manager.h"
+#include "shared/test/common/mocks/mock_svm_manager.h"
+#include "shared/test/common/test_macros/test.h"
+#include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h"
+
+#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
+#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
+#include "opencl/test/unit_test/fixtures/context_fixture.h"
+
+using namespace NEO;
+
+// Fixture combining the device, context and command queue fixtures with a MockSVMAllocsManager backed by a MockMemoryManager that owns a MockPageFaultManager.
+struct CommandQueueHwSvmTest : public ClDeviceFixture,
+                               public ContextFixture,
+                               public CommandQueueHwFixture,
+                               ::testing::Test {
+    using ContextFixture::SetUp;
+
+    void SetUp() override {
+        ClDeviceFixture::SetUp();
+        cl_device_id device = pClDevice;
+        ContextFixture::SetUp(1, &device);
+        CommandQueueHwFixture::SetUp(pClDevice, 0);
+        executionEnvironment.initGmm();
+        memoryManager = std::make_unique<MockMemoryManager>(false, true, executionEnvironment);
+        svmManager = std::make_unique<MockSVMAllocsManager>(memoryManager.get(), false);
+        memoryManager->pageFaultManager.reset(new MockPageFaultManager); // ownership passes to memoryManager
+        pPageFaultManager = static_cast<MockPageFaultManager *>(memoryManager->pageFaultManager.get());
+    }
+
+    void TearDown() override {
+        CommandQueueHwFixture::TearDown(); // torn down in reverse order of SetUp
+        ContextFixture::TearDown();
+        ClDeviceFixture::TearDown();
+    }
+
+    // Raw pointers and scalars carry default initializers so no member is indeterminate before SetUp() runs.
+    cl_command_queue_properties properties = 0;
+    const HardwareInfo *pHwInfo = nullptr;
+    MockExecutionEnvironment executionEnvironment{defaultHwInfo.get()};
+    std::unique_ptr<MockMemoryManager> memoryManager;
+    std::unique_ptr<MockSVMAllocsManager> svmManager;
+    std::set<uint32_t> rootDeviceIndices{mockRootDeviceIndex};
+    std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
+    MockPageFaultManager *pPageFaultManager = nullptr; // non-owning view of memoryManager->pageFaultManager
+};
+
+HWTEST_F(CommandQueueHwSvmTest, givenSharedAllocationWhenInternalAllocationsMadeResidentThenTheyAreMigrated) {
+    // Checks that making internal allocations resident with performMigration set makes the mock page fault manager record one transfer-to-GPU and one protect-memory call.
+    DebugManagerStateRestore restore;
+    DebugManager.flags.EnableLocalMemory.set(1);
+
+    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);
+    auto allocationSize = 4096u;
+    auto ptr = svmManager->createSharedUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties, pCmdQ);
+    ASSERT_NE(nullptr, ptr); // fatal: everything below dereferences data looked up through ptr
+    auto allocation = svmManager->getSVMAlloc(ptr);
+    ASSERT_NE(nullptr, allocation);
+    auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex);
+    ASSERT_NE(nullptr, allocation->cpuAllocation);
+    ASSERT_NE(nullptr, gpuAllocation);
+    EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation->memoryType);
+    EXPECT_EQ(allocationSize, allocation->size);
+    EXPECT_EQ(alignUp(allocationSize, 64 * KB), gpuAllocation->getUnderlyingBufferSize());
+    EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize2Mb), allocation->cpuAllocation->getUnderlyingBufferSize());
+    EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_GPU, gpuAllocation->getAllocationType());
+    EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_CPU, allocation->cpuAllocation->getAllocationType());
+    EXPECT_EQ(gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory);
+    EXPECT_NE(allocation->cpuAllocation->getMemoryPool(), MemoryPool::LocalMemory);
+    EXPECT_NE(nullptr, gpuAllocation->getUnderlyingBuffer());
+
+    // Both counters must still be zero before the call under test.
+    EXPECT_EQ(0, pPageFaultManager->transferToGpuCalled);
+    EXPECT_EQ(0, pPageFaultManager->protectMemoryCalled);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(mockRootDeviceIndex, InternalMemoryType::SHARED_UNIFIED_MEMORY, *pCmdQ->getDevice().getDefaultEngine().commandStreamReceiver, true);
+    EXPECT_EQ(1, pPageFaultManager->transferToGpuCalled);
+    EXPECT_EQ(1, pPageFaultManager->protectMemoryCalled);
+    svmManager->freeSVMAlloc(ptr);
+}
diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
index 99e67efc7e..0775b03eb6 100644
--- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -1390,12 +1390,13 @@ HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreAddedToResidencyContainerThen
     EXPECT_NE(nullptr, unifiedMemoryPtr);
     EXPECT_EQ(2u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
 
-    svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(pDevice->getRootDeviceIndex(),
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
 
     //only unified memory allocation is added to residency container
     EXPECT_EQ(1u, residencyContainer.size());
@@ -1414,20 +1415,21 @@ HWTEST_F(EnqueueSvmTest, whenInternalAllocationIsTriedToBeAddedTwiceToResidencyC
     EXPECT_NE(nullptr, unifiedMemoryPtr);
     EXPECT_EQ(2u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
 
-    svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(pDevice->getRootDeviceIndex(),
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
 
     //only unified memory allocation is added to residency container
     EXPECT_EQ(1u, residencyContainer.size());
     EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr));
 
-    svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(pDevice->getRootDeviceIndex(),
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(1u, residencyContainer.size());
 
     svmManager->freeSVMAlloc(unifiedMemoryPtr);
@@ -1603,11 +1605,12 @@ struct UpdateResidencyContainerMultipleDevicesTest : public ::testing::WithParam
 
 HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
          givenNoAllocationsCreatedThenNoInternalAllocationsAreAddedToResidencyContainer) {
-    ResidencyContainer residencyContainer;
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
-    svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(),
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(0u, residencyContainer.size());
 }
 
@@ -1627,12 +1630,12 @@ HWTEST_P(UpdateResidencyContainerMultipleDevicesTest, givenAllocationThenItIsAdd
     svmManager->insertSVMAlloc(allocData);
     EXPECT_EQ(1u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
-
-    svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           mask);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(),
+                                                                  mask,
+                                                                  csr, true);
 
     if (mask == static_cast<uint32_t>(type)) {
         EXPECT_EQ(1u, residencyContainer.size());
@@ -1664,16 +1667,17 @@ HWTEST_P(UpdateResidencyContainerMultipleDevicesTest,
     svmManager->insertSVMAlloc(allocDataPeer);
     EXPECT_EQ(2u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
-    svmManager->addInternalAllocationsToResidencyContainer(numRootDevices + 1,
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(numRootDevices + 1,
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(0u, residencyContainer.size());
 
-    svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(),
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(1u, residencyContainer.size());
     EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress());
 }
@@ -1708,11 +1712,12 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
     svmManager->insertSVMAlloc(allocDataPeer);
     EXPECT_EQ(2u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
-    svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(),
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(1u, residencyContainer.size());
     EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress());
 }
@@ -1730,11 +1735,12 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
     svmManager->insertSVMAlloc(allocData);
     EXPECT_EQ(1u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
-    svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::SHARED_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(),
+                                                                  InternalMemoryType::SHARED_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(1u, residencyContainer.size());
     EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress());
 }
@@ -1753,11 +1759,12 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
     svmManager->insertSVMAlloc(allocData);
     EXPECT_EQ(1u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
-    svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::SHARED_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(),
+                                                                  InternalMemoryType::SHARED_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(0u, residencyContainer.size());
 }
 
@@ -1784,11 +1791,12 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
     svmManager->insertSVMAlloc(allocData1);
     EXPECT_EQ(2u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
-    svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(),
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(2u, residencyContainer.size());
 }
 
@@ -1815,11 +1823,13 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest,
     svmManager->insertSVMAlloc(allocData1);
     EXPECT_EQ(2u, svmManager->getNumAllocs());
 
-    ResidencyContainer residencyContainer;
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    ResidencyContainer &residencyContainer = csr.getResidencyAllocations();
     EXPECT_EQ(0u, residencyContainer.size());
-    svmManager->addInternalAllocationsToResidencyContainer(peerDevice->getDevice().getRootDeviceIndex(),
-                                                           residencyContainer,
-                                                           InternalMemoryType::DEVICE_UNIFIED_MEMORY);
+
+    svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(peerDevice->getDevice().getRootDeviceIndex(),
+                                                                  InternalMemoryType::DEVICE_UNIFIED_MEMORY,
+                                                                  csr, true);
     EXPECT_EQ(0u, residencyContainer.size());
 }
 
diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp
index 35f8ed5aec..7d4b1161e8 100644
--- a/shared/source/memory_manager/unified_memory_manager.cpp
+++ b/shared/source/memory_manager/unified_memory_manager.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2021 Intel Corporation
+ * Copyright (C) 2019-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,10 +70,12 @@ SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionP
     return &iter->second;
 }
 
-void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex,
-                                                                  ResidencyContainer &residencyContainer,
-                                                                  uint32_t requestedTypesMask) {
+void SVMAllocsManager::makeInternalAllocationsResidentAndMigrateIfNeeded(uint32_t rootDeviceIndex,
+                                                                         uint32_t requestedTypesMask,
+                                                                         CommandStreamReceiver &commandStreamReceiver,
+                                                                         bool performMigration) {
     std::unique_lock<SpinLock> lock(mtx);
+
     for (auto &allocation : this->SVMAllocs.allocations) {
         if (rootDeviceIndex >= allocation.second.gpuAllocations.getGraphicsAllocations().size()) {
             continue;
@@ -85,8 +87,13 @@ void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootD
         }
 
         auto alloc = allocation.second.gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
-        if (residencyContainer.end() == std::find(residencyContainer.begin(), residencyContainer.end(), alloc)) {
-            residencyContainer.push_back(alloc);
+        commandStreamReceiver.makeResident(*alloc);
+
+        if (performMigration &&
+            (alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU ||
+             alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) {
+            auto pageFaultManager = memoryManager->getPageFaultManager();
+            pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
         }
     }
 }
diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h
index ccdcf8f56b..cfdbf94b89 100644
--- a/shared/source/memory_manager/unified_memory_manager.h
+++ b/shared/source/memory_manager/unified_memory_manager.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2021 Intel Corporation
+ * Copyright (C) 2019-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -141,9 +141,10 @@ class SVMAllocsManager {
     MOCKABLE_VIRTUAL void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap);
     void removeSvmMapOperation(const void *regionSvmPtr);
     SvmMapOperation *getSvmMapOperation(const void *regionPtr);
-    void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex,
-                                                    ResidencyContainer &residencyContainer,
-                                                    uint32_t requestedTypesMask);
+    void makeInternalAllocationsResidentAndMigrateIfNeeded(uint32_t rootDeviceIndex,
+                                                           uint32_t requestedTypesMask,
+                                                           CommandStreamReceiver &commandStreamReceiver,
+                                                           bool performMigration);
     void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask);
     void *createUnifiedAllocationWithDeviceStorage(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties);
     void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData);
diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h
index c78cbdf418..3bc62f5618 100644
--- a/shared/test/common/mocks/mock_command_stream_receiver.h
+++ b/shared/test/common/mocks/mock_command_stream_receiver.h
@@ -252,6 +252,12 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
         return taskCount;
     }
 
+    // Test hook: appends every allocation in the passed container to rememberedResidencies
+    // so tests can inspect them afterwards, then forwards to the CommandStreamReceiver implementation.
+    void makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency) override {
+        rememberedResidencies.insert(rememberedResidencies.end(), allocationsForResidency.begin(), allocationsForResidency.end());
+        CommandStreamReceiver::makeSurfacePackNonResident(allocationsForResidency);
+    }
     void programHardwareContext(LinearStream &cmdStream) override {
         programHardwareContextCalled = true;
     }
@@ -261,6 +267,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
     std::unique_ptr<uint8_t> storedTaskStream;
     size_t storedTaskStreamSize = 0;
 
+    ResidencyContainer rememberedResidencies;
     int flushCalledCount = 0;
     std::unique_ptr<CommandBuffer> recordedCommandBuffer = nullptr;
     ResidencyContainer copyOfAllocations;