From 3ecbc55ba92bfc761d6d3c20cf96b67534ea6d99 Mon Sep 17 00:00:00 2001 From: Michal Mrozek Date: Fri, 14 Jan 2022 13:56:53 +0000 Subject: [PATCH] Optimize Level Zero indirect allocations handling. Make them resident directly instead of populating residency container Remove finds, not needed, CSR resolves duplicates at makeResident calls Observed gain is 32x for 10k indirect allocations. Co-authored-by: Michal Mrozek Co-authored-by: Dominik Dabek Signed-off-by: Dominik Dabek --- level_zero/core/source/cmdlist/cmdlist.cpp | 15 +-- .../core/source/cmdqueue/cmdqueue_hw.inl | 9 +- .../sources/cmdqueue/test_cmdqueue_1.cpp | 16 +-- .../unit_test/command_queue/CMakeLists.txt | 3 +- .../command_queue_hw_svm_tests.cpp | 87 ++++++++++++++ .../command_queue/enqueue_svm_tests.cpp | 106 ++++++++++-------- .../memory_manager/unified_memory_manager.cpp | 19 +++- .../memory_manager/unified_memory_manager.h | 9 +- .../mocks/mock_command_stream_receiver.h | 7 ++ 9 files changed, 192 insertions(+), 79 deletions(-) create mode 100644 opencl/test/unit_test/command_queue/command_queue_hw_svm_tests.cpp diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index 41db9e53bd..4b0b49ebe1 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -138,16 +138,13 @@ NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) { void CommandList::makeResidentAndMigrate(bool performMigration) { for (auto alloc : commandContainer.getResidencyContainer()) { - if (csr->getResidencyAllocations().end() == - std::find(csr->getResidencyAllocations().begin(), csr->getResidencyAllocations().end(), alloc)) { - csr->makeResident(*alloc); + csr->makeResident(*alloc); - if (performMigration && - (alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU || - alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) { - auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); - pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(alloc->getGpuAddress())); - } + if (performMigration && + (alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU || + alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) { + auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); + pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(alloc->getGpuAddress())); } } } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 42bf29f0df..20033b2c92 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -173,13 +173,14 @@ ze_result_t CommandQueueHw::executeCommandLists( UnifiedMemoryControls unifiedMemoryControls = commandList->getUnifiedMemoryControls(); auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); - svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(), - commandList->commandContainer.getResidencyContainer(), - unifiedMemoryControls.generateMask()); + svmAllocsManager->makeInternalAllocationsResidentAndMigrateIfNeeded(neoDevice->getRootDeviceIndex(), + + unifiedMemoryControls.generateMask(), + *csr, performMigration); + spaceForResidency += svmAllocsManager->getNumAllocs(); } totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size(); - spaceForResidency += commandList->commandContainer.getResidencyContainer().size(); auto commandListPreemption = commandList->getCommandListPreemptionMode(); if (statePreemption != commandListPreemption) { if (preemptionCmdSyncProgramming) { diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index b858b27d8c..8f0a9d5cb6 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -747,7 +747,7 @@ HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerP } using CommandQueueIndirectAllocations = Test; -HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandListsThenExpectedIndirectAllocationsAddedToResidencyContainer) { +HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandListsThenExpectedIndirectAllocationsAreMadeResident) { const ze_command_queue_desc_t desc = {}; MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); @@ -786,19 +786,21 @@ HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandL nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), - std::end(commandList->commandContainer.getResidencyContainer()), + auto &residencyContainer = csr.rememberedResidencies; + + auto itorEvent = std::find(std::begin(residencyContainer), + std::end(residencyContainer), gpuAlloc); - EXPECT_EQ(itorEvent, std::end(commandList->commandContainer.getResidencyContainer())); + EXPECT_EQ(itorEvent, std::end(residencyContainer)); auto commandListHandle = commandList->toHandle(); result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), - std::end(commandList->commandContainer.getResidencyContainer()), + itorEvent = std::find(std::begin(residencyContainer), + std::end(residencyContainer), gpuAlloc); - EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer())); + EXPECT_NE(itorEvent, std::end(residencyContainer)); device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc); commandQueue->destroy(); diff --git a/opencl/test/unit_test/command_queue/CMakeLists.txt b/opencl/test/unit_test/command_queue/CMakeLists.txt index 09d59c1fdf..a477064fad 100644 --- a/opencl/test/unit_test/command_queue/CMakeLists.txt +++ b/opencl/test/unit_test/command_queue/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -9,6 +9,7 @@ set(IGDRCL_SRCS_tests_command_queue ${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_enqueue_fixture.h + ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_svm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args_tests.cpp diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_svm_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_svm_tests.cpp new file mode 100644 index 0000000000..2ba4963e64 --- /dev/null +++ b/opencl/test/unit_test/command_queue/command_queue_hw_svm_tests.cpp @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2018-2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_memory_manager.h" +#include "shared/test/common/mocks/mock_svm_manager.h" +#include "shared/test/common/test_macros/test.h" +#include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" + +#include "opencl/test/unit_test/command_queue/command_queue_fixture.h" +#include "opencl/test/unit_test/fixtures/cl_device_fixture.h" +#include "opencl/test/unit_test/fixtures/context_fixture.h" + +using namespace NEO; + +struct CommandQueueHwSvmTest + : public ClDeviceFixture, + public ContextFixture, + public CommandQueueHwFixture, + ::testing::Test { + + using ContextFixture::SetUp; + + void SetUp() override { + ClDeviceFixture::SetUp(); + cl_device_id device = pClDevice; + ContextFixture::SetUp(1, &device); + CommandQueueHwFixture::SetUp(pClDevice, 0); + executionEnvironment.initGmm(); + memoryManager = std::make_unique(false, true, executionEnvironment); + svmManager = std::make_unique(memoryManager.get(), false); + memoryManager->pageFaultManager.reset(new MockPageFaultManager); + pPageFaultManager = static_cast(memoryManager->pageFaultManager.get()); + } + + void TearDown() override { + CommandQueueHwFixture::TearDown(); + ContextFixture::TearDown(); + ClDeviceFixture::TearDown(); + } + + cl_command_queue_properties properties; + const HardwareInfo *pHwInfo = nullptr; + MockExecutionEnvironment executionEnvironment{defaultHwInfo.get()}; + std::unique_ptr memoryManager; + std::unique_ptr svmManager; + std::set rootDeviceIndices{mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + MockPageFaultManager *pPageFaultManager; +}; + +HWTEST_F(CommandQueueHwSvmTest, givenSharedAllocationWhenInternalAllocationsMadeResidentThenTheyAreMigrated) { + DebugManagerStateRestore restore; + DebugManager.flags.EnableLocalMemory.set(1); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); + auto allocationSize = 4096u; + auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, pCmdQ); + EXPECT_NE(nullptr, ptr); + auto allocation = svmManager->getSVMAlloc(ptr); + auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); + EXPECT_NE(nullptr, allocation->cpuAllocation); + EXPECT_NE(nullptr, gpuAllocation); + EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation->memoryType); + EXPECT_EQ(allocationSize, allocation->size); + + EXPECT_EQ(alignUp(allocationSize, 64 * KB), gpuAllocation->getUnderlyingBufferSize()); + EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize2Mb), allocation->cpuAllocation->getUnderlyingBufferSize()); + + EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_GPU, gpuAllocation->getAllocationType()); + EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_CPU, allocation->cpuAllocation->getAllocationType()); + + EXPECT_EQ(gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); + EXPECT_NE(allocation->cpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); + + EXPECT_NE(nullptr, gpuAllocation->getUnderlyingBuffer()); + EXPECT_EQ(0, pPageFaultManager->transferToGpuCalled); + EXPECT_EQ(0, pPageFaultManager->protectMemoryCalled); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(mockRootDeviceIndex, InternalMemoryType::SHARED_UNIFIED_MEMORY, *pCmdQ->getDevice().getDefaultEngine().commandStreamReceiver, true); + EXPECT_EQ(1, pPageFaultManager->transferToGpuCalled); + EXPECT_EQ(1, pPageFaultManager->protectMemoryCalled); + svmManager->freeSVMAlloc(ptr); +} diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index 99e67efc7e..0775b03eb6 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1390,12 +1390,13 @@ HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreAddedToResidencyContainerThen EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = pDevice->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(pDevice->getRootDeviceIndex(), + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); //only unified memory allocation is added to residency container EXPECT_EQ(1u, residencyContainer.size()); @@ -1414,20 +1415,21 @@ HWTEST_F(EnqueueSvmTest, whenInternalAllocationIsTriedToBeAddedTwiceToResidencyC EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = pDevice->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(pDevice->getRootDeviceIndex(), + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); //only unified memory allocation is added to residency container EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); - svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(pDevice->getRootDeviceIndex(), + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(1u, residencyContainer.size()); svmManager->freeSVMAlloc(unifiedMemoryPtr); @@ -1603,11 +1605,12 @@ struct UpdateResidencyContainerMultipleDevicesTest : public ::testing::WithParam HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenNoAllocationsCreatedThenNoInternalAllocationsAreAddedToResidencyContainer) { - ResidencyContainer residencyContainer; + auto &csr = device->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(), + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(0u, residencyContainer.size()); } @@ -1627,12 +1630,12 @@ HWTEST_P(UpdateResidencyContainerMultipleDevicesTest, givenAllocationThenItIsAdd svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = device->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - - svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), - residencyContainer, - mask); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(), + mask, + csr, true); if (mask == static_cast(type)) { EXPECT_EQ(1u, residencyContainer.size()); @@ -1664,16 +1667,17 @@ HWTEST_P(UpdateResidencyContainerMultipleDevicesTest, svmManager->insertSVMAlloc(allocDataPeer); EXPECT_EQ(2u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = device->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(numRootDevices + 1, - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(numRootDevices + 1, + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(), + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } @@ -1708,11 +1712,12 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, svmManager->insertSVMAlloc(allocDataPeer); EXPECT_EQ(2u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = device->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(), + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } @@ -1730,11 +1735,12 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = device->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::SHARED_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(), + InternalMemoryType::SHARED_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } @@ -1753,11 +1759,12 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = device->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::SHARED_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(), + InternalMemoryType::SHARED_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(0u, residencyContainer.size()); } @@ -1784,11 +1791,12 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, svmManager->insertSVMAlloc(allocData1); EXPECT_EQ(2u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = device->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(device->getDevice().getRootDeviceIndex(), + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(2u, residencyContainer.size()); } @@ -1815,11 +1823,13 @@ HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, svmManager->insertSVMAlloc(allocData1); EXPECT_EQ(2u, svmManager->getNumAllocs()); - ResidencyContainer residencyContainer; + auto &csr = device->getUltCommandStreamReceiver(); + ResidencyContainer &residencyContainer = csr.getResidencyAllocations(); EXPECT_EQ(0u, residencyContainer.size()); - svmManager->addInternalAllocationsToResidencyContainer(peerDevice->getDevice().getRootDeviceIndex(), - residencyContainer, - InternalMemoryType::DEVICE_UNIFIED_MEMORY); + + svmManager->makeInternalAllocationsResidentAndMigrateIfNeeded(peerDevice->getDevice().getRootDeviceIndex(), + InternalMemoryType::DEVICE_UNIFIED_MEMORY, + csr, true); EXPECT_EQ(0u, residencyContainer.size()); } diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 35f8ed5aec..7d4b1161e8 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -70,10 +70,12 @@ SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionP return &iter->second; } -void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex, - ResidencyContainer &residencyContainer, - uint32_t requestedTypesMask) { +void SVMAllocsManager::makeInternalAllocationsResidentAndMigrateIfNeeded(uint32_t rootDeviceIndex, + uint32_t requestedTypesMask, + CommandStreamReceiver &commandStreamReceiver, + bool performMigration) { std::unique_lock lock(mtx); + for (auto &allocation : this->SVMAllocs.allocations) { if (rootDeviceIndex >= allocation.second.gpuAllocations.getGraphicsAllocations().size()) { continue; @@ -85,8 +87,13 @@ void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootD } auto alloc = allocation.second.gpuAllocations.getGraphicsAllocation(rootDeviceIndex); - if (residencyContainer.end() == std::find(residencyContainer.begin(), residencyContainer.end(), alloc)) { - residencyContainer.push_back(alloc); + commandStreamReceiver.makeResident(*alloc); + + if (performMigration && + (alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU || + alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) { + auto pageFaultManager = memoryManager->getPageFaultManager(); + pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(alloc->getGpuAddress())); } } } diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index ccdcf8f56b..cfdbf94b89 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -141,9 +141,10 @@ class SVMAllocsManager { MOCKABLE_VIRTUAL void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap); void removeSvmMapOperation(const void *regionSvmPtr); SvmMapOperation *getSvmMapOperation(const void *regionPtr); - void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex, - ResidencyContainer &residencyContainer, - uint32_t requestedTypesMask); + void makeInternalAllocationsResidentAndMigrateIfNeeded(uint32_t rootDeviceIndex, + uint32_t requestedTypesMask, + CommandStreamReceiver &commandStreamReceiver, + bool performMigration); void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask); void *createUnifiedAllocationWithDeviceStorage(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties); void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData); diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index c78cbdf418..3bc62f5618 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -252,6 +252,12 @@ class MockCsrHw2 : public CommandStreamReceiverHw { return taskCount; } + void makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency) override { + for (auto &surface : allocationsForResidency) { + rememberedResidencies.push_back(surface); + } + CommandStreamReceiver::makeSurfacePackNonResident(allocationsForResidency); + } void programHardwareContext(LinearStream &cmdStream) override { programHardwareContextCalled = true; } @@ -261,6 +267,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw { std::unique_ptr storedTaskStream; size_t storedTaskStreamSize = 0; + ResidencyContainer rememberedResidencies; int flushCalledCount = 0; std::unique_ptr recordedCommandBuffer = nullptr; ResidencyContainer copyOfAllocations;