From d46bb5950846d4c17429ce34727ac95004bd05a2 Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Thu, 10 Dec 2020 07:54:28 +0000 Subject: [PATCH] Migrate shared-allocations made resident with makeMemoryResident Shared-allocations are currently migrated to GPU by the page-fault manager when calling executeCommandLists. Allocations to migrate are taken from the residency container of each command list being executed. However, if a shared-allocation has been made resident with zeContextMakeMemoryResident(), it is not added to any command list's residency container, and hence it is not migrated to the device. So, add a container of resident allocations to the driver handle and migrate them along with the other allocations. Signed-off-by: Jaime Arteaga --- .../core/source/cmdqueue/cmdqueue_hw.inl | 24 +- .../core/source/context/context_imp.cpp | 21 +- .../core/source/driver/driver_handle_imp.h | 3 + .../sources/context/test_context.cpp | 281 ++++++++++++++++++ .../cpu_page_fault_manager.h | 2 +- 5 files changed, 321 insertions(+), 10 deletions(-) diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 8924c87d8c..38d91bea7e 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -29,6 +29,7 @@ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/device/device.h" +#include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/tools/source/metrics/metric.h" @@ -259,6 +260,15 @@ ze_result_t CommandQueueHw::executeCommandLists( residencyContainer.push_back(device->getDebugSurface()); } } + + NEO::PageFaultManager *pageFaultManager = nullptr; + if (performMigration) { + pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); + if (pageFaultManager == nullptr) { + performMigration = false; + } + } + for (auto i = 0u; i < 
numCommandLists; ++i) { auto commandList = CommandList::fromHandle(phCommandLists[i]); auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations(); @@ -284,14 +294,6 @@ ze_result_t CommandQueueHw::executeCommandLists( commandList->getPrintfFunctionContainer().begin(), commandList->getPrintfFunctionContainer().end()); - NEO::PageFaultManager *pageFaultManager = nullptr; - if (performMigration) { - pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); - if (pageFaultManager == nullptr) { - performMigration = false; - } - } - for (auto alloc : commandList->commandContainer.getResidencyContainer()) { if (residencyContainer.end() == std::find(residencyContainer.begin(), residencyContainer.end(), alloc)) { @@ -308,6 +310,12 @@ ze_result_t CommandQueueHw::executeCommandLists( } } + if (performMigration) { + for (auto alloc : static_cast(device->getDriverHandle())->sharedMakeResidentAllocations) { + pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(alloc.second->getGpuAddress())); + } + } + commandQueuePreemptionMode = statePreemption; if (hFence) { diff --git a/level_zero/core/source/context/context_imp.cpp b/level_zero/core/source/context/context_imp.cpp index 85aedda231..2d0ee45be2 100644 --- a/level_zero/core/source/context/context_imp.cpp +++ b/level_zero/core/source/context/context_imp.cpp @@ -8,8 +8,10 @@ #include "level_zero/core/source/context/context_imp.h" #include "shared/source/memory_manager/memory_operations_handler.h" +#include "shared/source/memory_manager/unified_memory_manager.h" #include "level_zero/core/source/device/device_imp.h" +#include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/memory/memory_operations_helper.h" @@ -93,7 +95,18 @@ ze_result_t ContextImp::makeMemoryResident(ze_device_handle_t hDevice, void *ptr NEO::MemoryOperationsHandler *memoryOperationsIface = 
neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto success = memoryOperationsIface->makeResident(neoDevice, ArrayRef(&allocation, 1)); - return changeMemoryOperationStatusToL0ResultType(success); + ze_result_t res = changeMemoryOperationStatusToL0ResultType(success); + + if (ZE_RESULT_SUCCESS == res) { + auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); + if (allocData && allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { + DriverHandleImp *driverHandleImp = static_cast(device->getDriverHandle()); + std::lock_guard lock(driverHandleImp->sharedMakeResidentAllocationsLock); + driverHandleImp->sharedMakeResidentAllocations.insert({ptr, allocation}); + } + } + + return res; } ze_result_t ContextImp::evictMemory(ze_device_handle_t hDevice, void *ptr, size_t size) { @@ -104,6 +117,12 @@ ze_result_t ContextImp::evictMemory(ze_device_handle_t hDevice, void *ptr, size_ return ZE_RESULT_ERROR_INVALID_ARGUMENT; } + { + DriverHandleImp *driverHandleImp = static_cast(device->getDriverHandle()); + std::lock_guard lock(driverHandleImp->sharedMakeResidentAllocationsLock); + driverHandleImp->sharedMakeResidentAllocations.erase(ptr); + } + NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto success = memoryOperationsIface->evict(neoDevice, *allocation); return changeMemoryOperationStatusToL0ResultType(success); diff --git a/level_zero/core/source/driver/driver_handle_imp.h b/level_zero/core/source/driver/driver_handle_imp.h index 3cc6e7ad0f..5488791d5c 100644 --- a/level_zero/core/source/driver/driver_handle_imp.h +++ b/level_zero/core/source/driver/driver_handle_imp.h @@ -84,6 +84,9 @@ struct DriverHandleImp : public DriverHandle { // Experimental functions std::unordered_map extensionFunctionsLookupMap; + std::mutex sharedMakeResidentAllocationsLock; + std::map sharedMakeResidentAllocations; + std::string affinityMaskString = 
""; std::vector devices; // Spec extensions diff --git a/level_zero/core/test/unit_tests/sources/context/test_context.cpp b/level_zero/core/test/unit_tests/sources/context/test_context.cpp index 60412f8353..876a99f485 100644 --- a/level_zero/core/test/unit_tests/sources/context/test_context.cpp +++ b/level_zero/core/test/unit_tests/sources/context/test_context.cpp @@ -5,10 +5,15 @@ * */ +#include "shared/test/unit_test/mocks/mock_command_stream_receiver.h" +#include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" + +#include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "gtest/gtest.h" @@ -53,5 +58,281 @@ TEST_F(ContextTest, whenCreatingAndDestroyingContextThenSuccessIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, res); } +using ContextMakeMemoryResidentTests = Test; + +TEST_F(ContextMakeMemoryResidentTests, + givenUknownPointerPassedToMakeMemoryResidentThenInvalidArgumentIsReturned) { + const size_t size = 4096; + uint8_t *ptr = new uint8_t[size]; + ze_result_t res = context->makeMemoryResident(device, ptr, size); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); + + delete[] ptr; +} + +TEST_F(ContextMakeMemoryResidentTests, + givenValidPointerPassedToMakeMemoryResidentThenSuccessIsReturned) { + const size_t size = 4096; + void *ptr = nullptr; + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_result_t res = context->allocSharedMem(device->toHandle(), + &deviceDesc, + &hostDesc, + size, + 0, + &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + EXPECT_CALL(*mockMemoryInterface, makeResident) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + res = 
context->makeMemoryResident(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + EXPECT_CALL(*mockMemoryInterface, evict) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + res = context->evictMemory(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + context->freeMem(ptr); +} + +TEST_F(ContextMakeMemoryResidentTests, + whenMakingASharedMemoryResidentThenIsAddedToVectorOfResidentAllocations) { + const size_t size = 4096; + void *ptr = nullptr; + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_result_t res = context->allocSharedMem(device->toHandle(), + &deviceDesc, + &hostDesc, + size, + 0, + &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); + size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); + + EXPECT_CALL(*mockMemoryInterface, makeResident) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + res = context->makeMemoryResident(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); + EXPECT_EQ(previousSize + 1, currentSize); + + EXPECT_CALL(*mockMemoryInterface, evict) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + res = context->evictMemory(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + size_t finalSize = driverHandleImp->sharedMakeResidentAllocations.size(); + EXPECT_EQ(previousSize, finalSize); + + context->freeMem(ptr); +} + +TEST_F(ContextMakeMemoryResidentTests, + whenMakingADeviceMemoryResidentThenIsNotAddedToVectorOfResidentAllocations) { + const size_t size = 4096; + void *ptr = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_result_t res = context->allocDeviceMem(device->toHandle(), + &deviceDesc, + size, + 0, + &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + DriverHandleImp *driverHandleImp = 
static_cast(hostDriverHandle.get()); + size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); + + EXPECT_CALL(*mockMemoryInterface, makeResident) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + res = context->makeMemoryResident(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); + EXPECT_EQ(previousSize, currentSize); + + EXPECT_CALL(*mockMemoryInterface, evict) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + res = context->evictMemory(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + context->freeMem(ptr); +} + +TEST_F(ContextMakeMemoryResidentTests, + whenMakingASharedMemoryResidentButMemoryInterfaceFailsThenIsNotAddedToVectorOfResidentAllocations) { + const size_t size = 4096; + void *ptr = nullptr; + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_result_t res = context->allocSharedMem(device->toHandle(), + &deviceDesc, + &hostDesc, + size, + 0, + &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); + size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); + + EXPECT_CALL(*mockMemoryInterface, makeResident) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::FAILED)); + res = context->makeMemoryResident(device, ptr, size); + EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, res); + + size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); + EXPECT_EQ(previousSize, currentSize); + + context->freeMem(ptr); +} + +struct ContextMakeMemoryResidentAndMigrationTests : public ContextMakeMemoryResidentTests { + struct MockResidentTestsPageFaultManager : public MockPageFaultManager { + void moveAllocationToGpuDomain(void *ptr) override { + moveAllocationToGpuDomainCalledTimes++; + migratedAddress = ptr; + } + uint32_t 
moveAllocationToGpuDomainCalledTimes = 0; + void *migratedAddress = nullptr; + }; + + void SetUp() override { + ContextMakeMemoryResidentTests::SetUp(); + mockMemoryManager = std::make_unique(); + mockPageFaultManager = new MockResidentTestsPageFaultManager; + mockMemoryManager->pageFaultManager.reset(mockPageFaultManager); + memoryManager = device->getDriverHandle()->getMemoryManager(); + device->getDriverHandle()->setMemoryManager(mockMemoryManager.get()); + + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_result_t res = context->allocSharedMem(device->toHandle(), + &deviceDesc, + &hostDesc, + size, + 0, + &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + } + + void TearDown() override { + device->getDriverHandle()->setMemoryManager(memoryManager); + ContextMakeMemoryResidentTests::TearDown(); + } + + const size_t size = 4096; + void *ptr = nullptr; + + std::unique_ptr mockMemoryManager; + MockResidentTestsPageFaultManager *mockPageFaultManager = nullptr; + NEO::MemoryManager *memoryManager = nullptr; +}; + +HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, + whenExecutingCommandListsWithMigrationThenMemoryFromMakeResidentIsMovedToGpu) { + DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); + size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); + + EXPECT_CALL(*mockMemoryInterface, makeResident) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + ze_result_t res = context->makeMemoryResident(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); + EXPECT_EQ(previousSize + 1, currentSize); + + const ze_command_queue_desc_t desc = {}; + MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); + csr.initializeTagAllocation(); + csr.setupContext(*neoDevice->getDefaultEngine().osContext); + + ze_result_t returnValue; + L0::CommandQueue 
*commandQueue = CommandQueue::create(productFamily, + device, + &csr, + &desc, + true, + false, + returnValue); + EXPECT_NE(nullptr, commandQueue); + + EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 0u); + + std::unique_ptr commandList(CommandList::create(productFamily, + device, + NEO::EngineGroupType::Copy, + returnValue)); + auto commandListHandle = commandList->toHandle(); + res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u); + EXPECT_EQ(mockPageFaultManager->migratedAddress, ptr); + + EXPECT_CALL(*mockMemoryInterface, evict) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + res = context->evictMemory(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + commandQueue->destroy(); + context->freeMem(ptr); +} + +HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, + whenExecutingCommandListsWithNoMigrationThenMemoryFromMakeResidentIsNotMovedToGpu) { + DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); + size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); + + EXPECT_CALL(*mockMemoryInterface, makeResident) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + ze_result_t res = context->makeMemoryResident(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); + EXPECT_EQ(previousSize + 1, currentSize); + + const ze_command_queue_desc_t desc = {}; + MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); + csr.initializeTagAllocation(); + csr.setupContext(*neoDevice->getDefaultEngine().osContext); + + ze_result_t returnValue; + L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, + device, + &csr, + &desc, + true, + false, + returnValue); + EXPECT_NE(nullptr, commandQueue); + 
+ EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 0u); + + std::unique_ptr commandList(CommandList::create(productFamily, + device, + NEO::EngineGroupType::Copy, + returnValue)); + auto commandListHandle = commandList->toHandle(); + res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 0u); + EXPECT_EQ(mockPageFaultManager->migratedAddress, nullptr); + + EXPECT_CALL(*mockMemoryInterface, evict) + .WillRepeatedly(testing::Return(NEO::MemoryOperationsStatus::SUCCESS)); + res = context->evictMemory(device, ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + commandQueue->destroy(); + context->freeMem(ptr); +} + } // namespace ult } // namespace L0 \ No newline at end of file diff --git a/shared/source/page_fault_manager/cpu_page_fault_manager.h b/shared/source/page_fault_manager/cpu_page_fault_manager.h index 10ae12b114..427b389e01 100644 --- a/shared/source/page_fault_manager/cpu_page_fault_manager.h +++ b/shared/source/page_fault_manager/cpu_page_fault_manager.h @@ -26,7 +26,7 @@ class PageFaultManager : public NonCopyableOrMovableClass { virtual ~PageFaultManager() = default; - void moveAllocationToGpuDomain(void *ptr); + MOCKABLE_VIRTUAL void moveAllocationToGpuDomain(void *ptr); void moveAllocationsWithinUMAllocsManagerToGpuDomain(SVMAllocsManager *unifiedMemoryManager); void insertAllocation(void *ptr, size_t size, SVMAllocsManager *unifiedMemoryManager, void *cmdQ, const MemoryProperties &memoryProperties); void removeAllocation(void *ptr);