From 60a4448a07ec3625d8b5721952a9c7ab7e5f1036 Mon Sep 17 00:00:00 2001 From: "Milczarek, Slawomir" Date: Sat, 22 Jul 2023 13:34:23 +0000 Subject: [PATCH] feature: Add CPU side USM allocation to trim candidate list on page fault Enable eviction of CPU side USM allocation for UMD migrations on Windows. Related-To: NEO-8015 Signed-off-by: Milczarek, Slawomir --- .../memory/cpu_page_fault_memory_manager.cpp | 16 ++++++++ .../sources/cmdlist/test_cmdlist_1.cpp | 37 +++++++++++++++++++ .../cpu_page_fault_manager_memory_sync.cpp | 17 +++++++++ ...u_page_fault_manager_memory_sync_tests.cpp | 33 ++++++++++++++++- .../cpu_page_fault_manager.cpp | 1 + .../cpu_page_fault_manager.h | 4 ++ .../linux/cpu_page_fault_manager_linux.cpp | 4 +- .../linux/cpu_page_fault_manager_linux.h | 1 + .../cpu_page_fault_manager_windows.cpp | 17 +++++++++ .../windows/cpu_page_fault_manager_windows.h | 1 + .../mocks/mock_cpu_page_fault_manager.h | 19 ++++++++++ .../cpu_page_fault_manager_tests.cpp | 3 ++ shared/test/unit_test/ult_specific_config.cpp | 2 + 13 files changed, 153 insertions(+), 2 deletions(-) diff --git a/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp b/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp index 5053cc6101..81c0c5d431 100644 --- a/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp +++ b/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp @@ -6,6 +6,8 @@ */ #include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/os_interface/os_interface.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "level_zero/core/source/cmdlist/cmdlist.h" @@ -40,6 +42,20 @@ void PageFaultManager::transferToGpu(void *ptr, void *device) { this->evictMemoryAfterImplCopy(allocData->cpuAllocation, deviceImp->getNEODevice()); } +void PageFaultManager::allowCPUMemoryEviction(void *ptr, PageFaultData 
&pageFaultData) { + L0::DeviceImp *deviceImp = static_cast(pageFaultData.cmdQ); + + CommandStreamReceiver *csr = nullptr; + if (deviceImp->getActiveDevice()->getInternalCopyEngine()) { + csr = deviceImp->getActiveDevice()->getInternalCopyEngine()->commandStreamReceiver; + } else { + csr = deviceImp->getActiveDevice()->getInternalEngine().commandStreamReceiver; + } + UNRECOVERABLE_IF(csr == nullptr); + auto osInterface = deviceImp->getNEODevice()->getRootDeviceEnvironment().osInterface.get(); + + allowCPUMemoryEvictionImpl(ptr, *csr, osInterface); +} } // namespace NEO namespace L0 { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 8cdd38aa8b..e7d5e29e9c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -783,6 +783,43 @@ TEST_F(CommandListMemAdvisePageFault, givenInvalidPtrAndPageFaultHandlerAndGpuDo ASSERT_EQ(res, ZE_RESULT_SUCCESS); } +TEST_F(CommandListMemAdvisePageFault, givenUnifiedMemoryAllocWhenAllowCPUMemoryEvictionIsCalledThenSelectCorrectCsrWithOsContextForEviction) { + size_t size = 10; + size_t alignment = 1u; + void *ptr = nullptr; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_host_mem_alloc_desc_t hostDesc = {}; + auto res = context->allocSharedMem(device->toHandle(), + &deviceDesc, + &hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_NE(nullptr, ptr); + + L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device))); + + NEO::PageFaultManager::PageFaultData pageData; + pageData.cmdQ = deviceImp; + + mockPageFaultManager->baseAllowCPUMemoryEviction(ptr, pageData); + EXPECT_EQ(mockPageFaultManager->allowCPUMemoryEvictionImplCalled, 1); + + CommandStreamReceiver *csr = nullptr; + if (deviceImp->getActiveDevice()->getInternalCopyEngine()) { + csr = 
deviceImp->getActiveDevice()->getInternalCopyEngine()->commandStreamReceiver; + } else { + csr = deviceImp->getActiveDevice()->getInternalEngine().commandStreamReceiver; + } + ASSERT_NE(csr, nullptr); + + EXPECT_EQ(mockPageFaultManager->engineType, csr->getOsContext().getEngineType()); + EXPECT_EQ(mockPageFaultManager->engineUsage, csr->getOsContext().getEngineUsage()); + + res = context->freeMem(ptr); + ASSERT_EQ(res, ZE_RESULT_SUCCESS); +} + TEST_F(CommandListCreate, givenValidPtrThenAppendMemoryPrefetchReturnsSuccess) { size_t size = 10; size_t alignment = 1u; diff --git a/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp b/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp index 803260ad8a..096dda916b 100644 --- a/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp +++ b/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp @@ -5,11 +5,15 @@ * */ +#include "shared/source/device/device.h" +#include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/memory_manager/unified_memory_manager.h" +#include "shared/source/os_interface/os_interface.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "opencl/source/command_queue/command_queue.h" +#include "opencl/source/command_queue/csr_selection_args.h" namespace NEO { void PageFaultManager::transferToCpu(void *ptr, size_t size, void *cmdQ) { @@ -26,6 +30,19 @@ void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) { UNRECOVERABLE_IF(retVal); auto allocData = memoryData[ptr].unifiedMemoryManager->getSVMAlloc(ptr); + UNRECOVERABLE_IF(allocData == nullptr); this->evictMemoryAfterImplCopy(allocData->cpuAllocation, &commandQueue->getDevice()); } +void PageFaultManager::allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) { + auto commandQueue = static_cast(pageFaultData.cmdQ); + + auto allocData = 
memoryData[ptr].unifiedMemoryManager->getSVMAlloc(ptr); + UNRECOVERABLE_IF(allocData == nullptr); + CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, &allocData->gpuAllocations, {}, commandQueue->getDevice().getRootDeviceIndex(), nullptr}; + auto &csr = commandQueue->selectCsrForBuiltinOperation(csrSelectionArgs); + auto osInterface = commandQueue->getDevice().getRootDeviceEnvironment().osInterface.get(); + + allowCPUMemoryEvictionImpl(ptr, csr, osInterface); +} + } // namespace NEO diff --git a/opencl/test/unit_test/memory_manager/cpu_page_fault_manager_memory_sync_tests.cpp b/opencl/test/unit_test/memory_manager/cpu_page_fault_manager_memory_sync_tests.cpp index bf79d190a9..75fd403651 100644 --- a/opencl/test/unit_test/memory_manager/cpu_page_fault_manager_memory_sync_tests.cpp +++ b/opencl/test/unit_test/memory_manager/cpu_page_fault_manager_memory_sync_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -105,3 +105,34 @@ TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenGpuTransferIsInvokedThen svmAllocsManager->freeSVMAlloc(alloc); cmdQ->device = nullptr; } + +TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenAllowCPUMemoryEvictionIsCalledThenSelectCorrectCsrWithOsContextForEviction) { + MockExecutionEnvironment executionEnvironment; + REQUIRE_SVM_OR_SKIP(executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()); + + auto memoryManager = std::make_unique(executionEnvironment); + auto svmAllocsManager = std::make_unique(memoryManager.get(), false); + auto device = std::unique_ptr(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}); + auto rootDeviceIndex = device->getRootDeviceIndex(); + RootDeviceIndicesContainer rootDeviceIndices = {rootDeviceIndex}; + std::map deviceBitfields{{rootDeviceIndex, device->getDeviceBitfield()}}; + void *alloc = svmAllocsManager->createSVMAlloc(256, {}, rootDeviceIndices, 
deviceBitfields); + auto cmdQ = std::make_unique(); + cmdQ->device = device.get(); + pageFaultManager->insertAllocation(alloc, 256, svmAllocsManager.get(), cmdQ.get(), {}); + + NEO::PageFaultManager::PageFaultData pageData; + pageData.cmdQ = cmdQ.get(); + + pageFaultManager->baseAllowCPUMemoryEviction(alloc, pageData); + EXPECT_EQ(pageFaultManager->allowCPUMemoryEvictionImplCalled, 1); + + auto allocData = svmAllocsManager->getSVMAlloc(alloc); + CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, &allocData->gpuAllocations, {}, cmdQ->getDevice().getRootDeviceIndex(), nullptr}; + auto &csr = cmdQ->selectCsrForBuiltinOperation(csrSelectionArgs); + EXPECT_EQ(pageFaultManager->engineType, csr.getOsContext().getEngineType()); + EXPECT_EQ(pageFaultManager->engineUsage, csr.getOsContext().getEngineUsage()); + + svmAllocsManager->freeSVMAlloc(alloc); + cmdQ->device = nullptr; +} diff --git a/shared/source/page_fault_manager/cpu_page_fault_manager.cpp b/shared/source/page_fault_manager/cpu_page_fault_manager.cpp index 262c6668c3..c9390c1a25 100644 --- a/shared/source/page_fault_manager/cpu_page_fault_manager.cpp +++ b/shared/source/page_fault_manager/cpu_page_fault_manager.cpp @@ -113,6 +113,7 @@ void PageFaultManager::transferAndUnprotectMemory(PageFaultManager *pageFaultHan pageFaultHandler->migrateStorageToCpuDomain(allocPtr, pageFaultData); pageFaultHandler->allowCPUMemoryAccess(allocPtr, pageFaultData.size); pageFaultHandler->setCpuAllocEvictable(true, allocPtr, pageFaultData.unifiedMemoryManager); + pageFaultHandler->allowCPUMemoryEviction(allocPtr, pageFaultData); } void PageFaultManager::unprotectAndTransferMemory(PageFaultManager *pageFaultHandler, void *allocPtr, PageFaultData &pageFaultData) { diff --git a/shared/source/page_fault_manager/cpu_page_fault_manager.h b/shared/source/page_fault_manager/cpu_page_fault_manager.h index 9f18dc783a..1a018d1e70 100644 --- a/shared/source/page_fault_manager/cpu_page_fault_manager.h +++ 
b/shared/source/page_fault_manager/cpu_page_fault_manager.h @@ -15,9 +15,11 @@ namespace NEO { struct MemoryProperties; +class CommandStreamReceiver; class GraphicsAllocation; class Device; class SVMAllocsManager; +class OSInterface; class PageFaultManager : public NonCopyableOrMovableClass { public: @@ -53,11 +55,13 @@ class PageFaultManager : public NonCopyableOrMovableClass { protected: virtual void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) = 0; + virtual void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) = 0; MOCKABLE_VIRTUAL bool verifyPageFault(void *ptr); MOCKABLE_VIRTUAL void transferToGpu(void *ptr, void *cmdQ); MOCKABLE_VIRTUAL void setAubWritable(bool writable, void *ptr, SVMAllocsManager *unifiedMemoryManager); MOCKABLE_VIRTUAL void setCpuAllocEvictable(bool evictable, void *ptr, SVMAllocsManager *unifiedMemoryManager); + MOCKABLE_VIRTUAL void allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData); static void transferAndUnprotectMemory(PageFaultManager *pageFaultHandler, void *alloc, PageFaultData &pageFaultData); static void unprotectAndTransferMemory(PageFaultManager *pageFaultHandler, void *alloc, PageFaultData &pageFaultData); diff --git a/shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.cpp b/shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.cpp index 274ea825f6..706ed8852a 100644 --- a/shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.cpp +++ b/shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.cpp @@ -84,6 +84,8 @@ void PageFaultManagerLinux::evictMemoryAfterImplCopy(GraphicsAllocation *allocat if (evictMemoryAfterCopy) { device->getRootDeviceEnvironment().memoryOperationsInterface->evict(device, *allocation); } -}; +} + +void PageFaultManagerLinux::allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) {} } // namespace NEO diff --git 
a/shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.h b/shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.h index a64e39b10b..ae770b81da 100644 --- a/shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.h +++ b/shared/source/page_fault_manager/linux/cpu_page_fault_manager_linux.h @@ -25,6 +25,7 @@ class PageFaultManagerLinux : public PageFaultManager { void protectCPUMemoryAccess(void *ptr, size_t size) override; void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) override; + void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) override; void callPreviousHandler(int signal, siginfo_t *info, void *context); bool previousHandlerRestored = false; diff --git a/shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.cpp b/shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.cpp index 6c375530be..d5e339846c 100644 --- a/shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.cpp +++ b/shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.cpp @@ -7,7 +7,12 @@ #include "shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.h" +#include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/device/device.h" #include "shared/source/helpers/debug_helpers.h" +#include "shared/source/memory_manager/unified_memory_manager.h" +#include "shared/source/os_interface/os_interface.h" +#include "shared/source/os_interface/windows/os_context_win.h" namespace NEO { std::unique_ptr PageFaultManager::create() { @@ -56,4 +61,16 @@ void PageFaultManagerWindows::protectCPUMemoryAccess(void *ptr, size_t size) { void PageFaultManagerWindows::evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) {} +void PageFaultManagerWindows::allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) { + NEO::SvmAllocationData 
*allocData = memoryData[ptr].unifiedMemoryManager->getSVMAlloc(ptr); + UNRECOVERABLE_IF(allocData == nullptr); + + if (osInterface) { + auto &residencyController = static_cast(&csr.getOsContext())->getResidencyController(); + + auto lock = residencyController.acquireLock(); + residencyController.addToTrimCandidateList(allocData->cpuAllocation); + } +} + } // namespace NEO diff --git a/shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.h b/shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.h index 3fe0723f0e..a5877105b9 100644 --- a/shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.h +++ b/shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.h @@ -26,6 +26,7 @@ class PageFaultManagerWindows : public PageFaultManager { void protectCPUMemoryAccess(void *ptr, size_t size) override; void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) override; + void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) override; static std::function pageFaultHandler; PVOID previousHandler; diff --git a/shared/test/common/mocks/mock_cpu_page_fault_manager.h b/shared/test/common/mocks/mock_cpu_page_fault_manager.h index ad618bb971..0031daaa41 100644 --- a/shared/test/common/mocks/mock_cpu_page_fault_manager.h +++ b/shared/test/common/mocks/mock_cpu_page_fault_manager.h @@ -7,6 +7,9 @@ #pragma once +#include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/helpers/engine_node_helper.h" +#include "shared/source/os_interface/os_context.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" using namespace NEO; @@ -48,6 +51,9 @@ class MockPageFaultManager : public PageFaultManager { setCpuAllocEvictableCalled++; isCpuAllocEvictable = evictable; } + void allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) override { + allowCPUMemoryEvictionCalled++; + } void baseAubWritable(bool 
writable, void *ptr, SVMAllocsManager *unifiedMemoryManager) { PageFaultManager::setAubWritable(writable, ptr, unifiedMemoryManager); } @@ -60,8 +66,17 @@ class MockPageFaultManager : public PageFaultManager { void baseCpuAllocEvictable(bool evictable, void *ptr, SVMAllocsManager *unifiedMemoryManager) { PageFaultManager::setCpuAllocEvictable(evictable, ptr, unifiedMemoryManager); } + void baseAllowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) { + PageFaultManager::allowCPUMemoryEviction(ptr, pageFaultData); + } void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) override {} + void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) override { + allowCPUMemoryEvictionImplCalled++; + engineType = csr.getOsContext().getEngineType(); + engineUsage = csr.getOsContext().getEngineUsage(); + } + void *getHwHandlerAddress() { return reinterpret_cast(PageFaultManager::transferAndUnprotectMemory); } @@ -80,6 +95,8 @@ class MockPageFaultManager : public PageFaultManager { int transferToGpuCalled = 0; int moveAllocationToGpuDomainCalled = 0; int setCpuAllocEvictableCalled = 0; + int allowCPUMemoryEvictionCalled = 0; + int allowCPUMemoryEvictionImplCalled = 0; void *transferToCpuAddress = nullptr; void *transferToGpuAddress = nullptr; void *allowedMemoryAccessAddress = nullptr; @@ -89,6 +106,8 @@ class MockPageFaultManager : public PageFaultManager { size_t protectedSize = 0; bool isAubWritable = true; bool isCpuAllocEvictable = true; + aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES; + EngineUsage engineUsage = EngineUsage::EngineUsageCount; }; template diff --git a/shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests.cpp b/shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests.cpp index f88a460cee..c229b01df6 100644 --- a/shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests.cpp +++ 
b/shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests.cpp @@ -649,12 +649,14 @@ TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenMigratedBetweenCpuAndGpu EXPECT_EQ(pageFaultManager->transferToGpuCalled, 0); EXPECT_EQ(pageFaultManager->transferToCpuCalled, 0); EXPECT_EQ(pageFaultManager->setCpuAllocEvictableCalled, 0); + EXPECT_EQ(pageFaultManager->allowCPUMemoryEvictionCalled, 0); EXPECT_EQ(pageFaultManager->memoryData.size(), 1u); EXPECT_EQ(pageFaultManager->isCpuAllocEvictable, 1); pageFaultManager->moveAllocationToGpuDomain(ptr); EXPECT_EQ(pageFaultManager->moveAllocationToGpuDomainCalled, 1); EXPECT_EQ(pageFaultManager->setCpuAllocEvictableCalled, 1); + EXPECT_EQ(pageFaultManager->allowCPUMemoryEvictionCalled, 0); EXPECT_EQ(pageFaultManager->transferToGpuCalled, 1); EXPECT_EQ(pageFaultManager->protectMemoryCalled, 1); EXPECT_EQ(pageFaultManager->isCpuAllocEvictable, 0); @@ -666,6 +668,7 @@ TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenMigratedBetweenCpuAndGpu EXPECT_EQ(pageFaultManager->transferToCpuCalled, 1); EXPECT_EQ(pageFaultManager->allowMemoryAccessCalled, 1); EXPECT_EQ(pageFaultManager->setCpuAllocEvictableCalled, 2); + EXPECT_EQ(pageFaultManager->allowCPUMemoryEvictionCalled, 1); EXPECT_EQ(pageFaultManager->allowedMemoryAccessAddress, ptr); EXPECT_EQ(pageFaultManager->accessAllowedSize, 10u); EXPECT_EQ(pageFaultManager->isCpuAllocEvictable, 1); diff --git a/shared/test/unit_test/ult_specific_config.cpp b/shared/test/unit_test/ult_specific_config.cpp index 279ddc86d0..7f002e081b 100644 --- a/shared/test/unit_test/ult_specific_config.cpp +++ b/shared/test/unit_test/ult_specific_config.cpp @@ -25,6 +25,8 @@ void PageFaultManager::transferToCpu(void *ptr, size_t size, void *cmdQ) { } void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) { } +void PageFaultManager::allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) { +} CompilerCacheConfig getDefaultCompilerCacheConfig() { return {}; } const char 
*getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) { return nullptr; }