feature: Add CPU side USM allocation to trim candidate list on page fault

Enable eviction of CPU side USM allocation for UMD migrations on Windows.

Related-To: NEO-8015
Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir 2023-07-22 13:34:23 +00:00 committed by Compute-Runtime-Automation
parent 7fe3ca010e
commit 60a4448a07
13 changed files with 153 additions and 2 deletions

View File

@ -6,6 +6,8 @@
*/
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
#include "level_zero/core/source/cmdlist/cmdlist.h"
@ -40,6 +42,20 @@ void PageFaultManager::transferToGpu(void *ptr, void *device) {
this->evictMemoryAfterImplCopy(allocData->cpuAllocation, deviceImp->getNEODevice());
}
void PageFaultManager::allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) {
L0::DeviceImp *deviceImp = static_cast<L0::DeviceImp *>(pageFaultData.cmdQ);
CommandStreamReceiver *csr = nullptr;
if (deviceImp->getActiveDevice()->getInternalCopyEngine()) {
csr = deviceImp->getActiveDevice()->getInternalCopyEngine()->commandStreamReceiver;
} else {
csr = deviceImp->getActiveDevice()->getInternalEngine().commandStreamReceiver;
}
UNRECOVERABLE_IF(csr == nullptr);
auto osInterface = deviceImp->getNEODevice()->getRootDeviceEnvironment().osInterface.get();
allowCPUMemoryEvictionImpl(ptr, *csr, osInterface);
}
} // namespace NEO
namespace L0 {

View File

@ -783,6 +783,43 @@ TEST_F(CommandListMemAdvisePageFault, givenInvalidPtrAndPageFaultHandlerAndGpuDo
ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}
// Calls the non-mocked allowCPUMemoryEviction through the mock's base* helper and checks that
// the Impl hook ran once and recorded the engine type/usage of the CSR this test selects
// itself (internal copy engine if present, internal engine otherwise).
TEST_F(CommandListMemAdvisePageFault, givenUnifiedMemoryAllocWhenAllowCPUMemoryEvictionIsCalledThenSelectCorrectCsrWithOsContextForEviction) {
    // Shared allocation that serves as the pointer handed to the page fault manager.
    void *ptr = nullptr;
    const size_t size = 10;
    const size_t alignment = 1u;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    // The Level Zero hook reads the device from PageFaultData::cmdQ.
    auto *deviceImp = static_cast<L0::DeviceImp *>(L0::Device::fromHandle(device));
    NEO::PageFaultManager::PageFaultData pageData;
    pageData.cmdQ = deviceImp;

    mockPageFaultManager->baseAllowCPUMemoryEviction(ptr, pageData);
    EXPECT_EQ(mockPageFaultManager->allowCPUMemoryEvictionImplCalled, 1);

    // Repeat the engine selection here to obtain the expected CSR.
    auto *activeDevice = deviceImp->getActiveDevice();
    auto *copyEngine = activeDevice->getInternalCopyEngine();
    CommandStreamReceiver *csr = copyEngine
                                     ? copyEngine->commandStreamReceiver
                                     : activeDevice->getInternalEngine().commandStreamReceiver;
    ASSERT_NE(csr, nullptr);

    auto &expectedOsContext = csr->getOsContext();
    EXPECT_EQ(mockPageFaultManager->engineType, expectedOsContext.getEngineType());
    EXPECT_EQ(mockPageFaultManager->engineUsage, expectedOsContext.getEngineUsage());

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}
TEST_F(CommandListCreate, givenValidPtrThenAppendMemoryPrefetchReturnsSuccess) {
size_t size = 10;
size_t alignment = 1u;

View File

@ -5,11 +5,15 @@
*
*/
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/command_queue/csr_selection_args.h"
namespace NEO {
void PageFaultManager::transferToCpu(void *ptr, size_t size, void *cmdQ) {
@ -26,6 +30,19 @@ void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) {
UNRECOVERABLE_IF(retVal);
auto allocData = memoryData[ptr].unifiedMemoryManager->getSVMAlloc(ptr);
UNRECOVERABLE_IF(allocData == nullptr);
this->evictMemoryAfterImplCopy(allocData->cpuAllocation, &commandQueue->getDevice());
}
void PageFaultManager::allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) {
auto commandQueue = static_cast<CommandQueue *>(pageFaultData.cmdQ);
auto allocData = memoryData[ptr].unifiedMemoryManager->getSVMAlloc(ptr);
UNRECOVERABLE_IF(allocData == nullptr);
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, &allocData->gpuAllocations, {}, commandQueue->getDevice().getRootDeviceIndex(), nullptr};
auto &csr = commandQueue->selectCsrForBuiltinOperation(csrSelectionArgs);
auto osInterface = commandQueue->getDevice().getRootDeviceEnvironment().osInterface.get();
allowCPUMemoryEvictionImpl(ptr, csr, osInterface);
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2022 Intel Corporation
* Copyright (C) 2019-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -105,3 +105,34 @@ TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenGpuTransferIsInvokedThen
svmAllocsManager->freeSVMAlloc(alloc);
cmdQ->device = nullptr;
}
// Calls the non-mocked allowCPUMemoryEviction through the mock's base* helper and checks that
// the Impl hook ran once and recorded the engine type/usage of the CSR the command queue
// selects for a CL_COMMAND_READ_BUFFER builtin operation on the same allocation.
TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenAllowCPUMemoryEvictionIsCalledThenSelectCorrectCsrWithOsContextForEviction) {
    MockExecutionEnvironment executionEnvironment;
    REQUIRE_SVM_OR_SKIP(executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo());

    // Declaration order is kept so that objects are destroyed in the same order as before.
    auto memoryManager = std::make_unique<MockMemoryManager>(executionEnvironment);
    auto svmAllocsManager = std::make_unique<SVMAllocsManager>(memoryManager.get(), false);
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    const auto rootDeviceIndex = device->getRootDeviceIndex();
    RootDeviceIndicesContainer rootDeviceIndices = {rootDeviceIndex};
    std::map<uint32_t, DeviceBitfield> deviceBitfields{{rootDeviceIndex, device->getDeviceBitfield()}};

    constexpr size_t allocSize = 256;
    void *alloc = svmAllocsManager->createSVMAlloc(allocSize, {}, rootDeviceIndices, deviceBitfields);

    auto cmdQ = std::make_unique<CommandQueueMock>();
    cmdQ->device = device.get();
    pageFaultManager->insertAllocation(alloc, allocSize, svmAllocsManager.get(), cmdQ.get(), {});

    // The OpenCL hook reads the command queue from PageFaultData::cmdQ.
    NEO::PageFaultManager::PageFaultData pageData;
    pageData.cmdQ = cmdQ.get();

    pageFaultManager->baseAllowCPUMemoryEviction(alloc, pageData);
    EXPECT_EQ(pageFaultManager->allowCPUMemoryEvictionImplCalled, 1);

    // Repeat the CSR selection here to obtain the expected engine type and usage.
    auto allocData = svmAllocsManager->getSVMAlloc(alloc);
    CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, &allocData->gpuAllocations, {}, cmdQ->getDevice().getRootDeviceIndex(), nullptr};
    auto &expectedCsr = cmdQ->selectCsrForBuiltinOperation(csrSelectionArgs);
    auto &expectedOsContext = expectedCsr.getOsContext();
    EXPECT_EQ(pageFaultManager->engineType, expectedOsContext.getEngineType());
    EXPECT_EQ(pageFaultManager->engineUsage, expectedOsContext.getEngineUsage());

    svmAllocsManager->freeSVMAlloc(alloc);
    cmdQ->device = nullptr;
}

View File

@ -113,6 +113,7 @@ void PageFaultManager::transferAndUnprotectMemory(PageFaultManager *pageFaultHan
pageFaultHandler->migrateStorageToCpuDomain(allocPtr, pageFaultData);
pageFaultHandler->allowCPUMemoryAccess(allocPtr, pageFaultData.size);
pageFaultHandler->setCpuAllocEvictable(true, allocPtr, pageFaultData.unifiedMemoryManager);
pageFaultHandler->allowCPUMemoryEviction(allocPtr, pageFaultData);
}
void PageFaultManager::unprotectAndTransferMemory(PageFaultManager *pageFaultHandler, void *allocPtr, PageFaultData &pageFaultData) {

View File

@ -15,9 +15,11 @@
namespace NEO {
struct MemoryProperties;
class CommandStreamReceiver;
class GraphicsAllocation;
class Device;
class SVMAllocsManager;
class OSInterface;
class PageFaultManager : public NonCopyableOrMovableClass {
public:
@ -53,11 +55,13 @@ class PageFaultManager : public NonCopyableOrMovableClass {
protected:
virtual void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) = 0;
virtual void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) = 0;
MOCKABLE_VIRTUAL bool verifyPageFault(void *ptr);
MOCKABLE_VIRTUAL void transferToGpu(void *ptr, void *cmdQ);
MOCKABLE_VIRTUAL void setAubWritable(bool writable, void *ptr, SVMAllocsManager *unifiedMemoryManager);
MOCKABLE_VIRTUAL void setCpuAllocEvictable(bool evictable, void *ptr, SVMAllocsManager *unifiedMemoryManager);
MOCKABLE_VIRTUAL void allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData);
static void transferAndUnprotectMemory(PageFaultManager *pageFaultHandler, void *alloc, PageFaultData &pageFaultData);
static void unprotectAndTransferMemory(PageFaultManager *pageFaultHandler, void *alloc, PageFaultData &pageFaultData);

View File

@ -84,6 +84,8 @@ void PageFaultManagerLinux::evictMemoryAfterImplCopy(GraphicsAllocation *allocat
if (evictMemoryAfterCopy) {
device->getRootDeviceEnvironment().memoryOperationsInterface->evict(device, *allocation);
}
};
}
// Linux implementation of the eviction hook: the body is empty, so all three arguments are ignored.
void PageFaultManagerLinux::allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) {}
} // namespace NEO

View File

@ -25,6 +25,7 @@ class PageFaultManagerLinux : public PageFaultManager {
void protectCPUMemoryAccess(void *ptr, size_t size) override;
void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) override;
void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) override;
void callPreviousHandler(int signal, siginfo_t *info, void *context);
bool previousHandlerRestored = false;

View File

@ -7,7 +7,12 @@
#include "shared/source/page_fault_manager/windows/cpu_page_fault_manager_windows.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/windows/os_context_win.h"
namespace NEO {
std::unique_ptr<PageFaultManager> PageFaultManager::create() {
@ -56,4 +61,16 @@ void PageFaultManagerWindows::protectCPUMemoryAccess(void *ptr, size_t size) {
// Windows implementation of evictMemoryAfterImplCopy: the body is empty, so both arguments are ignored.
void PageFaultManagerWindows::evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) {}
void PageFaultManagerWindows::allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) {
NEO::SvmAllocationData *allocData = memoryData[ptr].unifiedMemoryManager->getSVMAlloc(ptr);
UNRECOVERABLE_IF(allocData == nullptr);
if (osInterface) {
auto &residencyController = static_cast<OsContextWin *>(&csr.getOsContext())->getResidencyController();
auto lock = residencyController.acquireLock();
residencyController.addToTrimCandidateList(allocData->cpuAllocation);
}
}
} // namespace NEO

View File

@ -26,6 +26,7 @@ class PageFaultManagerWindows : public PageFaultManager {
void protectCPUMemoryAccess(void *ptr, size_t size) override;
void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) override;
void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) override;
static std::function<LONG(struct _EXCEPTION_POINTERS *exceptionInfo)> pageFaultHandler;
PVOID previousHandler;

View File

@ -7,6 +7,9 @@
#pragma once
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
using namespace NEO;
@ -48,6 +51,9 @@ class MockPageFaultManager : public PageFaultManager {
setCpuAllocEvictableCalled++;
isCpuAllocEvictable = evictable;
}
// Mock override: only increments allowCPUMemoryEvictionCalled; both arguments are ignored
// and nothing is forwarded to allowCPUMemoryEvictionImpl.
void allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) override {
allowCPUMemoryEvictionCalled++;
}
// Test helper: invokes PageFaultManager::setAubWritable through a class-qualified call,
// so the base-class implementation runs without virtual dispatch.
void baseAubWritable(bool writable, void *ptr, SVMAllocsManager *unifiedMemoryManager) {
PageFaultManager::setAubWritable(writable, ptr, unifiedMemoryManager);
}
@ -60,8 +66,17 @@ class MockPageFaultManager : public PageFaultManager {
// Test helper: invokes PageFaultManager::setCpuAllocEvictable through a class-qualified call,
// so the base-class implementation runs without virtual dispatch.
void baseCpuAllocEvictable(bool evictable, void *ptr, SVMAllocsManager *unifiedMemoryManager) {
PageFaultManager::setCpuAllocEvictable(evictable, ptr, unifiedMemoryManager);
}
// Test helper: invokes PageFaultManager::allowCPUMemoryEviction through a class-qualified call,
// so the base-class implementation runs instead of this mock's counting override.
void baseAllowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) {
PageFaultManager::allowCPUMemoryEviction(ptr, pageFaultData);
}
// Mock override with an empty body: both arguments are ignored.
void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) override {}
// Mock override: counts the call and records the engine type and engine usage reported by
// the OS context of `csr`, so tests can check which CSR was selected. `ptr` and
// `osInterface` are not used.
void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) override {
    ++allowCPUMemoryEvictionImplCalled;

    auto &selectedOsContext = csr.getOsContext();
    engineType = selectedOsContext.getEngineType();
    engineUsage = selectedOsContext.getEngineUsage();
}
// Returns the address of the static PageFaultManager::transferAndUnprotectMemory function,
// converted to void* with reinterpret_cast.
void *getHwHandlerAddress() {
return reinterpret_cast<void *>(PageFaultManager::transferAndUnprotectMemory);
}
@ -80,6 +95,8 @@ class MockPageFaultManager : public PageFaultManager {
int transferToGpuCalled = 0;
int moveAllocationToGpuDomainCalled = 0;
int setCpuAllocEvictableCalled = 0;
int allowCPUMemoryEvictionCalled = 0;
int allowCPUMemoryEvictionImplCalled = 0;
void *transferToCpuAddress = nullptr;
void *transferToGpuAddress = nullptr;
void *allowedMemoryAccessAddress = nullptr;
@ -89,6 +106,8 @@ class MockPageFaultManager : public PageFaultManager {
size_t protectedSize = 0;
bool isAubWritable = true;
bool isCpuAllocEvictable = true;
aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
EngineUsage engineUsage = EngineUsage::EngineUsageCount;
};
template <class T>

View File

@ -649,12 +649,14 @@ TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenMigratedBetweenCpuAndGpu
EXPECT_EQ(pageFaultManager->transferToGpuCalled, 0);
EXPECT_EQ(pageFaultManager->transferToCpuCalled, 0);
EXPECT_EQ(pageFaultManager->setCpuAllocEvictableCalled, 0);
EXPECT_EQ(pageFaultManager->allowCPUMemoryEvictionCalled, 0);
EXPECT_EQ(pageFaultManager->memoryData.size(), 1u);
EXPECT_EQ(pageFaultManager->isCpuAllocEvictable, 1);
pageFaultManager->moveAllocationToGpuDomain(ptr);
EXPECT_EQ(pageFaultManager->moveAllocationToGpuDomainCalled, 1);
EXPECT_EQ(pageFaultManager->setCpuAllocEvictableCalled, 1);
EXPECT_EQ(pageFaultManager->allowCPUMemoryEvictionCalled, 0);
EXPECT_EQ(pageFaultManager->transferToGpuCalled, 1);
EXPECT_EQ(pageFaultManager->protectMemoryCalled, 1);
EXPECT_EQ(pageFaultManager->isCpuAllocEvictable, 0);
@ -666,6 +668,7 @@ TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenMigratedBetweenCpuAndGpu
EXPECT_EQ(pageFaultManager->transferToCpuCalled, 1);
EXPECT_EQ(pageFaultManager->allowMemoryAccessCalled, 1);
EXPECT_EQ(pageFaultManager->setCpuAllocEvictableCalled, 2);
EXPECT_EQ(pageFaultManager->allowCPUMemoryEvictionCalled, 1);
EXPECT_EQ(pageFaultManager->allowedMemoryAccessAddress, ptr);
EXPECT_EQ(pageFaultManager->accessAllowedSize, 10u);
EXPECT_EQ(pageFaultManager->isCpuAllocEvictable, 1);

View File

@ -25,6 +25,8 @@ void PageFaultManager::transferToCpu(void *ptr, size_t size, void *cmdQ) {
}
// Empty definition of PageFaultManager::transferToGpu: performs no work and ignores both arguments.
void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) {
}
// Empty definition of PageFaultManager::allowCPUMemoryEviction: performs no work and ignores both arguments.
void PageFaultManager::allowCPUMemoryEviction(void *ptr, PageFaultData &pageFaultData) {
}
// Returns a CompilerCacheConfig initialized from an empty brace list.
CompilerCacheConfig getDefaultCompilerCacheConfig() { return {}; }
// Always returns nullptr; the `builtin` argument is not inspected.
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) { return nullptr; }