feature: capture multiple CPU page fault handlers

Recorded multiple previous page fault handlers by using a vector in
cpu_page_fault_manager_linux.

Added a handlerIndex member to track the nesting depth of the handler
logic, so that the appropriate previous handler is called at each level.

Related-To: NEO-11563
Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
This commit is contained in:
Young Jin Yoon
2024-09-02 16:46:37 +00:00
committed by Compute-Runtime-Automation
parent 8e5e3d1e6c
commit 4b3a6e9cfe
11 changed files with 317 additions and 35 deletions

View File

@@ -95,14 +95,16 @@ inline void PageFaultManager::migrateStorageToGpuDomain(void *ptr, PageFaultData
pageFaultData.domain = AllocationDomain::gpu;
}
bool PageFaultManager::verifyPageFault(void *ptr) {
bool PageFaultManager::verifyAndHandlePageFault(void *ptr, bool handlePageFault) {
std::unique_lock<SpinLock> lock{mtx};
for (auto &alloc : this->memoryData) {
auto allocPtr = alloc.first;
auto &pageFaultData = alloc.second;
if (ptr >= allocPtr && ptr < ptrOffset(allocPtr, pageFaultData.size)) {
this->setAubWritable(true, allocPtr, pageFaultData.unifiedMemoryManager);
gpuDomainHandler(this, allocPtr, pageFaultData);
if (handlePageFault) {
this->setAubWritable(true, allocPtr, pageFaultData.unifiedMemoryManager);
gpuDomainHandler(this, allocPtr, pageFaultData);
}
return true;
}
}

View File

@@ -59,7 +59,7 @@ class PageFaultManager : public NonCopyableOrMovableClass {
virtual void evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) = 0;
virtual void allowCPUMemoryEvictionImpl(void *ptr, CommandStreamReceiver &csr, OSInterface *osInterface) = 0;
MOCKABLE_VIRTUAL bool verifyPageFault(void *ptr);
MOCKABLE_VIRTUAL bool verifyAndHandlePageFault(void *ptr, bool handlePageFault);
MOCKABLE_VIRTUAL void transferToGpu(void *ptr, void *cmdQ);
MOCKABLE_VIRTUAL void setAubWritable(bool writable, void *ptr, SVMAllocsManager *unifiedMemoryManager);
MOCKABLE_VIRTUAL void setCpuAllocEvictable(bool evictable, void *ptr, SVMAllocsManager *unifiedMemoryManager);

View File

@@ -13,6 +13,7 @@
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include <algorithm>
#include <sys/mman.h>
namespace NEO {
@@ -35,8 +36,9 @@ PageFaultManagerLinux::PageFaultManagerLinux() {
PageFaultManagerLinux::~PageFaultManagerLinux() {
if (!previousHandlerRestored) {
auto retVal = sigaction(SIGSEGV, &previousPageFaultHandler, nullptr);
auto retVal = sigaction(SIGSEGV, &previousPageFaultHandlers[0], nullptr);
UNRECOVERABLE_IF(retVal != 0);
previousPageFaultHandlers.clear();
}
}
@@ -47,8 +49,21 @@ bool PageFaultManagerLinux::checkFaultHandlerFromPageFaultManager() {
}
void PageFaultManagerLinux::registerFaultHandler() {
struct sigaction previousPageFaultHandler = {};
auto retVal = sigaction(SIGSEGV, nullptr, &previousPageFaultHandler);
UNRECOVERABLE_IF(retVal != 0);
auto compareHandler = [&ph = previousPageFaultHandler](const struct sigaction &h) -> bool {
return (h.sa_flags & SA_SIGINFO) ? (h.sa_sigaction == ph.sa_sigaction) : (h.sa_handler == ph.sa_handler);
};
if (std::find_if(previousPageFaultHandlers.begin(),
previousPageFaultHandlers.end(),
compareHandler) == previousPageFaultHandlers.end()) {
previousPageFaultHandlers.push_back(previousPageFaultHandler);
}
pageFaultHandler = [&](int signal, siginfo_t *info, void *context) {
if (!this->verifyPageFault(info->si_addr)) {
if (!this->verifyAndHandlePageFault(info->si_addr, this->handlerIndex == 0)) {
callPreviousHandler(signal, info, context);
}
};
@@ -57,7 +72,7 @@ void PageFaultManagerLinux::registerFaultHandler() {
pageFaultManagerHandler.sa_flags = SA_SIGINFO;
pageFaultManagerHandler.sa_sigaction = pageFaultHandlerWrapper;
auto retVal = sigaction(SIGSEGV, &pageFaultManagerHandler, &previousPageFaultHandler);
retVal = sigaction(SIGSEGV, &pageFaultManagerHandler, &previousPageFaultHandler);
UNRECOVERABLE_IF(retVal != 0);
}
@@ -76,6 +91,9 @@ void PageFaultManagerLinux::protectCPUMemoryAccess(void *ptr, size_t size) {
}
void PageFaultManagerLinux::callPreviousHandler(int signal, siginfo_t *info, void *context) {
handlerIndex++;
UNRECOVERABLE_IF(handlerIndex < 0 && handlerIndex >= static_cast<int>(previousPageFaultHandlers.size()));
auto previousPageFaultHandler = previousPageFaultHandlers[previousPageFaultHandlers.size() - handlerIndex];
if (previousPageFaultHandler.sa_flags & SA_SIGINFO) {
previousPageFaultHandler.sa_sigaction(signal, info, context);
} else {
@@ -83,12 +101,12 @@ void PageFaultManagerLinux::callPreviousHandler(int signal, siginfo_t *info, voi
auto retVal = sigaction(SIGSEGV, &previousPageFaultHandler, nullptr);
UNRECOVERABLE_IF(retVal != 0);
previousHandlerRestored = true;
} else if (previousPageFaultHandler.sa_handler == SIG_IGN) {
return;
} else {
previousPageFaultHandlers.clear();
} else if (previousPageFaultHandler.sa_handler != SIG_IGN) {
previousPageFaultHandler.sa_handler(signal);
}
}
handlerIndex--;
}
void PageFaultManagerLinux::evictMemoryAfterImplCopy(GraphicsAllocation *allocation, Device *device) {

View File

@@ -11,6 +11,7 @@
#include <csignal>
#include <functional>
#include <vector>
namespace NEO {
class PageFaultManagerLinux : public PageFaultManager {
@@ -35,8 +36,9 @@ class PageFaultManagerLinux : public PageFaultManager {
static std::function<void(int signal, siginfo_t *info, void *context)> pageFaultHandler;
struct sigaction previousPageFaultHandler = {};
std::vector<struct sigaction> previousPageFaultHandlers;
bool evictMemoryAfterCopy = false;
int handlerIndex = 0;
};
} // namespace NEO

View File

@@ -39,7 +39,7 @@ bool PageFaultManagerWindows::checkFaultHandlerFromPageFaultManager() {
void PageFaultManagerWindows::registerFaultHandler() {
pageFaultHandler = [this](struct _EXCEPTION_POINTERS *exceptionInfo) {
if (static_cast<long>(exceptionInfo->ExceptionRecord->ExceptionCode) == EXCEPTION_ACCESS_VIOLATION) {
if (this->verifyPageFault(reinterpret_cast<void *>(exceptionInfo->ExceptionRecord->ExceptionInformation[1]))) {
if (this->verifyAndHandlePageFault(reinterpret_cast<void *>(exceptionInfo->ExceptionRecord->ExceptionInformation[1]), true)) {
// this is our fault that we serviced, continue app execution
return EXCEPTION_CONTINUE_EXECUTION;
}