mirror of
https://github.com/intel/llvm.git
synced 2026-01-17 06:40:01 +08:00
adds huge pages support of PIE/no-PIE binaries
This patch adds huge pages support (-hugify) for PIE/no-PIE binaries. It also restores support for kernels < 5.10, where the dynamic loader has a problem with the alignment of page addresses. Differential Revision: https://reviews.llvm.org/D129107
This commit is contained in:
29
bolt/include/bolt/Passes/Hugify.h
Normal file
29
bolt/include/bolt/Passes/Hugify.h
Normal file
@@ -0,0 +1,29 @@
|
||||
//===- bolt/Passes/Hugify.h -------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef BOLT_PASSES_HUGIFY_H
|
||||
#define BOLT_PASSES_HUGIFY_H
|
||||
|
||||
#include "bolt/Passes/BinaryPasses.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
class HugePage : public BinaryFunctionPass {
|
||||
public:
|
||||
HugePage(const cl::opt<bool> &PrintPass) : BinaryFunctionPass(PrintPass) {}
|
||||
|
||||
void runOnFunctions(BinaryContext &BC) override;
|
||||
|
||||
const char *getName() const override { return "HugePage"; }
|
||||
};
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
@@ -22,13 +22,11 @@ class HugifyRuntimeLibrary : public RuntimeLibrary {
|
||||
public:
|
||||
/// Add custom section names generated by the runtime libraries to \p
|
||||
/// SecNames.
|
||||
void addRuntimeLibSections(std::vector<std::string> &SecNames) const final {
|
||||
SecNames.push_back(".bolt.hugify.entries");
|
||||
}
|
||||
void addRuntimeLibSections(std::vector<std::string> &SecNames) const final {}
|
||||
|
||||
void adjustCommandLineOptions(const BinaryContext &BC) const final;
|
||||
|
||||
void emitBinary(BinaryContext &BC, MCStreamer &Streamer) final;
|
||||
void emitBinary(BinaryContext &BC, MCStreamer &Streamer) final {}
|
||||
|
||||
void link(BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld,
|
||||
std::function<void(RuntimeDyld &)> OnLoad) final;
|
||||
|
||||
@@ -44,6 +44,7 @@ extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
|
||||
extern llvm::cl::opt<bool> HotData;
|
||||
extern llvm::cl::opt<bool> HotFunctionsAtEnd;
|
||||
extern llvm::cl::opt<bool> HotText;
|
||||
extern llvm::cl::opt<bool> Hugify;
|
||||
extern llvm::cl::opt<bool> Instrument;
|
||||
extern llvm::cl::opt<std::string> OutputFilename;
|
||||
extern llvm::cl::opt<std::string> PerfData;
|
||||
|
||||
@@ -15,6 +15,7 @@ add_llvm_library(LLVMBOLTPasses
|
||||
FrameOptimizer.cpp
|
||||
HFSort.cpp
|
||||
HFSortPlus.cpp
|
||||
Hugify.cpp
|
||||
IdenticalCodeFolding.cpp
|
||||
IndirectCallPromotion.cpp
|
||||
Inliner.cpp
|
||||
|
||||
50
bolt/lib/Passes/Hugify.cpp
Normal file
50
bolt/lib/Passes/Hugify.cpp
Normal file
@@ -0,0 +1,50 @@
|
||||
//===--- bolt/Passes/Hugify.cpp -------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "bolt/Passes/Hugify.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
#define DEBUG_TYPE "bolt-hugify"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
/// Injects the function `__bolt_hugify_start_program`: a single basic block
/// holding a trampoline to the function located at the binary's start address.
/// Returns without doing anything when there is no runtime library, the binary
/// is not ELF, or no start function address is known.
void HugePage::runOnFunctions(BinaryContext &BC) {
  if (!BC.getRuntimeLibrary())
    return;
  if (!BC.isELF() || !BC.StartFunctionAddress)
    return;

  const BinaryFunction *const EntryFn =
      BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
  assert(EntryFn && "Entry point function not found");

  // Build the trampoline instructions before creating the function that will
  // own them.
  std::vector<MCInst> Trampoline =
      BC.MIB->createSymbolTrampoline(EntryFn->getSymbol(), BC.Ctx.get());

  BinaryFunction *Injected =
      BC.createInjectedBinaryFunction("__bolt_hugify_start_program");

  // The injected function consists of exactly one block with no CFI and no
  // offset in the input binary.
  std::vector<std::unique_ptr<BinaryBasicBlock>> Blocks;
  Blocks.emplace_back(Injected->createBasicBlock(nullptr));
  BinaryBasicBlock &Body = *Blocks.back();
  Body.addInstructions(Trampoline.begin(), Trampoline.end());
  Body.setCFIState(0);
  Body.setOffset(BinaryBasicBlock::INVALID_OFFSET);

  Injected->insertBasicBlocks(nullptr, std::move(Blocks),
                              /*UpdateLayout=*/true,
                              /*UpdateCFIState=*/false);
  Injected->updateState(BinaryFunction::State::CFG_Finalized);
}
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "bolt/Passes/AsmDump.h"
|
||||
#include "bolt/Passes/CMOVConversion.h"
|
||||
#include "bolt/Passes/FrameOptimizer.h"
|
||||
#include "bolt/Passes/Hugify.h"
|
||||
#include "bolt/Passes/IdenticalCodeFolding.h"
|
||||
#include "bolt/Passes/IndirectCallPromotion.h"
|
||||
#include "bolt/Passes/Inliner.h"
|
||||
@@ -333,6 +334,8 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
|
||||
|
||||
if (opts::Instrument)
|
||||
Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
|
||||
else if (opts::Hugify)
|
||||
Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
|
||||
|
||||
Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
|
||||
|
||||
|
||||
@@ -479,6 +479,11 @@ Error RewriteInstance::discoverStorage() {
|
||||
NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign);
|
||||
NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign);
|
||||
|
||||
// Hugify: Additional huge page from left side due to
|
||||
// weird ASLR mapping addresses (4KB aligned)
|
||||
if (opts::Hugify && !BC->HasFixedLoadAddress)
|
||||
NextAvailableAddress += BC->PageAlign;
|
||||
|
||||
if (!opts::UseGnuStack) {
|
||||
// This is where the black magic happens. Creating PHDR table in a segment
|
||||
// other than that containing ELF header is tricky. Some loaders and/or
|
||||
@@ -3719,6 +3724,12 @@ void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) {
|
||||
Address = alignTo(Address, Section->getAlignment());
|
||||
Section->setOutputAddress(Address);
|
||||
Address += Section->getOutputSize();
|
||||
|
||||
// Hugify: Additional huge page from right side due to
|
||||
// weird ASLR mapping addresses (4KB aligned)
|
||||
if (opts::Hugify && !BC->HasFixedLoadAddress &&
|
||||
Section->getName() == BC->getMainCodeSectionName())
|
||||
Address = alignTo(Address, Section->getAlignment());
|
||||
}
|
||||
|
||||
// Make sure we allocate enough space for huge pages.
|
||||
|
||||
@@ -60,35 +60,6 @@ void HugifyRuntimeLibrary::adjustCommandLineOptions(
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits the writable, allocatable section `.bolt.hugify.entries` containing
/// the global symbol `__bolt_hugify_init_ptr`, an 8-byte value referring to
/// the function found at the binary's start address.
/// Prints an error and exits with status 1 when no such function exists.
void HugifyRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) {
  const BinaryFunction *StartFunction =
      BC.getBinaryFunctionAtAddress(*(BC.StartFunctionAddress));
  // The null check must precede any use of StartFunction, including the
  // assertion below.
  if (!StartFunction) {
    errs() << "BOLT-ERROR: failed to locate function at binary start address\n";
    exit(1);
  }
  assert(!StartFunction->isFragment() && "expected main function fragment");

  const auto Flags = BinarySection::getFlags(/*IsReadOnly=*/false,
                                             /*IsText=*/false,
                                             /*IsAllocatable=*/true);
  MCSectionELF *Section =
      BC.Ctx->getELFSection(".bolt.hugify.entries", ELF::SHT_PROGBITS, Flags);

  // __bolt_hugify_init_ptr stores the pointer the hugify library needs to
  // jump to after finishing the init code.
  MCSymbol *InitPtr = BC.Ctx->getOrCreateSymbol("__bolt_hugify_init_ptr");

  Section->setAlignment(llvm::Align(BC.RegularPageSize));
  Streamer.switchSection(Section);

  Streamer.emitLabel(InitPtr);
  Streamer.emitSymbolAttribute(InitPtr, MCSymbolAttr::MCSA_Global);
  Streamer.emitValue(
      MCSymbolRefExpr::create(StartFunction->getSymbol(), *(BC.Ctx)),
      /*Size=*/8);
}
|
||||
|
||||
void HugifyRuntimeLibrary::link(BinaryContext &BC, StringRef ToolPath,
|
||||
RuntimeDyld &RTDyld,
|
||||
std::function<void(RuntimeDyld &)> OnLoad) {
|
||||
|
||||
@@ -27,10 +27,11 @@ set(BOLT_RT_FLAGS
|
||||
-fno-exceptions
|
||||
-fno-rtti
|
||||
-fno-stack-protector
|
||||
-mno-sse)
|
||||
-mno-sse
|
||||
-fPIE)
|
||||
|
||||
# Don't let the compiler think it can create calls to standard libs
|
||||
target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE)
|
||||
target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS})
|
||||
target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS})
|
||||
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
@@ -283,6 +283,22 @@ uint32_t strLen(const char *Str) {
|
||||
return Size;
|
||||
}
|
||||
|
||||
/// Finds the first occurrence of the NUL-terminated string \p Needle inside
/// the NUL-terminated string \p Haystack.
/// \returns a pointer to the first character of the match within \p Haystack,
/// or nullptr when \p Needle does not occur. An empty \p Needle matches at the
/// start of \p Haystack, as with the C library strstr().
void *strStr(const char *const Haystack, const char *const Needle) {
  if (Needle[0] == '\0')
    return const_cast<char *>(Haystack);

  for (const char *Start = Haystack; *Start != '\0'; ++Start) {
    const char *H = Start;
    const char *N = Needle;
    // Advance while both strings agree. Reaching the end of the haystack
    // stops the walk too, because its NUL differs from any needle character.
    while (*N != '\0' && *H == *N) {
      ++H;
      ++N;
    }
    if (*N == '\0')
      return const_cast<char *>(Start);
  }
  return nullptr;
}
|
||||
|
||||
void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) {
|
||||
char Buf[BufSize];
|
||||
char *Ptr = Buf;
|
||||
@@ -310,6 +326,25 @@ unsigned long hexToLong(const char *Str, char Terminator = '\0') {
|
||||
return Res;
|
||||
}
|
||||
|
||||
/// Starting from the character at \p Buf, consume the longest consecutive
/// run of decimal digits (0-9) and convert it to uint32_t. \p End marks the
/// end of the buffer and is never read or passed.
///
/// \p Buf is always advanced to the first character after the digits, even
/// when the conversion fails. \p Ret is written only on success.
///
/// \returns true when at least one digit was consumed and the value fits in
/// uint32_t; false otherwise.
static bool scanUInt32(const char *&Buf, const char *End, uint32_t &Ret) {
  const char *const First = Buf;
  uint64_t Result = 0;
  bool Overflow = false;

  while (Buf < End && *Buf >= '0' && *Buf <= '9') {
    // Stop accumulating once the value no longer fits in 32 bits. Continuing
    // would let the 64-bit accumulator wrap and report a bogus small value.
    if (!Overflow) {
      Result = Result * 10 + static_cast<uint64_t>(*Buf - '0');
      Overflow = Result > 0xFFFFFFFFu;
    }
    ++Buf;
  }

  if (Buf == First || Overflow)
    return false;

  Ret = static_cast<uint32_t>(Result);
  return true;
}
|
||||
|
||||
#if !defined(__APPLE__)
|
||||
// We use a stack-allocated buffer for string manipulation in many pieces of
|
||||
// this code, including the code that prints each line of the fdata file. This
|
||||
@@ -387,6 +422,28 @@ int __madvise(void *addr, size_t length, int advice) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Size in bytes of every character field of UtsNameTy.
#define _UTSNAME_LENGTH 65

// Buffer type accepted by __uname(). Six fixed-size character fields laid out
// back to back.
struct UtsNameTy {
  // Operating system name (e.g., "Linux").
  char sysname[_UTSNAME_LENGTH];
  // Name within "some implementation-defined network".
  char nodename[_UTSNAME_LENGTH];
  // Operating system release (e.g., "2.6.28").
  char release[_UTSNAME_LENGTH];
  // Operating system version.
  char version[_UTSNAME_LENGTH];
  // Hardware identifier.
  char machine[_UTSNAME_LENGTH];
  // NIS or YP domain name.
  char domainname[_UTSNAME_LENGTH];
};
|
||||
|
||||
// Issues syscall number 63 directly, with \p Buf passed in %rdi.
// Returns the low 32 bits of whatever the kernel leaves in %rax.
int __uname(struct UtsNameTy *Buf) {
  int Status;
  __asm__ __volatile__("movq $63, %%rax\n"
                       "syscall\n"
                       : "=a"(Status)
                       : "D"(Buf)
                       : "cc", "rcx", "r11", "memory");
  return Status;
}
|
||||
|
||||
struct timespec {
|
||||
uint64_t tv_sec; /* seconds */
|
||||
uint64_t tv_nsec; /* nanoseconds */
|
||||
@@ -482,6 +539,23 @@ int __fsync(int fd) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Issues syscall number 157 (sys_prctl) directly. Register assignment:
//
//   %rdi         %rsi      %rdx      %r10      %r8
//   int option   arg2      arg3      arg4      arg5
//                (arg2..arg5 are unsigned long)
//
// Returns the low 32 bits of whatever the kernel leaves in %rax.
int __prctl(int Option, unsigned long Arg2, unsigned long Arg3,
            unsigned long Arg4, unsigned long Arg5) {
  int Ret;
  // Explicit register variables pin the arguments that have no dedicated
  // single-letter constraint (%r8, %r10); %rdx is pinned the same way.
  register long rdx asm("rdx") = Arg3;
  register long r8 asm("r8") = Arg5;
  register long r10 asm("r10") = Arg4;
  __asm__ __volatile__("movq $157, %%rax\n"
                       "syscall\n"
                       : "=a"(Ret)
                       : "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8)
                       // Same clobbers as the other syscall wrappers in this
                       // file: `syscall` overwrites %rcx and %r11, and the
                       // compiler must not cache memory across the call.
                       : "cc", "rcx", "r11", "memory");
  return Ret;
}
|
||||
|
||||
#endif
|
||||
|
||||
void reportError(const char *Msg, uint64_t Size) {
|
||||
|
||||
@@ -1,129 +1,179 @@
|
||||
//===- bolt/runtime/hugify.cpp --------------------------------------------===//
|
||||
//===- bolt/runtime/hugify.cpp -------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
#if defined (__x86_64__)
|
||||
#if !defined(__APPLE__)
|
||||
|
||||
#include "common.h"
|
||||
#include <sys/mman.h>
|
||||
|
||||
#pragma GCC visibility push(hidden)
|
||||
|
||||
// Enables a very verbose logging to stderr useful when debugging
|
||||
//#define ENABLE_DEBUG
|
||||
// #define ENABLE_DEBUG
|
||||
|
||||
// Function pointers to init routines in the binary, so we can resume
|
||||
// regular execution of the function that we hooked.
|
||||
extern void (*__bolt_hugify_init_ptr)();
|
||||
#ifdef ENABLE_DEBUG
|
||||
#define DEBUG(X) \
|
||||
{ X; }
|
||||
#else
|
||||
#define DEBUG(X) \
|
||||
{}
|
||||
#endif
|
||||
|
||||
// Function contains a trampoline to _start,
|
||||
// so we can resume regular execution of the function that we hooked.
|
||||
extern void __bolt_hugify_start_program();
|
||||
|
||||
// The __hot_start and __hot_end symbols set by Bolt. We use them to figure
|
||||
// out the range for marking huge pages.
|
||||
extern uint64_t __hot_start;
|
||||
extern uint64_t __hot_end;
|
||||
|
||||
#ifdef MADV_HUGEPAGE
|
||||
/// Check whether the kernel supports THP via corresponding sysfs entry.
|
||||
static bool has_pagecache_thp_support() {
|
||||
char buf[256] = {0};
|
||||
const char *madviseStr = "always [madvise] never";
|
||||
static void getKernelVersion(uint32_t *Val) {
|
||||
// release should be in the format: %d.%d.%d
|
||||
// major, minor, release
|
||||
struct UtsNameTy UtsName;
|
||||
int Ret = __uname(&UtsName);
|
||||
const char *Buf = UtsName.release;
|
||||
const char *End = Buf + strLen(Buf);
|
||||
const char Delims[2][2] = {".", "."};
|
||||
|
||||
int fd = __open("/sys/kernel/mm/transparent_hugepage/enabled",
|
||||
0 /* O_RDONLY */, 0);
|
||||
if (fd < 0)
|
||||
return false;
|
||||
|
||||
size_t res = __read(fd, buf, 256);
|
||||
if (res < 0)
|
||||
return false;
|
||||
|
||||
int cmp = strnCmp(buf, madviseStr, strLen(madviseStr));
|
||||
return cmp == 0;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
if (!scanUInt32(Buf, End, Val[i])) {
|
||||
return;
|
||||
}
|
||||
if (i < sizeof(Delims) / sizeof(Delims[0])) {
|
||||
const char *Ptr = Delims[i];
|
||||
while (*Ptr != '\0') {
|
||||
if (*Ptr != *Buf) {
|
||||
return;
|
||||
}
|
||||
++Ptr;
|
||||
++Buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void hugify_for_old_kernel(uint8_t *from, uint8_t *to) {
|
||||
size_t size = to - from;
|
||||
/// Check whether the kernel supports THP via corresponding sysfs entry.
|
||||
/// thp works only starting from 5.10
|
||||
static bool hasPagecacheTHPSupport() {
|
||||
char Buf[64];
|
||||
|
||||
uint8_t *mem = reinterpret_cast<uint8_t *>(
|
||||
__mmap(0, size, 0x3 /* PROT_READ | PROT_WRITE*/,
|
||||
0x22 /* MAP_PRIVATE | MAP_ANONYMOUS*/, -1, 0));
|
||||
int FD = __open("/sys/kernel/mm/transparent_hugepage/enabled",
|
||||
0 /* O_RDONLY */, 0);
|
||||
if (FD < 0)
|
||||
return false;
|
||||
|
||||
if (mem == (void *)MAP_FAILED) {
|
||||
char msg[] = "Could not allocate memory for text move\n";
|
||||
reportError(msg, sizeof(msg));
|
||||
memset(Buf, 0, sizeof(Buf));
|
||||
const size_t Res = __read(FD, Buf, sizeof(Buf));
|
||||
if (Res < 0)
|
||||
return false;
|
||||
|
||||
if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]"))
|
||||
return false;
|
||||
|
||||
struct KernelVersionTy {
|
||||
uint32_t major;
|
||||
uint32_t minor;
|
||||
uint32_t release;
|
||||
};
|
||||
|
||||
KernelVersionTy KernelVersion;
|
||||
|
||||
getKernelVersion((uint32_t *)&KernelVersion);
|
||||
if (KernelVersion.major >= 5 && KernelVersion.minor >= 10)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void hugifyForOldKernel(uint8_t *From, uint8_t *To) {
|
||||
const size_t Size = To - From;
|
||||
|
||||
uint8_t *Mem = reinterpret_cast<uint8_t *>(
|
||||
__mmap(0, Size, 0x3 /* PROT_READ | PROT_WRITE */,
|
||||
0x22 /* MAP_PRIVATE | MAP_ANONYMOUS */, -1, 0));
|
||||
|
||||
if (Mem == ((void *)-1) /* MAP_FAILED */) {
|
||||
char Msg[] = "[hugify] could not allocate memory for text move\n";
|
||||
reportError(Msg, sizeof(Msg));
|
||||
}
|
||||
#ifdef ENABLE_DEBUG
|
||||
reportNumber("Allocated temporary space: ", (uint64_t)mem, 16);
|
||||
#endif
|
||||
|
||||
// Copy the hot code to a temproary location.
|
||||
memcpy(mem, from, size);
|
||||
DEBUG(reportNumber("[hugify] allocated temporary address: ", (uint64_t)Mem,
|
||||
16);)
|
||||
DEBUG(reportNumber("[hugify] allocated size: ", (uint64_t)Size, 16);)
|
||||
|
||||
// Copy the hot code to a temporary location.
|
||||
memcpy(Mem, From, Size);
|
||||
|
||||
__prctl(41 /* PR_SET_THP_DISABLE */, 0, 0, 0, 0);
|
||||
// Maps out the existing hot code.
|
||||
if (__mmap(reinterpret_cast<uint64_t>(from), size,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1,
|
||||
0) == (void *)MAP_FAILED) {
|
||||
char msg[] = "failed to mmap memory for large page move terminating\n";
|
||||
reportError(msg, sizeof(msg));
|
||||
if (__mmap(reinterpret_cast<uint64_t>(From), Size,
|
||||
0x3 /* PROT_READ | PROT_WRITE */,
|
||||
0x32 /* MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE */, -1,
|
||||
0) == ((void *)-1) /*MAP_FAILED*/) {
|
||||
char Msg[] =
|
||||
"[hugify] failed to mmap memory for large page move terminating\n";
|
||||
reportError(Msg, sizeof(Msg));
|
||||
}
|
||||
|
||||
// Mark the hot code page to be huge page.
|
||||
if (__madvise(from, size, MADV_HUGEPAGE) == -1) {
|
||||
char msg[] = "failed to allocate large page\n";
|
||||
reportError(msg, sizeof(msg));
|
||||
if (__madvise(From, Size, 14 /* MADV_HUGEPAGE */) == -1) {
|
||||
char Msg[] = "[hugify] setting MADV_HUGEPAGE is failed\n";
|
||||
reportError(Msg, sizeof(Msg));
|
||||
}
|
||||
|
||||
// Copy the hot code back.
|
||||
memcpy(from, mem, size);
|
||||
memcpy(From, Mem, Size);
|
||||
|
||||
// Change permission back to read-only, ignore failure
|
||||
__mprotect(from, size, PROT_READ | PROT_EXEC);
|
||||
__mprotect(From, Size, 0x5 /* PROT_READ | PROT_EXEC */);
|
||||
|
||||
__munmap(mem, size);
|
||||
__munmap(Mem, Size);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C" void __bolt_hugify_self_impl() {
|
||||
#ifdef MADV_HUGEPAGE
|
||||
uint8_t *hotStart = (uint8_t *)&__hot_start;
|
||||
uint8_t *hotEnd = (uint8_t *)&__hot_end;
|
||||
uint8_t *HotStart = (uint8_t *)&__hot_start;
|
||||
uint8_t *HotEnd = (uint8_t *)&__hot_end;
|
||||
// Make sure the start and end are aligned with huge page address
|
||||
const size_t hugePageBytes = 2L * 1024 * 1024;
|
||||
uint8_t *from = hotStart - ((intptr_t)hotStart & (hugePageBytes - 1));
|
||||
uint8_t *to = hotEnd + (hugePageBytes - 1);
|
||||
to -= (intptr_t)to & (hugePageBytes - 1);
|
||||
const size_t HugePageBytes = 2L * 1024 * 1024;
|
||||
uint8_t *From = HotStart - ((intptr_t)HotStart & (HugePageBytes - 1));
|
||||
uint8_t *To = HotEnd + (HugePageBytes - 1);
|
||||
To -= (intptr_t)To & (HugePageBytes - 1);
|
||||
|
||||
#ifdef ENABLE_DEBUG
|
||||
reportNumber("[hugify] hot start: ", (uint64_t)hotStart, 16);
|
||||
reportNumber("[hugify] hot end: ", (uint64_t)hotEnd, 16);
|
||||
reportNumber("[hugify] aligned huge page from: ", (uint64_t)from, 16);
|
||||
reportNumber("[hugify] aligned huge page to: ", (uint64_t)to, 16);
|
||||
#endif
|
||||
DEBUG(reportNumber("[hugify] hot start: ", (uint64_t)HotStart, 16);)
|
||||
DEBUG(reportNumber("[hugify] hot end: ", (uint64_t)HotEnd, 16);)
|
||||
DEBUG(reportNumber("[hugify] aligned huge page from: ", (uint64_t)From, 16);)
|
||||
DEBUG(reportNumber("[hugify] aligned huge page to: ", (uint64_t)To, 16);)
|
||||
|
||||
if (!has_pagecache_thp_support()) {
|
||||
hugify_for_old_kernel(from, to);
|
||||
if (!hasPagecacheTHPSupport()) {
|
||||
DEBUG(report(
|
||||
"[hugify] workaround with memory alignment for kernel < 5.10\n");)
|
||||
hugifyForOldKernel(From, To);
|
||||
return;
|
||||
}
|
||||
|
||||
if (__madvise(from, (to - from), MADV_HUGEPAGE) == -1) {
|
||||
char msg[] = "failed to allocate large page\n";
|
||||
if (__madvise(From, (To - From), 14 /* MADV_HUGEPAGE */) == -1) {
|
||||
char Msg[] = "[hugify] failed to allocate large page\n";
|
||||
// TODO: allow user to control the failure behavior.
|
||||
reportError(msg, sizeof(msg));
|
||||
reportError(Msg, sizeof(Msg));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/// This is hooking ELF's entry, it needs to save all machine state.
|
||||
extern "C" __attribute((naked)) void __bolt_hugify_self() {
|
||||
__asm__ __volatile__(SAVE_ALL
|
||||
"call __bolt_hugify_self_impl\n"
|
||||
RESTORE_ALL
|
||||
"jmp *__bolt_hugify_init_ptr(%%rip)\n"
|
||||
:::);
|
||||
#if defined(__x86_64__)
|
||||
__asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL
|
||||
"jmp __bolt_hugify_start_program\n" ::
|
||||
:);
|
||||
#else
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
27
bolt/test/runtime/X86/hugify.c
Normal file
27
bolt/test/runtime/X86/hugify.c
Normal file
@@ -0,0 +1,27 @@
|
||||
// Make sure BOLT correctly processes --hugify option
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* Prints the line that the FileCheck directives below look for. */
int main(int argc, char **argv) {
  /* The command line is not used by this test. */
  (void)argc;
  (void)argv;

  printf("Hello world\n");
  return 0;
}
|
||||
|
||||
/*
|
||||
REQUIRES: system-linux,bolt-runtime
|
||||
|
||||
RUN: %clang %cflags -no-pie %s -o %t.nopie.exe -Wl,-q
|
||||
RUN: %clang %cflags -fpic -pie %s -o %t.pie.exe -Wl,-q
|
||||
|
||||
RUN: llvm-bolt %t.nopie.exe --lite=0 -o %t.nopie --hugify
|
||||
RUN: llvm-bolt %t.pie.exe --lite=0 -o %t.pie --hugify
|
||||
|
||||
RUN: %t.nopie | FileCheck %s -check-prefix=CHECK-NOPIE
|
||||
|
||||
CHECK-NOPIE: Hello world
|
||||
|
||||
RUN: %t.pie | FileCheck %s -check-prefix=CHECK-PIE
|
||||
|
||||
CHECK-PIE: Hello world
|
||||
|
||||
*/
|
||||
Reference in New Issue
Block a user