diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h index 2be89f757d30..2a312120d32e 100644 --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -492,6 +492,27 @@ public: } }; +/// RAII wrapper for Mutex +class TryLock { + Mutex &M; + bool Locked = false; + +public: + TryLock(Mutex &M) : M(M) { + int Retry = 100; + while (--Retry && !M.acquire()) + ; + if (Retry) + Locked = true; + } + bool isLocked() { return Locked; } + + ~TryLock() { + if (isLocked()) + M.release(); + } +}; + inline uint64_t alignTo(uint64_t Value, uint64_t Align) { return (Value + Align - 1) / Align * Align; } diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp index c5374c17c316..7ff34409062f 100644 --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -240,6 +240,9 @@ void operator delete(void *Ptr, BumpPtrAllocator &A) { A.deallocate(Ptr); } namespace { +// Disable instrumentation optimizations that sacrifice profile accuracy +extern "C" bool __bolt_instr_conservative; + /// Basic key-val atom stored in our hash struct SimpleHashTableEntryBase { uint64_t Key; @@ -272,10 +275,14 @@ public: /// avoid storing a pointer to it as part of this table (remember there is one /// hash for each indirect call site, so we wan't to minimize our footprint). MapEntry &get(uint64_t Key, BumpPtrAllocator &Alloc) { + if (!__bolt_instr_conservative) { + TryLock L(M); + if (!L.isLocked()) + return NoEntry; + return getOrAllocEntry(Key, Alloc); + } Lock L(M); - if (TableRoot) - return getEntry(TableRoot, Key, Key, Alloc, 0); - return firstAllocation(Key, Alloc); + return getOrAllocEntry(Key, Alloc); } /// Traverses all elements in the table @@ -293,6 +300,7 @@ private: constexpr static uint64_t FollowUpTableMarker = 0x8000000000000000ull; MapEntry *TableRoot{nullptr}; + MapEntry NoEntry; Mutex M; template @@ -355,6 +363,12 @@ private: Entry.Key = reinterpret_cast(NextLevelTbl) | FollowUpTableMarker; return getEntry(NextLevelTbl, Key, Remainder, Alloc, CurLevel + 1); } + + MapEntry &getOrAllocEntry(uint64_t Key, BumpPtrAllocator &Alloc) { + if (TableRoot) + return getEntry(TableRoot, Key, Key, Alloc, 0); + return firstAllocation(Key, Alloc); + } }; template void resetIndCallCounter(T &Entry) { diff --git a/bolt/src/Passes/Instrumentation.cpp b/bolt/src/Passes/Instrumentation.cpp index b116ce5cecc9..5531b216a85b 100644 --- a/bolt/src/Passes/Instrumentation.cpp +++ b/bolt/src/Passes/Instrumentation.cpp @@ -43,9 +43,8 @@ cl::opt InstrumentationFileAppendPID( cl::opt ConservativeInstrumentation( "conservative-instrumentation", - cl::desc( - "don't trust our CFG and disable spanning trees and any counter " - "inference, put a counter everywhere (for debugging, default: false)"), + cl::desc("disable instrumentation optimizations that sacrifice profile " + "accuracy (for debugging, default: false)"), cl::init(false), cl::Optional, cl::cat(BoltInstrCategory)); cl::opt InstrumentationSleepTime( diff --git a/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp index 4412628b4257..1bc276d682b1 100644 --- a/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +++ b/bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp @@ -24,6 +24,7 @@ namespace opts { extern cl::OptionCategory BoltOptCategory; extern cl::opt InstrumentationFileAppendPID; +extern cl::opt ConservativeInstrumentation; extern cl::opt InstrumentationFilename; extern cl::opt InstrumentationBinpath; extern cl::opt InstrumentationSleepTime; @@ -169,6 +170,8 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime); emitIntValue("__bolt_instr_no_counters_clear", !!opts::InstrumentationNoCountersClear, 1); + emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation, + 1); emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1); emitIntValue("__bolt_num_counters", Summary->Counters.size()); emitValue(Summary->IndCallCounterFuncPtr, nullptr); diff --git a/bolt/test/X86/instrumentation-indirect.c b/bolt/test/X86/instrumentation-indirect.c new file mode 100644 index 000000000000..d8b533deba26 --- /dev/null +++ b/bolt/test/X86/instrumentation-indirect.c @@ -0,0 +1,66 @@ +/* Checks that BOLT correctly handles instrumentation of indirect calls + * including case with indirect calls in signals handlers. + */ +#include +#include +#include +#include +#include + +int foo(int x) { return x + 1; } + +int bar(int (*fn)(int), int val) { return fn(val); } + +void sigHandler(int signum) { bar(foo, 3); } + +int main(int argc, char **argv) { + long long i; + pid_t pid, wpid; + int wstatus; + signal(SIGUSR1, sigHandler); + pid = fork(); + if (pid) { + do { + kill(pid, SIGUSR1); + usleep(0); + wpid = waitpid(pid, &wstatus, WNOHANG); + } while (wpid == 0); + printf("[parent]\n"); + } else { + for (i = 0; i < 100000; i++) { + bar(foo, i % 10); + } + printf("[child]\n"); + } + return 0; +} + +/* +REQUIRES: system-linux && lit-max-individual-test-time + +RUN: %host_cc %cflags %s -o %t.exe -Wl,-q -pie -fpie + +RUN: llvm-bolt %t.exe -instrument -instrumentation-file=%t.fdata \ +RUN: -instrumentation-wait-forks=1 -conservative-instrumentation \ +RUN: -o %t.instrumented_conservative + +# Instrumented program needs to finish returning zero +RUN: %t.instrumented_conservative | FileCheck %s -check-prefix=CHECK-OUTPUT + +RUN: llvm-bolt %t.exe -instrument -instrumentation-file=%t.fdata \ +RUN: -instrumentation-wait-forks=1 \ +RUN: -o %t.instrumented + +# Instrumented program needs to finish returning zero +RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT + +# Test that the instrumented data makes sense +RUN: llvm-bolt %t.exe -o %t.bolted -data %t.fdata \ +RUN: -reorder-blocks=cache+ -reorder-functions=hfsort+ \ +RUN: -print-only=interp -print-finalized + +RUN: %t.bolted | FileCheck %s -check-prefix=CHECK-OUTPUT + +CHECK-OUTPUT: [child] +CHECK-OUTPUT: [parent] +*/