[PR] Instrumentation: use TryLock for SimpleHashTable getter

Summary:
This commit introduces TryLock usage for SimpleHashTable getter to
avoid deadlock and relax syscalls usage which causes significant
overhead in runtime.
The old behavior left under -conservative-instrumentation option passed
to instrumentation library.
Also, this commit includes a corresponding test case: instrumentation of
executable which performs indirect calls from common code and signal
handler.

Note: in case if TryLock was failed to acquire the lock - this indirect
call will not be accounted in the resulting profile.

Vasily Leonenko,
Advanced Software Technology Lab, Huawei

(cherry picked from FBD30821949)
This commit is contained in:
Vasily Leonenko
2021-08-08 04:50:06 +08:00
committed by Maksim Panchenko
parent e2480fcc98
commit 9aa134dc2d
5 changed files with 109 additions and 6 deletions

View File

@@ -492,6 +492,27 @@ public:
}
};
/// RAII wrapper for Mutex
class TryLock {
Mutex &M;
bool Locked = false;
public:
TryLock(Mutex &M) : M(M) {
int Retry = 100;
while (--Retry && !M.acquire())
;
if (Retry)
Locked = true;
}
bool isLocked() { return Locked; }
~TryLock() {
if (isLocked())
M.release();
}
};
inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
return (Value + Align - 1) / Align * Align;
}

View File

@@ -240,6 +240,9 @@ void operator delete(void *Ptr, BumpPtrAllocator &A) { A.deallocate(Ptr); }
namespace {
// Disable instrumentation optimizations that sacrifice profile accuracy
extern "C" bool __bolt_instr_conservative;
/// Basic key-val atom stored in our hash
struct SimpleHashTableEntryBase {
uint64_t Key;
@@ -272,10 +275,14 @@ public:
/// avoid storing a pointer to it as part of this table (remember there is one
/// hash for each indirect call site, so we wan't to minimize our footprint).
MapEntry &get(uint64_t Key, BumpPtrAllocator &Alloc) {
if (!__bolt_instr_conservative) {
TryLock L(M);
if (!L.isLocked())
return NoEntry;
return getOrAllocEntry(Key, Alloc);
}
Lock L(M);
if (TableRoot)
return getEntry(TableRoot, Key, Key, Alloc, 0);
return firstAllocation(Key, Alloc);
return getOrAllocEntry(Key, Alloc);
}
/// Traverses all elements in the table
@@ -293,6 +300,7 @@ private:
constexpr static uint64_t FollowUpTableMarker = 0x8000000000000000ull;
MapEntry *TableRoot{nullptr};
MapEntry NoEntry;
Mutex M;
template <typename... Args>
@@ -355,6 +363,12 @@ private:
Entry.Key = reinterpret_cast<uint64_t>(NextLevelTbl) | FollowUpTableMarker;
return getEntry(NextLevelTbl, Key, Remainder, Alloc, CurLevel + 1);
}
MapEntry &getOrAllocEntry(uint64_t Key, BumpPtrAllocator &Alloc) {
if (TableRoot)
return getEntry(TableRoot, Key, Key, Alloc, 0);
return firstAllocation(Key, Alloc);
}
};
template <typename T> void resetIndCallCounter(T &Entry) {

View File

@@ -43,9 +43,8 @@ cl::opt<bool> InstrumentationFileAppendPID(
cl::opt<bool> ConservativeInstrumentation(
"conservative-instrumentation",
cl::desc(
"don't trust our CFG and disable spanning trees and any counter "
"inference, put a counter everywhere (for debugging, default: false)"),
cl::desc("disable instrumentation optimizations that sacrifice profile "
"accuracy (for debugging, default: false)"),
cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
cl::opt<uint32_t> InstrumentationSleepTime(

View File

@@ -24,6 +24,7 @@ namespace opts {
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> InstrumentationFileAppendPID;
extern cl::opt<bool> ConservativeInstrumentation;
extern cl::opt<std::string> InstrumentationFilename;
extern cl::opt<std::string> InstrumentationBinpath;
extern cl::opt<uint32_t> InstrumentationSleepTime;
@@ -169,6 +170,8 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
emitIntValue("__bolt_instr_sleep_time", opts::InstrumentationSleepTime);
emitIntValue("__bolt_instr_no_counters_clear",
!!opts::InstrumentationNoCountersClear, 1);
emitIntValue("__bolt_instr_conservative", !!opts::ConservativeInstrumentation,
1);
emitIntValue("__bolt_instr_wait_forks", !!opts::InstrumentationWaitForks, 1);
emitIntValue("__bolt_num_counters", Summary->Counters.size());
emitValue(Summary->IndCallCounterFuncPtr, nullptr);

View File

@@ -0,0 +1,66 @@
/* Checks that BOLT correctly handles instrumentation of indirect calls
* including case with indirect calls in signals handlers.
*/
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
int foo(int x) { return x + 1; }
int bar(int (*fn)(int), int val) { return fn(val); }
void sigHandler(int signum) { bar(foo, 3); }
int main(int argc, char **argv) {
long long i;
pid_t pid, wpid;
int wstatus;
signal(SIGUSR1, sigHandler);
pid = fork();
if (pid) {
do {
kill(pid, SIGUSR1);
usleep(0);
wpid = waitpid(pid, &wstatus, WNOHANG);
} while (wpid == 0);
printf("[parent]\n");
} else {
for (i = 0; i < 100000; i++) {
bar(foo, i % 10);
}
printf("[child]\n");
}
return 0;
}
/*
REQUIRES: system-linux && lit-max-individual-test-time
RUN: %host_cc %cflags %s -o %t.exe -Wl,-q -pie -fpie
RUN: llvm-bolt %t.exe -instrument -instrumentation-file=%t.fdata \
RUN: -instrumentation-wait-forks=1 -conservative-instrumentation \
RUN: -o %t.instrumented_conservative
# Instrumented program needs to finish returning zero
RUN: %t.instrumented_conservative | FileCheck %s -check-prefix=CHECK-OUTPUT
RUN: llvm-bolt %t.exe -instrument -instrumentation-file=%t.fdata \
RUN: -instrumentation-wait-forks=1 \
RUN: -o %t.instrumented
# Instrumented program needs to finish returning zero
RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT
# Test that the instrumented data makes sense
RUN: llvm-bolt %t.exe -o %t.bolted -data %t.fdata \
RUN: -reorder-blocks=cache+ -reorder-functions=hfsort+ \
RUN: -print-only=interp -print-finalized
RUN: %t.bolted | FileCheck %s -check-prefix=CHECK-OUTPUT
CHECK-OUTPUT: [child]
CHECK-OUTPUT: [parent]
*/