mirror of
https://github.com/intel/llvm.git
synced 2026-02-05 22:17:23 +08:00
[XRay][compiler-rt] Runtime changes to support custom event logging
Summary: This change implements support for the custom event logging sleds and intrinsics at runtime. For now it only supports handling the sleds in x86_64, with the implementations for other architectures stubbed out to do nothing. NOTE: Work in progress, uploaded for exposition/exploration purposes. Depends on D27503, D30018, and D33032. Reviewers: echristo, javed.absar, timshen Subscribers: mehdi_amini, nemanjai, llvm-commits Differential Revision: https://reviews.llvm.org/D30630 llvm-svn: 302857
This commit is contained in:
@@ -25,6 +25,7 @@ enum XRayEntryType {
|
||||
EXIT = 1,
|
||||
TAIL = 2,
|
||||
LOG_ARGS_ENTRY = 3,
|
||||
CUSTOM_EVENT = 4,
|
||||
};
|
||||
|
||||
/// Provide a function to invoke for when instrumentation points are hit. This
|
||||
@@ -64,6 +65,9 @@ extern int __xray_set_handler_arg1(void (*)(int32_t, XRayEntryType, uint64_t));
|
||||
/// Returns 1 on success, 0 on error.
|
||||
extern int __xray_remove_handler_arg1();
|
||||
|
||||
/// Provide a function to invoke when XRay encounters a custom event.
|
||||
extern int __xray_set_customevent_handler(void (*entry)(void*, std::size_t));
|
||||
|
||||
enum XRayPatchingStatus {
|
||||
NOT_INITIALIZED = 0,
|
||||
SUCCESS = 1,
|
||||
|
||||
@@ -18,8 +18,7 @@
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
extern "C" void __clear_cache(void* start, void* end);
|
||||
extern "C" void __clear_cache(void *start, void *end);
|
||||
|
||||
namespace __xray {
|
||||
|
||||
@@ -86,8 +85,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
|
||||
reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
|
||||
uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
|
||||
}
|
||||
__clear_cache(reinterpret_cast<char*>(FirstAddress),
|
||||
reinterpret_cast<char*>(CurAddress));
|
||||
__clear_cache(reinterpret_cast<char *>(FirstAddress),
|
||||
reinterpret_cast<char *>(CurAddress));
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -107,6 +106,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
|
||||
return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
|
||||
}
|
||||
|
||||
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySled &Sled)
|
||||
XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64?
|
||||
return false;
|
||||
}
|
||||
|
||||
// FIXME: Maybe implement this better?
|
||||
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
|
||||
extern "C" void __clear_cache(void* start, void* end);
|
||||
extern "C" void __clear_cache(void *start, void *end);
|
||||
|
||||
namespace __xray {
|
||||
|
||||
@@ -122,8 +122,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
|
||||
reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
|
||||
uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
|
||||
}
|
||||
__clear_cache(reinterpret_cast<char*>(FirstAddress),
|
||||
reinterpret_cast<char*>(CurAddress));
|
||||
__clear_cache(reinterpret_cast<char *>(FirstAddress),
|
||||
reinterpret_cast<char *>(CurAddress));
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -143,6 +143,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
|
||||
return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
|
||||
}
|
||||
|
||||
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySled &Sled)
|
||||
XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm?
|
||||
return false;
|
||||
}
|
||||
|
||||
// FIXME: Maybe implement this better?
|
||||
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ struct alignas(16) MetadataRecord {
|
||||
NewCPUId,
|
||||
TSCWrap,
|
||||
WalltimeMarker,
|
||||
CustomEventMarker,
|
||||
};
|
||||
// Use 7 bits to identify this record type.
|
||||
/* RecordKinds */ uint8_t RecordKind : 7;
|
||||
|
||||
@@ -41,45 +41,12 @@ namespace __xray {
|
||||
// Global BufferQueue.
|
||||
std::shared_ptr<BufferQueue> BQ;
|
||||
|
||||
__sanitizer::atomic_sint32_t LoggingStatus = {
|
||||
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
|
||||
|
||||
__sanitizer::atomic_sint32_t LogFlushStatus = {
|
||||
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
|
||||
|
||||
std::unique_ptr<FDRLoggingOptions> FDROptions;
|
||||
FDRLoggingOptions FDROptions;
|
||||
|
||||
XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
|
||||
void *Options,
|
||||
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
|
||||
if (OptionsSize != sizeof(FDRLoggingOptions))
|
||||
return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
|
||||
&LoggingStatus, __sanitizer::memory_order_acquire));
|
||||
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
if (!__sanitizer::atomic_compare_exchange_strong(
|
||||
&LoggingStatus, &CurrentStatus,
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
|
||||
__sanitizer::memory_order_release))
|
||||
return static_cast<XRayLogInitStatus>(CurrentStatus);
|
||||
|
||||
FDROptions.reset(new FDRLoggingOptions());
|
||||
memcpy(FDROptions.get(), Options, OptionsSize);
|
||||
bool Success = false;
|
||||
BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
|
||||
if (!Success) {
|
||||
Report("BufferQueue init failed.\n");
|
||||
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
}
|
||||
|
||||
// Install the actual handleArg0 handler after initialising the buffers.
|
||||
__xray_set_handler(fdrLoggingHandleArg0);
|
||||
|
||||
__sanitizer::atomic_store(&LoggingStatus,
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZED,
|
||||
__sanitizer::memory_order_release);
|
||||
Report("XRay FDR init successful.\n");
|
||||
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
|
||||
}
|
||||
__sanitizer::SpinMutex FDROptionsMutex;
|
||||
|
||||
// Must finalize before flushing.
|
||||
XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
|
||||
@@ -108,7 +75,11 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
|
||||
// (fixed-sized) and let the tools reading the buffers deal with the data
|
||||
// afterwards.
|
||||
//
|
||||
int Fd = FDROptions->Fd;
|
||||
int Fd = -1;
|
||||
{
|
||||
__sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
|
||||
Fd = FDROptions.Fd;
|
||||
}
|
||||
if (Fd == -1)
|
||||
Fd = getLogFD();
|
||||
if (Fd == -1) {
|
||||
@@ -120,8 +91,8 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
|
||||
|
||||
// Test for required CPU features and cache the cycle frequency
|
||||
static bool TSCSupported = probeRequiredCPUFeatures();
|
||||
static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency()
|
||||
: __xray::NanosecondsPerSecond;
|
||||
static uint64_t CycleFrequency =
|
||||
TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
|
||||
|
||||
XRayFileHeader Header;
|
||||
Header.Version = 1;
|
||||
@@ -192,8 +163,8 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
|
||||
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
}
|
||||
|
||||
void fdrLoggingHandleArg0(int32_t FuncId,
|
||||
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
|
||||
static std::tuple<uint64_t, unsigned char>
|
||||
getTimestamp() XRAY_NEVER_INSTRUMENT {
|
||||
// We want to get the TSC as early as possible, so that we can check whether
|
||||
// we've seen this CPU before. We also do it before we load anything else, to
|
||||
// allow for forward progress with the scheduling.
|
||||
@@ -203,7 +174,7 @@ void fdrLoggingHandleArg0(int32_t FuncId,
|
||||
// Test once for required CPU features
|
||||
static bool TSCSupported = probeRequiredCPUFeatures();
|
||||
|
||||
if(TSCSupported) {
|
||||
if (TSCSupported) {
|
||||
TSC = __xray::readTSC(CPU);
|
||||
} else {
|
||||
// FIXME: This code needs refactoring as it appears in multiple locations
|
||||
@@ -216,9 +187,102 @@ void fdrLoggingHandleArg0(int32_t FuncId,
|
||||
CPU = 0;
|
||||
TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
|
||||
}
|
||||
return std::make_tuple(TSC, CPU);
|
||||
}
|
||||
|
||||
__xray_fdr_internal::processFunctionHook(FuncId, Entry, TSC, CPU,
|
||||
clock_gettime, LoggingStatus, BQ);
|
||||
void fdrLoggingHandleArg0(int32_t FuncId,
|
||||
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
|
||||
auto TSC_CPU = getTimestamp();
|
||||
__xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
|
||||
std::get<1>(TSC_CPU), clock_gettime,
|
||||
LoggingStatus, BQ);
|
||||
}
|
||||
|
||||
void fdrLoggingHandleCustomEvent(void *Event,
|
||||
std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
|
||||
using namespace __xray_fdr_internal;
|
||||
auto TSC_CPU = getTimestamp();
|
||||
auto &TSC = std::get<0>(TSC_CPU);
|
||||
auto &CPU = std::get<1>(TSC_CPU);
|
||||
thread_local bool Running = false;
|
||||
RecursionGuard Guard{Running};
|
||||
if (!Guard) {
|
||||
assert(Running && "RecursionGuard is buggy!");
|
||||
return;
|
||||
}
|
||||
if (EventSize > std::numeric_limits<int32_t>::max()) {
|
||||
using Empty = struct {};
|
||||
static Empty Once = [&] {
|
||||
Report("Event size too large = %zu ; > max = %d\n", EventSize,
|
||||
std::numeric_limits<int32_t>::max());
|
||||
return Empty();
|
||||
}();
|
||||
(void)Once;
|
||||
}
|
||||
int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
|
||||
if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, clock_gettime))
|
||||
return;
|
||||
|
||||
// Here we need to prepare the log to handle:
|
||||
// - The metadata record we're going to write. (16 bytes)
|
||||
// - The additional data we're going to write. Currently, that's the size of
|
||||
// the event we're going to dump into the log as free-form bytes.
|
||||
if (!prepareBuffer(clock_gettime, MetadataRecSize + EventSize)) {
|
||||
LocalBQ = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
// Write the custom event metadata record, which consists of the following
|
||||
// information:
|
||||
// - 8 bytes (64-bits) for the full TSC when the event started.
|
||||
// - 4 bytes (32-bits) for the length of the data.
|
||||
MetadataRecord CustomEvent;
|
||||
CustomEvent.Type = uint8_t(RecordType::Metadata);
|
||||
CustomEvent.RecordKind =
|
||||
uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
|
||||
constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
|
||||
std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
|
||||
std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
|
||||
std::memcpy(RecordPtr, &CustomEvent, sizeof(CustomEvent));
|
||||
RecordPtr += sizeof(CustomEvent);
|
||||
std::memcpy(RecordPtr, Event, ReducedEventSize);
|
||||
endBufferIfFull();
|
||||
}
|
||||
|
||||
XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
|
||||
void *Options,
|
||||
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
|
||||
if (OptionsSize != sizeof(FDRLoggingOptions))
|
||||
return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
|
||||
&LoggingStatus, __sanitizer::memory_order_acquire));
|
||||
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
if (!__sanitizer::atomic_compare_exchange_strong(
|
||||
&LoggingStatus, &CurrentStatus,
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
|
||||
__sanitizer::memory_order_release))
|
||||
return static_cast<XRayLogInitStatus>(CurrentStatus);
|
||||
|
||||
{
|
||||
__sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
|
||||
memcpy(&FDROptions, Options, OptionsSize);
|
||||
}
|
||||
|
||||
bool Success = false;
|
||||
BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
|
||||
if (!Success) {
|
||||
Report("BufferQueue init failed.\n");
|
||||
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
|
||||
}
|
||||
|
||||
// Install the actual handleArg0 handler after initialising the buffers.
|
||||
__xray_set_handler(fdrLoggingHandleArg0);
|
||||
__xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
|
||||
|
||||
__sanitizer::atomic_store(&LoggingStatus,
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZED,
|
||||
__sanitizer::memory_order_release);
|
||||
Report("XRay FDR init successful.\n");
|
||||
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
|
||||
}
|
||||
|
||||
} // namespace __xray
|
||||
|
||||
@@ -37,6 +37,9 @@
|
||||
|
||||
namespace __xray {
|
||||
|
||||
__sanitizer::atomic_sint32_t LoggingStatus = {
|
||||
XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
|
||||
|
||||
/// We expose some of the state transitions when FDR logging mode is operating
|
||||
/// such that we can simulate a series of log events that may occur without
|
||||
/// and test with determinism without worrying about the real CPU time.
|
||||
@@ -123,12 +126,21 @@ thread_local uint8_t NumTailCalls = 0;
|
||||
constexpr auto MetadataRecSize = sizeof(MetadataRecord);
|
||||
constexpr auto FunctionRecSize = sizeof(FunctionRecord);
|
||||
|
||||
// We use a thread_local variable to keep track of which CPUs we've already
|
||||
// run, and the TSC times for these CPUs. This allows us to stop repeating the
|
||||
// CPU field in the function records.
|
||||
//
|
||||
// We assume that we'll support only 65536 CPUs for x86_64.
|
||||
thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
|
||||
thread_local uint64_t LastTSC = 0;
|
||||
thread_local uint64_t LastFunctionEntryTSC = 0;
|
||||
|
||||
class ThreadExitBufferCleanup {
|
||||
std::weak_ptr<BufferQueue> Buffers;
|
||||
std::shared_ptr<BufferQueue> &Buffers;
|
||||
BufferQueue::Buffer &Buffer;
|
||||
|
||||
public:
|
||||
explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
|
||||
explicit ThreadExitBufferCleanup(std::shared_ptr<BufferQueue> &BQ,
|
||||
BufferQueue::Buffer &Buffer)
|
||||
XRAY_NEVER_INSTRUMENT : Buffers(BQ),
|
||||
Buffer(Buffer) {}
|
||||
@@ -142,17 +154,24 @@ public:
|
||||
// the queue.
|
||||
assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
|
||||
static_cast<ptrdiff_t>(MetadataRecSize));
|
||||
if (auto BQ = Buffers.lock()) {
|
||||
if (Buffers) {
|
||||
writeEOBMetadata();
|
||||
auto EC = BQ->releaseBuffer(Buffer);
|
||||
auto EC = Buffers->releaseBuffer(Buffer);
|
||||
if (EC != BufferQueue::ErrorCode::Ok)
|
||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
BufferQueue::getErrorString(EC));
|
||||
Buffers = nullptr;
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Make sure a thread that's ever called handleArg0 has a thread-local
|
||||
// live reference to the buffer queue for this particular instance of
|
||||
// FDRLogging, and that we're going to clean it up when the thread exits.
|
||||
thread_local std::shared_ptr<BufferQueue> LocalBQ = nullptr;
|
||||
thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
|
||||
|
||||
class RecursionGuard {
|
||||
bool &Running;
|
||||
const bool Valid;
|
||||
@@ -176,7 +195,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
static inline bool loggingInitialized(
|
||||
inline bool loggingInitialized(
|
||||
const __sanitizer::atomic_sint32_t &LoggingStatus) XRAY_NEVER_INSTRUMENT {
|
||||
return __sanitizer::atomic_load(&LoggingStatus,
|
||||
__sanitizer::memory_order_acquire) ==
|
||||
@@ -185,7 +204,7 @@ static inline bool loggingInitialized(
|
||||
|
||||
} // namespace
|
||||
|
||||
static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
|
||||
inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
|
||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
static constexpr int InitRecordsCount = 2;
|
||||
std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
|
||||
@@ -222,9 +241,8 @@ static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
|
||||
NumTailCalls = 0;
|
||||
}
|
||||
|
||||
static inline void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
|
||||
struct timespec *))
|
||||
XRAY_NEVER_INSTRUMENT {
|
||||
inline void setupNewBuffer(int (*wall_clock_reader)(
|
||||
clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
|
||||
RecordPtr = static_cast<char *>(Buffer.Buffer);
|
||||
pid_t Tid = syscall(SYS_gettid);
|
||||
timespec TS{0, 0};
|
||||
@@ -235,7 +253,7 @@ static inline void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
|
||||
NumTailCalls = 0;
|
||||
}
|
||||
|
||||
static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
|
||||
inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
|
||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord NewCPUId;
|
||||
NewCPUId.Type = uint8_t(RecordType::Metadata);
|
||||
@@ -253,12 +271,12 @@ static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
|
||||
NumTailCalls = 0;
|
||||
}
|
||||
|
||||
static inline void writeNewCPUIdMetadata(uint16_t CPU,
|
||||
inline void writeNewCPUIdMetadata(uint16_t CPU,
|
||||
uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||
writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
|
||||
}
|
||||
|
||||
static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord EOBMeta;
|
||||
EOBMeta.Type = uint8_t(RecordType::Metadata);
|
||||
EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
|
||||
@@ -269,11 +287,11 @@ static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
NumTailCalls = 0;
|
||||
}
|
||||
|
||||
static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
|
||||
inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
|
||||
writeEOBMetadata(RecordPtr);
|
||||
}
|
||||
|
||||
static inline void writeTSCWrapMetadata(uint64_t TSC,
|
||||
inline void writeTSCWrapMetadata(uint64_t TSC,
|
||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
MetadataRecord TSCWrap;
|
||||
TSCWrap.Type = uint8_t(RecordType::Metadata);
|
||||
@@ -289,11 +307,11 @@ static inline void writeTSCWrapMetadata(uint64_t TSC,
|
||||
NumTailCalls = 0;
|
||||
}
|
||||
|
||||
static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||
inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
|
||||
writeTSCWrapMetadata(TSC, RecordPtr);
|
||||
}
|
||||
|
||||
static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||
inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||
XRayEntryType EntryType,
|
||||
char *&MemPtr) XRAY_NEVER_INSTRUMENT {
|
||||
std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
|
||||
@@ -339,6 +357,17 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||
FuncRecord.RecordKind =
|
||||
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
|
||||
break;
|
||||
case XRayEntryType::CUSTOM_EVENT: {
|
||||
// This is a bug in patching, so we'll report it once and move on.
|
||||
static bool Once = [&] {
|
||||
Report("Internal error: patched an XRay custom event call as a function; "
|
||||
"func id = %d\n",
|
||||
FuncId);
|
||||
return true;
|
||||
}();
|
||||
(void)Once;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
|
||||
@@ -346,8 +375,9 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
|
||||
}
|
||||
|
||||
static uint64_t thresholdTicks() {
|
||||
static uint64_t TicksPerSec = probeRequiredCPUFeatures() ? getTSCFrequency() :
|
||||
__xray::NanosecondsPerSecond;
|
||||
static uint64_t TicksPerSec = probeRequiredCPUFeatures()
|
||||
? getTSCFrequency()
|
||||
: __xray::NanosecondsPerSecond;
|
||||
static const uint64_t ThresholdTicks =
|
||||
TicksPerSec * flags()->xray_fdr_log_func_duration_threshold_us / 1000000;
|
||||
return ThresholdTicks;
|
||||
@@ -397,9 +427,8 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
|
||||
RewindingRecordPtr -= FunctionRecSize;
|
||||
RewindingTSC -= ExpectedTailExit.TSCDelta;
|
||||
AlignedFuncStorage FunctionEntryBuffer;
|
||||
const auto &ExpectedFunctionEntry =
|
||||
*reinterpret_cast<FunctionRecord *>(std::memcpy(
|
||||
&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
|
||||
const auto &ExpectedFunctionEntry = *reinterpret_cast<FunctionRecord *>(
|
||||
std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
|
||||
assert(ExpectedFunctionEntry.RecordKind ==
|
||||
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
|
||||
"Expected to find function entry when rewinding tail call.");
|
||||
@@ -422,7 +451,7 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
|
||||
inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
|
||||
auto EC = BQ->releaseBuffer(Buffer);
|
||||
if (EC != BufferQueue::ErrorCode::Ok) {
|
||||
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
|
||||
@@ -432,11 +461,29 @@ static inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void processFunctionHook(
|
||||
int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
|
||||
int (*wall_clock_reader)(clockid_t, struct timespec *),
|
||||
__sanitizer::atomic_sint32_t &LoggingStatus,
|
||||
const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
|
||||
inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t,
|
||||
struct timespec *),
|
||||
size_t MaxSize) XRAY_NEVER_INSTRUMENT {
|
||||
char *BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
if ((RecordPtr + MaxSize) > (BufferStart + Buffer.Size - MetadataRecSize)) {
|
||||
writeEOBMetadata();
|
||||
if (!releaseThreadLocalBuffer(LocalBQ.get()))
|
||||
return false;
|
||||
auto EC = LocalBQ->getBuffer(Buffer);
|
||||
if (EC != BufferQueue::ErrorCode::Ok) {
|
||||
Report("Failed to acquire a buffer; error=%s\n",
|
||||
BufferQueue::getErrorString(EC));
|
||||
return false;
|
||||
}
|
||||
setupNewBuffer(wall_clock_reader);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool isLogInitializedAndReady(
|
||||
std::shared_ptr<BufferQueue> &LocalBQ, uint64_t TSC, unsigned char CPU,
|
||||
int (*wall_clock_reader)(clockid_t,
|
||||
struct timespec *)) XRAY_NEVER_INSTRUMENT {
|
||||
// Bail out right away if logging is not initialized yet.
|
||||
// We should take the opportunity to release the buffer though.
|
||||
auto Status = __sanitizer::atomic_load(&LoggingStatus,
|
||||
@@ -446,44 +493,19 @@ static inline void processFunctionHook(
|
||||
(Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
|
||||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
|
||||
writeEOBMetadata();
|
||||
if (!releaseThreadLocalBuffer(BQ.get()))
|
||||
return;
|
||||
if (!releaseThreadLocalBuffer(LocalBQ.get()))
|
||||
return false;
|
||||
RecordPtr = nullptr;
|
||||
LocalBQ = nullptr;
|
||||
return false;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// We use a thread_local variable to keep track of which CPUs we've already
|
||||
// run, and the TSC times for these CPUs. This allows us to stop repeating the
|
||||
// CPU field in the function records.
|
||||
//
|
||||
// We assume that we'll support only 65536 CPUs for x86_64.
|
||||
thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
|
||||
thread_local uint64_t LastTSC = 0;
|
||||
thread_local uint64_t LastFunctionEntryTSC = 0;
|
||||
|
||||
// Make sure a thread that's ever called handleArg0 has a thread-local
|
||||
// live reference to the buffer queue for this particular instance of
|
||||
// FDRLogging, and that we're going to clean it up when the thread exits.
|
||||
thread_local auto LocalBQ = BQ;
|
||||
thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
|
||||
|
||||
// Prevent signal handler recursion, so in case we're already in a log writing
|
||||
// mode and the signal handler comes in (and is also instrumented) then we
|
||||
// don't want to be clobbering potentially partial writes already happening in
|
||||
// the thread. We use a simple thread_local latch to only allow one on-going
|
||||
// handleArg0 to happen at any given time.
|
||||
thread_local bool Running = false;
|
||||
RecursionGuard Guard{Running};
|
||||
if (!Guard) {
|
||||
assert(Running == true && "RecursionGuard is buggy!");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) {
|
||||
writeEOBMetadata();
|
||||
if (!releaseThreadLocalBuffer(BQ.get()))
|
||||
return;
|
||||
if (!releaseThreadLocalBuffer(LocalBQ.get()))
|
||||
return false;
|
||||
RecordPtr = nullptr;
|
||||
}
|
||||
|
||||
@@ -496,19 +518,57 @@ static inline void processFunctionHook(
|
||||
LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
|
||||
Report("Failed to acquire a buffer; error=%s\n",
|
||||
BufferQueue::getErrorString(EC));
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
setupNewBuffer(wall_clock_reader);
|
||||
}
|
||||
|
||||
if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
|
||||
// This means this is the first CPU this thread has ever run on. We set the
|
||||
// current CPU and record this as the first TSC we've seen.
|
||||
// This means this is the first CPU this thread has ever run on. We set
|
||||
// the current CPU and record this as the first TSC we've seen.
|
||||
CurrentCPU = CPU;
|
||||
writeNewCPUIdMetadata(CPU, TSC);
|
||||
}
|
||||
|
||||
return true;
|
||||
} // namespace __xray_fdr_internal
|
||||
|
||||
inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
|
||||
auto BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
|
||||
writeEOBMetadata();
|
||||
if (!releaseThreadLocalBuffer(LocalBQ.get()))
|
||||
return;
|
||||
RecordPtr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
inline void processFunctionHook(
|
||||
int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
|
||||
int (*wall_clock_reader)(clockid_t, struct timespec *),
|
||||
__sanitizer::atomic_sint32_t &LoggingStatus,
|
||||
const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
|
||||
// Prevent signal handler recursion, so in case we're already in a log writing
|
||||
// mode and the signal handler comes in (and is also instrumented) then we
|
||||
// don't want to be clobbering potentially partial writes already happening in
|
||||
// the thread. We use a simple thread_local latch to only allow one on-going
|
||||
// handleArg0 to happen at any given time.
|
||||
thread_local bool Running = false;
|
||||
RecursionGuard Guard{Running};
|
||||
if (!Guard) {
|
||||
assert(Running == true && "RecursionGuard is buggy!");
|
||||
return;
|
||||
}
|
||||
|
||||
// In case the reference has been cleaned up before, we make sure we
|
||||
// initialize it to the provided BufferQueue.
|
||||
if (LocalBQ == nullptr)
|
||||
LocalBQ = BQ;
|
||||
|
||||
if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, wall_clock_reader))
|
||||
return;
|
||||
|
||||
// Before we go setting up writing new function entries, we need to be really
|
||||
// careful about the pointer math we're doing. This means we need to ensure
|
||||
// that the record we are about to write is going to fit into the buffer,
|
||||
@@ -545,24 +605,14 @@ static inline void processFunctionHook(
|
||||
// bytes in the end of the buffer, we need to write out the EOB, get a new
|
||||
// Buffer, set it up properly before doing any further writing.
|
||||
//
|
||||
char *BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart <
|
||||
static_cast<ptrdiff_t>(MetadataRecSize)) {
|
||||
writeEOBMetadata();
|
||||
if (!releaseThreadLocalBuffer(LocalBQ.get()))
|
||||
return;
|
||||
auto EC = LocalBQ->getBuffer(Buffer);
|
||||
if (EC != BufferQueue::ErrorCode::Ok) {
|
||||
Report("Failed to acquire a buffer; error=%s\n",
|
||||
BufferQueue::getErrorString(EC));
|
||||
return;
|
||||
}
|
||||
setupNewBuffer(wall_clock_reader);
|
||||
if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) {
|
||||
LocalBQ = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
// By this point, we are now ready to write at most 24 bytes (one metadata
|
||||
// record and one function record).
|
||||
BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
auto BufferStart = static_cast<char *>(Buffer.Buffer);
|
||||
assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >=
|
||||
static_cast<ptrdiff_t>(MetadataRecSize) &&
|
||||
"Misconfigured BufferQueue provided; Buffer size not large enough.");
|
||||
@@ -586,7 +636,6 @@ static inline void processFunctionHook(
|
||||
// FunctionRecord. In this case we write down just a FunctionRecord with
|
||||
// the correct TSC delta.
|
||||
//
|
||||
|
||||
uint32_t RecordTSCDelta = 0;
|
||||
if (CPU != CurrentCPU) {
|
||||
// We've moved to a new CPU.
|
||||
@@ -619,21 +668,27 @@ static inline void processFunctionHook(
|
||||
break;
|
||||
rewindRecentCall(TSC, LastTSC, LastFunctionEntryTSC, FuncId);
|
||||
return; // without writing log.
|
||||
case XRayEntryType::CUSTOM_EVENT: {
|
||||
// This is a bug in patching, so we'll report it once and move on.
|
||||
static bool Once = [&] {
|
||||
Report("Internal error: patched an XRay custom event call as a function; "
|
||||
"func id = %d",
|
||||
FuncId);
|
||||
return true;
|
||||
}();
|
||||
(void)Once;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
|
||||
|
||||
// If we've exhausted the buffer by this time, we then release the buffer to
|
||||
// make sure that other threads may start using this buffer.
|
||||
if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
|
||||
writeEOBMetadata();
|
||||
if (!releaseThreadLocalBuffer(LocalBQ.get()))
|
||||
return;
|
||||
RecordPtr = nullptr;
|
||||
}
|
||||
endBufferIfFull();
|
||||
}
|
||||
|
||||
} // namespace __xray_fdr_internal
|
||||
|
||||
} // namespace __xray
|
||||
|
||||
#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H
|
||||
|
||||
@@ -50,6 +50,9 @@ __sanitizer::atomic_uintptr_t XRayPatchedFunction{0};
|
||||
// This is the function to call from the arg1-enabled sleds/trampolines.
|
||||
__sanitizer::atomic_uintptr_t XRayArgLogger{0};
|
||||
|
||||
// This is the function to call when we encounter a custom event log call.
|
||||
__sanitizer::atomic_uintptr_t XRayPatchedCustomEvent{0};
|
||||
|
||||
// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo
|
||||
// any successful mprotect(...) changes. This is used to make a page writeable
|
||||
// and executable, and upon destruction if it was successful in doing so returns
|
||||
@@ -97,7 +100,19 @@ int __xray_set_handler(void (*entry)(int32_t,
|
||||
__sanitizer::memory_order_acquire)) {
|
||||
|
||||
__sanitizer::atomic_store(&__xray::XRayPatchedFunction,
|
||||
reinterpret_cast<uint64_t>(entry),
|
||||
reinterpret_cast<uintptr_t>(entry),
|
||||
__sanitizer::memory_order_release);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __xray_set_customevent_handler(void (*entry)(void *, size_t))
|
||||
XRAY_NEVER_INSTRUMENT {
|
||||
if (__sanitizer::atomic_load(&XRayInitialized,
|
||||
__sanitizer::memory_order_acquire)) {
|
||||
__sanitizer::atomic_store(&__xray::XRayPatchedCustomEvent,
|
||||
reinterpret_cast<uintptr_t>(entry),
|
||||
__sanitizer::memory_order_release);
|
||||
return 1;
|
||||
}
|
||||
@@ -161,6 +176,9 @@ inline bool patchSled(const XRaySledEntry &Sled, bool Enable,
|
||||
case XRayEntryType::LOG_ARGS_ENTRY:
|
||||
Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
|
||||
break;
|
||||
case XRayEntryType::CUSTOM_EVENT:
|
||||
Success = patchCustomEvent(Enable, FuncId, Sled);
|
||||
break;
|
||||
default:
|
||||
Report("Unsupported sled kind '%d' @%04x\n", Sled.Address, int(Sled.Kind));
|
||||
return false;
|
||||
@@ -301,6 +319,7 @@ int __xray_set_handler_arg1(void (*Handler)(int32_t, XRayEntryType, uint64_t)) {
|
||||
__sanitizer::memory_order_release);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
|
||||
|
||||
uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
|
||||
|
||||
@@ -60,6 +60,7 @@ bool patchFunctionEntry(bool Enable, uint32_t FuncId,
|
||||
bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
|
||||
bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
|
||||
const XRaySledEntry &Sled);
|
||||
bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
|
||||
|
||||
} // namespace __xray
|
||||
|
||||
@@ -70,6 +71,7 @@ extern void __xray_FunctionEntry();
|
||||
extern void __xray_FunctionExit();
|
||||
extern void __xray_FunctionTailExit();
|
||||
extern void __xray_ArgLoggerEntry();
|
||||
extern void __xray_CustomEvent();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -95,7 +95,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
|
||||
// B #44
|
||||
|
||||
if (Enable) {
|
||||
uint32_t LoTracingHookAddr = reinterpret_cast<int32_t>(TracingHook) & 0xffff;
|
||||
uint32_t LoTracingHookAddr =
|
||||
reinterpret_cast<int32_t>(TracingHook) & 0xffff;
|
||||
uint32_t HiTracingHookAddr =
|
||||
(reinterpret_cast<int32_t>(TracingHook) >> 16) & 0xffff;
|
||||
uint32_t LoFunctionID = FuncId & 0xffff;
|
||||
@@ -151,6 +152,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
|
||||
return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
|
||||
}
|
||||
|
||||
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
// FIXME: Implement in mips?
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace __xray
|
||||
|
||||
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
|
||||
|
||||
@@ -93,7 +93,8 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
|
||||
if (Enable) {
|
||||
uint32_t LoTracingHookAddr =
|
||||
reinterpret_cast<int64_t>(TracingHook) & 0xffff;
|
||||
uint32_t HiTracingHookAddr = (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff;
|
||||
uint32_t HiTracingHookAddr =
|
||||
(reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff;
|
||||
uint32_t HigherTracingHookAddr =
|
||||
(reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xffff;
|
||||
uint32_t HighestTracingHookAddr =
|
||||
@@ -160,6 +161,11 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
|
||||
return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
|
||||
}
|
||||
|
||||
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
// FIXME: Implement in mips64?
|
||||
return false;
|
||||
}
|
||||
} // namespace __xray
|
||||
|
||||
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
|
||||
|
||||
@@ -93,6 +93,12 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
|
||||
// FIXME: Maybe implement this better?
|
||||
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
|
||||
|
||||
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
// FIXME: Implement in powerpc64?
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace __xray
|
||||
|
||||
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
|
||||
|
||||
@@ -176,9 +176,15 @@ __xray_ArgLoggerEntry:
|
||||
je .Larg1entryFail
|
||||
|
||||
.Larg1entryLog:
|
||||
movq %rdi, %rdx // first argument will become the third
|
||||
xorq %rsi, %rsi // XRayEntryType::ENTRY into the second
|
||||
movl %r10d, %edi // 32-bit function ID becomes the first
|
||||
|
||||
// First argument will become the third
|
||||
movq %rdi, %rdx
|
||||
|
||||
// XRayEntryType::ENTRY into the second
|
||||
xorq %rsi, %rsi
|
||||
|
||||
// 32-bit function ID becomes the first
|
||||
movl %r10d, %edi
|
||||
callq *%rax
|
||||
|
||||
.Larg1entryFail:
|
||||
@@ -189,4 +195,38 @@ __xray_ArgLoggerEntry:
|
||||
.size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry
|
||||
.cfi_endproc
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
.global __xray_CustomEvent
|
||||
.align 16, 0x90
|
||||
.type __xray_CustomEvent,@function
|
||||
__xray_CustomEvent:
|
||||
.cfi_startproc
|
||||
subq $16, %rsp
|
||||
.cfi_def_cfa_offset 24
|
||||
movq %rbp, 8(%rsp)
|
||||
movq %rax, 0(%rsp)
|
||||
|
||||
// We take two arguments to this trampoline, which should be in rdi and rsi
|
||||
// already. We also make sure that we stash %rax because we use that register
|
||||
// to call the logging handler.
|
||||
movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax
|
||||
testq %rax,%rax
|
||||
je .LcustomEventCleanup
|
||||
|
||||
// At this point we know that rcx and rdx already has the data, so we just
|
||||
// call the logging handler.
|
||||
callq *%rax
|
||||
|
||||
.LcustomEventCleanup:
|
||||
movq 0(%rsp), %rax
|
||||
movq 8(%rsp), %rbp
|
||||
addq $16, %rsp
|
||||
.cfi_def_cfa_offset 8
|
||||
retq
|
||||
|
||||
.Ltmp8:
|
||||
.size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent
|
||||
.cfi_endproc
|
||||
|
||||
NO_EXEC_STACK_DIRECTIVE
|
||||
|
||||
@@ -75,8 +75,10 @@ uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
|
||||
static constexpr uint8_t CallOpCode = 0xe8;
|
||||
static constexpr uint16_t MovR10Seq = 0xba41;
|
||||
static constexpr uint16_t Jmp9Seq = 0x09eb;
|
||||
static constexpr uint16_t Jmp20Seq = 0x14eb;
|
||||
static constexpr uint8_t JmpOpCode = 0xe9;
|
||||
static constexpr uint8_t RetOpCode = 0xc3;
|
||||
static constexpr uint16_t NopwSeq = 0x9066;
|
||||
|
||||
static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
|
||||
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
|
||||
@@ -201,6 +203,40 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
// Here we do the dance of replacing the following sled:
|
||||
//
|
||||
// xray_sled_n:
|
||||
// jmp +19 // 2 bytes
|
||||
// ...
|
||||
//
|
||||
// With the following:
|
||||
//
|
||||
// nopw // 2 bytes*
|
||||
// ...
|
||||
//
|
||||
// We need to do this in the following order:
|
||||
//
|
||||
// 1. Overwrite the 5-byte nop with the call (relative), where (relative) is
|
||||
// the relative offset to the __xray_CustomEvent trampoline.
|
||||
// 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'.
|
||||
// This allows us to "enable" this code once the changes have committed.
|
||||
//
|
||||
// The "unpatch" should just turn the 'nopw' back to a 'jmp +24'.
|
||||
//
|
||||
if (Enable) {
|
||||
std::atomic_store_explicit(
|
||||
reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
|
||||
std::memory_order_release);
|
||||
} else {
|
||||
std::atomic_store_explicit(
|
||||
reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
|
||||
std::memory_order_release);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// We determine whether the CPU we're running on has the correct features we
|
||||
// need. In x86_64 this will be rdtscp support.
|
||||
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
// Use the clang feature for custom xray event logging.
|
||||
//
|
||||
// RUN: %clangxx_xray -std=c++11 %s -o %t
|
||||
// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
|
||||
//
|
||||
#include <cstdio>
|
||||
#include "xray/xray_interface.h"
|
||||
|
||||
[[clang::xray_always_instrument]] void foo() {
|
||||
static constexpr char CustomLogged[] = "hello custom logging!";
|
||||
printf("before calling the custom logging...\n");
|
||||
__xray_customevent(CustomLogged, sizeof(CustomLogged));
|
||||
printf("after calling the custom logging...\n");
|
||||
}
|
||||
|
||||
void myprinter(void* ptr, size_t size) {
|
||||
printf("%.*s\n", static_cast<int>(size), static_cast<const char*>(ptr));
|
||||
}
|
||||
|
||||
int main() {
|
||||
foo();
|
||||
// CHECK: before calling the custom logging...
|
||||
// CHECK-NEXT: after calling the custom logging...
|
||||
printf("setting up custom event handler...\n");
|
||||
// CHECK-NEXT: setting up custom event handler...
|
||||
__xray_set_customevent_handler(myprinter);
|
||||
__xray_patch();
|
||||
// CHECK-NEXT: before calling the custom logging...
|
||||
foo();
|
||||
// CHECK-NEXT: hello custom logging!
|
||||
// CHECK-NEXT: after calling the custom logging...
|
||||
printf("removing custom event handler...\n");
|
||||
// CHECK-NEXT: removing custom event handler...
|
||||
__xray_set_customevent_handler(nullptr);
|
||||
foo();
|
||||
// CHECK-NEXT: before calling the custom logging...
|
||||
// CHECK-NEXT: after calling the custom logging...
|
||||
}
|
||||
Reference in New Issue
Block a user