[libunwind] Fix execution flow imbalance when using C++ Exceptions (#165066)

This commit is contained in:
Med Ismail Bennani
2025-11-11 18:31:57 -08:00
committed by GitHub
parent 3e6442a516
commit cf35502dd5
12 changed files with 396 additions and 19 deletions

View File

@@ -1832,8 +1832,9 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) {
/// Registers_arm64 holds the register state of a thread in a 64-bit arm
/// process.
class _LIBUNWIND_HIDDEN Registers_arm64;
extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
extern "C" int64_t __libunwind_Registers_arm64_za_disable();
extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *,
unsigned walkedFrames);
#if defined(_LIBUNWIND_USE_GCS)
extern "C" void *__libunwind_shstk_get_jump_target() {
@@ -1861,10 +1862,17 @@ public:
v128 getVectorRegister(int num) const;
void setVectorRegister(int num, v128 value);
static const char *getRegisterName(int num);
void jumpto() {
zaDisable();
__libunwind_Registers_arm64_jumpto(this);
#ifdef _LIBUNWIND_TRACE_RET_INJECT
  /// Resumes execution from this register set, asking
  /// `__libunwind_Registers_arm64_jumpto` to first execute \p walkedFrames
  /// `ret` instructions so that calls and returns stay balanced.
  ///
  /// Kept out-of-line (`_LIBUNWIND_TRACE_NO_INLINE`) because the caller counts
  /// this function as one of the libunwind frames that never return.
  _LIBUNWIND_TRACE_NO_INLINE
  void returnto(unsigned walkedFrames) {
    // Stay consistent with jumpto(): run zaDisable() before the long jump.
    // The call returns normally, so it does not disturb the call/return
    // balance that the injected `ret`s are restoring.
    // NOTE(review): zaDisable() is defined outside this excerpt; confirm it is
    // safe (or a no-op) on every target that defines
    // _LIBUNWIND_TRACE_RET_INJECT.
    zaDisable();
    __libunwind_Registers_arm64_jumpto(this, walkedFrames);
  }
#else
  /// Resumes execution from this register set without injecting any `ret`
  /// instructions (walked-frames counter passed as 0).
  void jumpto() {
    zaDisable();
    __libunwind_Registers_arm64_jumpto(this, 0);
  }
#endif
static constexpr int lastDwarfRegNum() {
return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64;
}

View File

@@ -472,7 +472,9 @@ public:
/// Fallback implementation: reports a fatal "not implemented" error through
/// _LIBUNWIND_ABORT. The unw_proc_info_t argument is ignored here.
virtual void getInfo(unw_proc_info_t *) {
_LIBUNWIND_ABORT("getInfo not implemented");
}
virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); }
/// Fallback implementation of the resume hook: reports a fatal
/// "not implemented" error through _LIBUNWIND_ABORT.
/// Marked _LIBUNWIND_TRACE_NO_INLINE, which expands to
/// noinline/disable_tail_calls attributes when _LIBUNWIND_TRACE_RET_INJECT is
/// defined and to nothing otherwise.
_LIBUNWIND_TRACE_NO_INLINE virtual void jumpto() {
_LIBUNWIND_ABORT("jumpto not implemented");
}
/// Fallback implementation: reports a fatal "not implemented" error through
/// _LIBUNWIND_ABORT instead of answering whether this is a signal frame.
virtual bool isSignalFrame() {
_LIBUNWIND_ABORT("isSignalFrame not implemented");
}
@@ -489,6 +491,12 @@ public:
virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); }
#endif
#ifdef _LIBUNWIND_TRACE_RET_INJECT
/// Fallback implementation of the walked-frames setter, only present when
/// ret injection is compiled in: reports a fatal "not implemented" error
/// through _LIBUNWIND_ABORT. The unsigned argument is ignored here.
virtual void setWalkedFrames(unsigned) {
_LIBUNWIND_ABORT("setWalkedFrames not implemented");
}
#endif
#ifdef _AIX
virtual uintptr_t getDataRelBase() {
_LIBUNWIND_ABORT("getDataRelBase not implemented");
@@ -965,7 +973,8 @@ public:
virtual void setFloatReg(int, unw_fpreg_t);
virtual int step(bool stage2 = false);
virtual void getInfo(unw_proc_info_t *);
virtual void jumpto();
_LIBUNWIND_TRACE_NO_INLINE
virtual void jumpto();
virtual bool isSignalFrame();
virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off);
virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false);
@@ -974,6 +983,10 @@ public:
virtual void saveVFPAsX();
#endif
#ifdef _LIBUNWIND_TRACE_RET_INJECT
virtual void setWalkedFrames(unsigned);
#endif
#ifdef _AIX
virtual uintptr_t getDataRelBase();
#endif
@@ -1356,6 +1369,9 @@ private:
defined(_LIBUNWIND_TARGET_HAIKU)
bool _isSigReturn = false;
#endif
#ifdef _LIBUNWIND_TRACE_RET_INJECT
  // Number of frames walked during phase 2, stored by setWalkedFrames() and
  // read by jumpto() to decide how many `ret` instructions to inject.
  // Zero-initialized so that resuming through the legacy __unw_resume() entry
  // point, which never calls setWalkedFrames(), does not read an
  // indeterminate value and inject an arbitrary number of `ret`s.
  uint32_t _walkedFrames = 0;
#endif
};
@@ -1410,7 +1426,46 @@ void UnwindCursor<A, R>::setFloatReg(int regNum, unw_fpreg_t value) {
}
/// Resumes execution at the frame described by this cursor's registers.
///
/// When _LIBUNWIND_TRACE_RET_INJECT is defined, the register restore is asked
/// to inject enough `ret` instructions to rebalance the execution flow;
/// otherwise this is a plain long jump.
template <typename A, typename R> void UnwindCursor<A, R>::jumpto() {
#ifdef _LIBUNWIND_TRACE_RET_INJECT
  /*

  The value of `_walkedFrames` is computed in `unwind_phase2` and represents the
  number of frames walked, starting from `unwind_phase2`, to get to the landing
  pad.

  ```
  // uc is initialized by __unw_getcontext in the parent frame.
  // The first stack frame walked is unwind_phase2.
  unsigned framesWalked = 1;
  ```

  To that, we need to add the number of function calls in libunwind between
  `unwind_phase2` & `__libunwind_Registers_arm64_jumpto` which performs the long
  jump, to rebalance the execution flow.

  ```
  frame #0: libunwind.1.dylib`__libunwind_Registers_arm64_jumpto at UnwindRegistersRestore.S:646
  frame #1: libunwind.1.dylib`libunwind::Registers_arm64::returnto at Registers.hpp:2291:3
  frame #2: libunwind.1.dylib`libunwind::UnwindCursor<libunwind::LocalAddressSpace, libunwind::Registers_arm64>::jumpto at UnwindCursor.hpp:1474:14
  frame #3: libunwind.1.dylib`__unw_resume at libunwind.cpp:375:7
  frame #4: libunwind.1.dylib`__unw_resume_with_frames_walked at libunwind.cpp:363:10
  frame #5: libunwind.1.dylib`unwind_phase2 at UnwindLevel1.c:328:9
  frame #6: libunwind.1.dylib`_Unwind_RaiseException at UnwindLevel1.c:480:10
  frame #7: libc++abi.dylib`__cxa_throw at cxa_exception.cpp:295:5
  ...
  ```

  If we look at the backtrace from `__libunwind_Registers_arm64_jumpto`, we see
  there are 5 frames on the stack to reach `unwind_phase2`. However, only 4 of
  them will never return, since `__libunwind_Registers_arm64_jumpto` returns
  back to the landing pad, so we need to subtract 1 from the number of
  `_EXTRA_LIBUNWIND_FRAMES_WALKED`.
  */

  // `unsigned` matches both `_walkedFrames` and the parameter of `returnto()`,
  // so the sum is never widened to 64 bits and then implicitly truncated.
  static constexpr unsigned _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1;
  _registers.returnto(_walkedFrames + _EXTRA_LIBUNWIND_FRAMES_WALKED);
#else
  _registers.jumpto();
#endif
}
#ifdef __arm__
@@ -1419,6 +1474,13 @@ template <typename A, typename R> void UnwindCursor<A, R>::saveVFPAsX() {
}
#endif
#ifdef _LIBUNWIND_TRACE_RET_INJECT
/// Records how many frames were walked before resuming; jumpto() later adds
/// the fixed libunwind frame count to this value.
template <typename A, typename R>
void UnwindCursor<A, R>::setWalkedFrames(unsigned walkedFrames) {
  this->_walkedFrames = walkedFrames;
}
#endif
#ifdef _AIX
template <typename A, typename R>
uintptr_t UnwindCursor<A, R>::getDataRelBase() {

View File

@@ -48,16 +48,15 @@
// avoided when invoking the `jumpto()` function. To do this, we use inline
// assemblies to "goto" the `jumpto()` for these architectures.
#if !defined(_LIBUNWIND_USE_CET) && !defined(_LIBUNWIND_USE_GCS)
#define __unw_phase2_resume(cursor, fn) \
#define __unw_phase2_resume(cursor, payload) \
do { \
(void)fn; \
__unw_resume((cursor)); \
__unw_resume_with_frames_walked((cursor), (payload)); \
} while (0)
#elif defined(_LIBUNWIND_TARGET_I386)
#define __shstk_step_size (4)
#define __unw_phase2_resume(cursor, fn) \
#define __unw_phase2_resume(cursor, payload) \
do { \
_LIBUNWIND_POP_SHSTK_SSP((fn)); \
_LIBUNWIND_POP_SHSTK_SSP((payload)); \
void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \
void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \
__asm__ volatile("push %%edi\n\t" \
@@ -67,9 +66,9 @@
} while (0)
#elif defined(_LIBUNWIND_TARGET_X86_64)
#define __shstk_step_size (8)
#define __unw_phase2_resume(cursor, fn) \
#define __unw_phase2_resume(cursor, payload) \
do { \
_LIBUNWIND_POP_SHSTK_SSP((fn)); \
_LIBUNWIND_POP_SHSTK_SSP((payload)); \
void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \
void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \
__asm__ volatile("jmpq *%%rdx\n\t" ::"D"(shstkRegContext), \
@@ -77,16 +76,17 @@
} while (0)
#elif defined(_LIBUNWIND_TARGET_AARCH64)
#define __shstk_step_size (8)
#define __unw_phase2_resume(cursor, fn) \
#define __unw_phase2_resume(cursor, payload) \
do { \
_LIBUNWIND_POP_SHSTK_SSP((fn)); \
_LIBUNWIND_POP_SHSTK_SSP((payload)); \
void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \
void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \
__asm__ volatile("mov x0, %0\n\t" \
"mov x1, wzr\n\t" \
"br %1\n\t" \
: \
: "r"(shstkRegContext), "r"(shstkJumpAddress) \
: "x0"); \
: "x0", "x1"); \
} while (0)
#endif
@@ -205,6 +205,8 @@ extern int __unw_step_stage2(unw_cursor_t *);
#if defined(_LIBUNWIND_USE_GCS)
// Enable the GCS target feature to permit gcspop instructions to be used.
__attribute__((target("+gcs")))
#else
_LIBUNWIND_TRACE_NO_INLINE
#endif
static _Unwind_Reason_Code
unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor,
@@ -349,6 +351,8 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor,
#if defined(_LIBUNWIND_USE_GCS)
// Enable the GCS target feature to permit gcspop instructions to be used.
__attribute__((target("+gcs")))
#else
_LIBUNWIND_TRACE_NO_INLINE
#endif
static _Unwind_Reason_Code
unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,

View File

@@ -645,13 +645,26 @@ Lnovec:
#endif
//
// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *, unsigned);
//
// On entry:
// thread_state pointer is in x0
// walked_frames counter is in x1
//
.p2align 2
DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
#if defined(_LIBUNWIND_TRACE_RET_INJECT)
// Ret injection: execute exactly w1 (walked_frames) `ret` instructions before
// the registers are restored. x0 (thread_state pointer) is left untouched.
// Skip the loop entirely when the counter is zero.
cbz w1, 1f
0:
// Decrement the counter; this sets the condition flags tested by b.ne below.
subs w1, w1, #1
// x16 = address of this adr + 8, i.e. the b.ne two instructions further on
// (A64 instructions are 4 bytes each).
adr x16, #8
// A `ret` whose target is simply the instruction that follows it.
ret x16
// Neither adr nor ret changes the flags, so this still sees the result of
// subs: loop again while the counter has not reached zero.
b.ne 0b
1:
#endif
// skip restore of x0,x1 for now
ldp x2, x3, [x0, #0x010]
ldp x4, x5, [x0, #0x020]

View File

@@ -132,6 +132,10 @@
#if defined(__APPLE__)
#if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__)
#define _LIBUNWIND_TRACE_RET_INJECT 1
#endif
#define SYMBOL_IS_FUNC(name)
#define HIDDEN_SYMBOL(name) .private_extern name
#if defined(_LIBUNWIND_HIDE_SYMBOLS)

View File

@@ -28,6 +28,9 @@
#define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1
#define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1
#endif
#if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__)
#define _LIBUNWIND_TRACE_RET_INJECT 1
#endif
#elif defined(_WIN32)
#ifdef __SEH__
#define _LIBUNWIND_SUPPORT_SEH_UNWIND 1
@@ -61,6 +64,12 @@
#endif
#endif
// _LIBUNWIND_TRACE_NO_INLINE: applied to functions on the resume path when ret
// injection is enabled. It forbids inlining and tail calls; otherwise it
// expands to nothing.
// NOTE(review): presumably this keeps the number of libunwind call frames
// fixed, since the ret-injection count assumes a fixed number of frames
// between unwind_phase2 and the final jump - confirm.
#ifdef _LIBUNWIND_TRACE_RET_INJECT
#define _LIBUNWIND_TRACE_NO_INLINE __attribute__((noinline, disable_tail_calls))
#else
#define _LIBUNWIND_TRACE_NO_INLINE
#endif
#if defined(_LIBUNWIND_HIDE_SYMBOLS)
// The CMake file passes -fvisibility=hidden to control ELF/Mach-O visibility.
#define _LIBUNWIND_EXPORT

View File

@@ -247,7 +247,27 @@ _LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor,
}
_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info)
/// Resume execution at cursor position (aka longjump).
/// Rebalance the execution flow by injecting the right number of `ret`
/// instructions relative to \p walkedFrames, then resume execution at the
/// cursor position (aka longjump).
///
/// \param cursor        cursor positioned on the frame to resume in.
/// \param walkedFrames  number of frames walked to reach that frame; only
///                      consumed when _LIBUNWIND_TRACE_RET_INJECT is defined.
/// \returns whatever __unw_resume() returns for \p cursor.
_LIBUNWIND_HIDDEN int __unw_resume_with_frames_walked(unw_cursor_t *cursor,
                                                      unsigned walkedFrames) {
  // Name this function in the trace so it can be told apart from the legacy
  // __unw_resume() entry, which logs its own line when called below.
  _LIBUNWIND_TRACE_API(
      "__unw_resume_with_frames_walked(cursor=%p, walkedFrames=%u)",
      static_cast<void *>(cursor), walkedFrames);
#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
  // Inform the ASan runtime that now might be a good time to clean stuff up.
  __asan_handle_no_return();
#endif
#ifdef _LIBUNWIND_TRACE_RET_INJECT
  // Hand the count to the cursor; its jumpto() reads it back when resuming.
  AbstractUnwindCursor *co = reinterpret_cast<AbstractUnwindCursor *>(cursor);
  co->setWalkedFrames(walkedFrames);
#else
  // Without ret injection the count is only used by the trace statement
  // above; keep the parameter referenced in case that macro expands to
  // nothing.
  (void)walkedFrames;
#endif
  return __unw_resume(cursor);
}
_LIBUNWIND_WEAK_ALIAS(__unw_resume_with_frames_walked,
unw_resume_with_frames_walked)
/// Legacy function. Resume execution at cursor position (aka longjump).
_LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) {
_LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast<void *>(cursor));
#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)

View File

@@ -30,7 +30,11 @@ extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *);
extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *);
extern int __unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t);
extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t);
extern int __unw_resume(unw_cursor_t *);
_LIBUNWIND_TRACE_NO_INLINE
extern int __unw_resume_with_frames_walked(unw_cursor_t *, unsigned);
// `__unw_resume` is a legacy function. Use `__unw_resume_with_frames_walked` instead.
_LIBUNWIND_TRACE_NO_INLINE
extern int __unw_resume(unw_cursor_t *);
#ifdef __arm__
/* Save VFP registers in FSTMX format (instead of FSTMD). */

View File

@@ -647,6 +647,31 @@ def skipIfOutOfTreeDebugserver(func):
return skipTestIfFn(is_out_of_tree_debugserver)(func)
def skipIfOutOfTreeLibunwind(func):
    """Skip the decorated test unless a libunwind library sits in the build tree's lib directory."""
    skip_reason = "out-of-tree libunwind"
    library_prefixes = ("libunwind.", "unwind.")

    def is_out_of_tree_libunwind():
        tools_dir = configuration.llvm_tools_dir
        if not tools_dir:
            return skip_reason

        # The tools directory is typically <build>/bin; the libraries live in
        # the sibling <build>/lib.
        lib_dir = os.path.join(os.path.dirname(tools_dir), "lib")
        if not os.path.isdir(lib_dir):
            return skip_reason

        # Any file extension counts, only the name prefix is checked.
        has_libunwind = any(
            entry.startswith(library_prefixes) for entry in os.listdir(lib_dir)
        )
        return None if has_libunwind else skip_reason

    return skipTestIfFn(is_out_of_tree_libunwind)(func)
def skipIfRemote(func):
    """Skip the decorated test when the suite runs against a remote platform."""
    skip_when_remote = unittest.skipIf(lldb.remote_platform, "skip on remote platform")
    return skip_when_remote(func)

View File

@@ -0,0 +1,6 @@
# Single-source C++ test program.
CXX_SOURCES := main.cpp
# Build with C++ exceptions enabled
# -g emits debug info and -O0 disables optimization.
# NOTE(review): this assigns CXXFLAGS directly before Makefile.rules is
# included; confirm whether the suite expects an additive variable such as
# CXXFLAGS_EXTRAS instead.
CXXFLAGS := -g -O0 -fexceptions
# Shared build rules for the test suite.
include Makefile.rules

View File

@@ -0,0 +1,177 @@
"""
Test that libunwind correctly injects 'ret' instructions to rebalance execution flow
when unwinding C++ exceptions. This is important for Apple Processor Trace analysis.
"""
import lldb
import os
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil
from lldbsuite.test import configuration
class LibunwindRetInjectionTestCase(TestBase):
    @skipIf(archs=no_match(["arm64", "arm64e", "aarch64"]))
    @skipUnlessDarwin
    @skipIfOutOfTreeLibunwind
    def test_ret_injection_on_exception_unwind(self):
        """Test that __libunwind_Registers_arm64_jumpto receives correct walkedFrames count and injects the right number of ret instructions."""
        self.build()

        exe = self.getBuildArtifact("a.out")
        target = self.dbg.CreateTarget(exe)
        self.assertTrue(target, VALID_TARGET)

        # Find the just-built libunwind, not the system one.
        # llvm_tools_dir is typically <build>/bin, so lib is a sibling.
        self.assertIsNotNone(
            configuration.llvm_tools_dir,
            "llvm_tools_dir must be set to find in-tree libunwind",
        )
        llvm_lib_dir = os.path.join(
            os.path.dirname(configuration.llvm_tools_dir), "lib"
        )

        # Collect every libunwind candidate. os.listdir() returns entries in
        # arbitrary order, so sort them for a deterministic pick and prefer a
        # dynamic library: DYLD_INSERT_LIBRARIES cannot load a static archive
        # such as libunwind.a. Fall back to any match if no .dylib is present.
        candidates = sorted(
            filename
            for filename in os.listdir(llvm_lib_dir)
            if filename.startswith(("libunwind.", "unwind."))
        )
        dylibs = [name for name in candidates if name.endswith(".dylib")]
        libunwind_path = None
        if candidates:
            libunwind_path = os.path.join(llvm_lib_dir, (dylibs or candidates)[0])

        self.assertIsNotNone(
            libunwind_path, f"Could not find libunwind in {llvm_lib_dir}"
        )

        # Set breakpoint in __libunwind_Registers_arm64_jumpto.
        # This is the function that performs the actual jump and ret injection.
        bp = target.BreakpointCreateByName("__libunwind_Registers_arm64_jumpto")
        self.assertTrue(bp.IsValid())
        self.assertGreater(bp.GetNumLocations(), 0)

        # Set up DYLD_INSERT_LIBRARIES to use the just-built libunwind.
        launch_info = lldb.SBLaunchInfo(None)
        env = target.GetEnvironment()
        env.Set("DYLD_INSERT_LIBRARIES", libunwind_path, True)
        launch_info.SetEnvironment(env, False)

        # Launch the process with our custom libunwind.
        error = lldb.SBError()
        process = target.Launch(launch_info, error)
        self.assertSuccess(
            error, f"Failed to launch process with libunwind at {libunwind_path}"
        )
        self.assertTrue(process, PROCESS_IS_VALID)

        # We should hit the breakpoint in __libunwind_Registers_arm64_jumpto
        # during the exception unwinding phase 2.
        threads = lldbutil.get_threads_stopped_at_breakpoint(process, bp)
        self.assertEqual(len(threads), 1, "Should have stopped at breakpoint")

        thread = threads[0]
        frame = thread.GetFrameAtIndex(0)

        # Verify we're in __libunwind_Registers_arm64_jumpto. A missing
        # function name is reported as a normal assertion failure rather than
        # a TypeError from the containment check.
        function_name = frame.GetFunctionName()
        self.assertIn(
            "__libunwind_Registers_arm64_jumpto",
            function_name or "",
            f"Expected to be in __libunwind_Registers_arm64_jumpto, got {function_name}",
        )

        # On ARM64, the walkedFrames parameter should be in register x1 (second parameter).
        # According to the ARM64 calling convention, integer arguments are passed in x0-x7.
        # x0 = Registers_arm64* pointer.
        # x1 = unsigned walkedFrames.
        error = lldb.SBError()
        x1_value = frame.register["x1"].GetValueAsUnsigned(error)
        self.assertSuccess(error, "Failed to read x1 register")

        # According to the code in UnwindCursor.hpp, the walkedFrames value represents:
        # 1. The number of frames walked in unwind_phase2 to reach the landing pad.
        # 2. Plus _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1 = 4 additional libunwind frames.
        #
        # From the comment in the code:
        #   frame #0: __libunwind_Registers_arm64_jumpto
        #   frame #1: Registers_arm64::returnto
        #   frame #2: UnwindCursor::jumpto
        #   frame #3: __unw_resume
        #   frame #4: __unw_resume_with_frames_walked
        #   frame #5: unwind_phase2
        #
        # Since __libunwind_Registers_arm64_jumpto returns to the landing pad,
        # we subtract 1, so _EXTRA_LIBUNWIND_FRAMES_WALKED = 4.
        #
        # For our test program:
        # - unwind_phase2 starts walking (frame 0 counted here).
        # - Walks through: func_d (throw site), func_c, func_b, func_a.
        # - Finds landing pad in main.
        # That's approximately 4-5 frames from the user code.
        # Plus the 4 extra libunwind frames.
        #
        # So we expect x1 to be roughly 8-10; the upper bound below leaves
        # headroom for additional libc++abi frames.
        expected_min_frames = 8
        expected_max_frames = 13  # Allow some variation for libc++abi frames.

        self.assertGreaterEqual(
            x1_value,
            expected_min_frames,
            f"walkedFrames (x1) should be >= {expected_min_frames}, got {x1_value}. "
            "This is the number of 'ret' instructions that will be executed.",
        )
        self.assertLessEqual(
            x1_value,
            expected_max_frames,
            f"walkedFrames (x1) should be <= {expected_max_frames}, got {x1_value}. "
            "Value seems too high.",
        )

        # Now step through the ret injection loop and count the actual number of 'ret' executions.
        # The loop injects exactly x1_value ret instructions before continuing with register restoration.
        # We step until we hit the first 'ldp' instruction (register restoration starts with 'ldp x2, x3, [x0, #0x010]').
        ret_executed_count = 0
        max_steps = 100  # Safety limit to prevent infinite loops.

        for _ in range(max_steps):
            # Read the 4-byte instruction at the current pc; fail loudly if
            # the read fails instead of disassembling garbage.
            pc = frame.GetPC()
            read_error = lldb.SBError()
            inst = process.ReadMemory(pc, 4, read_error)
            self.assertSuccess(read_error, f"Failed to read instruction at {pc:#x}")

            # Disassemble current instruction.
            instructions = target.GetInstructions(lldb.SBAddress(pc, target), inst)
            self.assertGreater(
                instructions.GetSize(),
                0,
                f"Could not disassemble instruction at {pc:#x}",
            )
            current_inst = instructions[0]
            mnemonic = current_inst.GetMnemonic(target)
            operands = current_inst.GetOperands(target)

            # Check if we've reached the register restoration part (first ldp after the loop).
            if mnemonic == "ldp":
                # We've exited the ret injection loop.
                break

            # Count 'ret' instructions that get executed.
            if mnemonic == "ret":
                self.assertEqual(operands, "x16")
                ret_executed_count += 1

            # Single-step one instruction (step_over=False: do not step over calls).
            thread.StepInstruction(False)

            # Update frame reference.
            frame = thread.GetFrameAtIndex(0)
        else:
            # Only reached when the loop ran out of steps without hitting 'ldp'.
            self.fail(
                f"Stepped {max_steps} times without reaching 'ldp' instruction. Something is wrong."
            )

        # The number of executed 'ret' instructions should match x1_value.
        # According to the implementation, the loop executes exactly x1_value times.
        self.assertEqual(
            ret_executed_count,
            x1_value,
            f"Expected {x1_value} 'ret' instructions to be executed (matching x1 register), "
            f"but counted {ret_executed_count} executed 'ret' instructions.",
        )

View File

@@ -0,0 +1,45 @@
// Test program to verify libunwind ret injection feature for execution flow
// rebalancing.
//
// This test creates a multi-frame call stack and throws a C++ exception to
// trigger libunwind's two-phase exception handling. The test verifies that
// libunwind correctly injects the right number of 'ret' instructions to
// rebalance the execution flow when returning to the landing pad, which is
// important for Apple Processor Trace analysis.
#include <cstdio>
#include <exception>
#include <stdexcept>
// Marker functions are kept out-of-line (noinline) so that each one
// contributes its own frame to the stack.

// Innermost marker: announces itself, then throws the exception.
[[gnu::noinline]] static void func_d() {
  std::printf("In func_d, about to throw exception\n");
  throw std::runtime_error("test exception");
}
// Marker frame: logs its name, then calls func_d.
[[gnu::noinline]] static void func_c() {
  std::printf("In func_c\n");
  func_d();
}
// Marker frame: logs its name, then calls func_c.
[[gnu::noinline]] static void func_b() {
  std::printf("In func_b\n");
  func_c();
}
// Outermost marker frame: logs its name, then calls func_b.
[[gnu::noinline]] static void func_a() {
  std::printf("In func_a\n");
  func_b();
}
// Calls func_a inside a try block and catches the std::exception thrown from
// the bottom of the call chain. Returns 0 when the exception is caught, and 1
// if func_a returns normally.
int main(int argc, char *argv[]) {
  int exit_code = 1;
  try {
    std::printf("In main, about to call func_a\n");
    func_a();
    std::printf("ERROR: Should not reach here\n");
  } catch (const std::exception &e) {
    std::printf("Caught exception in main: %s\n", e.what());
    exit_code = 0;
  }
  return exit_code;
}