[BOLT][AArch64] Validate code padding (#164037)

Check whether AArch64 function code padding is valid,
and add an option to treat invalid code padding as error.
This commit is contained in:
YongKang Zhu
2025-10-22 20:25:06 -07:00
committed by GitHub
parent 6f147445f7
commit e1ae126401
7 changed files with 142 additions and 30 deletions

View File

@@ -2142,8 +2142,9 @@ public:
}
/// Detects whether \p Address is inside a data region in this function
/// (constant islands).
bool isInConstantIsland(uint64_t Address) const {
/// (constant islands), and optionally return the island size starting
/// from the given \p Address.
bool isInConstantIsland(uint64_t Address, uint64_t *Size = nullptr) const {
if (!Islands)
return false;
@@ -2161,10 +2162,15 @@ public:
DataIter = std::prev(DataIter);
auto CodeIter = Islands->CodeOffsets.upper_bound(Offset);
if (CodeIter == Islands->CodeOffsets.begin())
if (CodeIter == Islands->CodeOffsets.begin() ||
*std::prev(CodeIter) <= *DataIter) {
if (Size)
*Size = (CodeIter == Islands->CodeOffsets.end() ? getMaxSize()
: *CodeIter) -
Offset;
return true;
return *std::prev(CodeIter) <= *DataIter;
}
return false;
}
uint16_t getConstantIslandAlignment() const;

View File

@@ -77,6 +77,11 @@ cl::opt<std::string> CompDirOverride(
"location, which is used with DW_AT_dwo_name to construct a path "
"to *.dwo files."),
cl::Hidden, cl::init(""), cl::cat(BoltCategory));
static cl::opt<bool>
FailOnInvalidPadding("fail-on-invalid-padding", cl::Hidden, cl::init(false),
cl::desc("treat invalid code padding as error"),
cl::ZeroOrMore, cl::cat(BoltCategory));
} // namespace opts
namespace llvm {
@@ -942,8 +947,7 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
}
bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
// FIXME: aarch64 support is missing.
if (!isX86())
if (!isX86() && !isAArch64())
return true;
if (BF.getSize() == BF.getMaxSize())
@@ -973,14 +977,26 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
return Offset - StartOffset;
};
// Skip a sequence of zero bytes.
// Skip a sequence of zero bytes. For AArch64 we only skip 4 bytes of zeros
// in case the following zeros belong to constant island or veneer.
auto skipZeros = [&]() {
const uint64_t StartOffset = Offset;
for (; Offset < BF.getMaxSize(); ++Offset)
if ((*FunctionData)[Offset] != 0)
uint64_t CurrentOffset = Offset;
for (; CurrentOffset < BF.getMaxSize() &&
(!isAArch64() || CurrentOffset < StartOffset + 4);
++CurrentOffset)
if ((*FunctionData)[CurrentOffset] != 0)
break;
return Offset - StartOffset;
uint64_t NumZeros = CurrentOffset - StartOffset;
if (isAArch64())
NumZeros &= ~((uint64_t)0x3);
if (NumZeros == 0)
return false;
Offset += NumZeros;
InstrAddress += NumZeros;
return true;
};
// Accept the whole padding area filled with breakpoints.
@@ -993,6 +1009,8 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
// Some functions have a jump to the next function or to the padding area
// inserted after the body.
auto isSkipJump = [&](const MCInst &Instr) {
if (!isX86())
return false;
uint64_t TargetAddress = 0;
if (MIB->isUnconditionalBranch(Instr) &&
MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
@@ -1004,34 +1022,73 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
return false;
};
// For veneers that are not already covered by binary functions, only those
// that handleAArch64Veneer() can recognize are checked here.
auto skipAArch64Veneer = [&]() {
if (!isAArch64() || Offset >= BF.getMaxSize())
return false;
BinaryFunction *BFVeneer = getBinaryFunctionContainingAddress(InstrAddress);
if (BFVeneer) {
// A binary function may have been created to point to this veneer.
Offset += BFVeneer->getSize();
assert(Offset <= BF.getMaxSize() &&
"AArch64 veneeer goes past the max size of function");
InstrAddress += BFVeneer->getSize();
return true;
}
const uint64_t AArch64VeneerSize = 12;
if (Offset + AArch64VeneerSize <= BF.getMaxSize() &&
handleAArch64Veneer(InstrAddress, /*MatchOnly*/ true)) {
Offset += AArch64VeneerSize;
InstrAddress += AArch64VeneerSize;
this->errs() << "BOLT-WARNING: found unmarked AArch64 veneer at 0x"
<< Twine::utohexstr(BF.getAddress() + Offset) << '\n';
return true;
}
return false;
};
auto skipAArch64ConstantIsland = [&]() {
if (!isAArch64() || Offset >= BF.getMaxSize())
return false;
uint64_t Size;
if (BF.isInConstantIsland(InstrAddress, &Size)) {
Offset += Size;
InstrAddress += Size;
return true;
}
return false;
};
// Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
// For AArch64 also check veneers and skip constant islands.
while (skipAArch64Veneer() || skipAArch64ConstantIsland() ||
skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
skipZeros())
;
if (Offset == BF.getMaxSize())
return true;
if (opts::Verbosity >= 1) {
this->errs() << "BOLT-WARNING: bad padding at address 0x"
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
<< " starting at offset " << (Offset - BF.getSize())
<< " in function " << BF << '\n'
<< FunctionData->slice(BF.getSize(),
BF.getMaxSize() - BF.getSize())
<< '\n';
}
this->errs() << "BOLT-WARNING: bad padding at address 0x"
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
<< " starting at offset " << (Offset - BF.getSize())
<< " in function " << BF << '\n'
<< FunctionData->slice(BF.getSize(),
BF.getMaxSize() - BF.getSize())
<< '\n';
return false;
}
void BinaryContext::adjustCodePadding() {
uint64_t NumInvalid = 0;
for (auto &BFI : BinaryFunctions) {
BinaryFunction &BF = BFI.second;
if (!shouldEmit(BF))
continue;
if (!hasValidCodePadding(BF)) {
NumInvalid++;
if (HasRelocations) {
this->errs() << "BOLT-WARNING: function " << BF
<< " has invalid padding. Ignoring the function\n";
@@ -1041,6 +1098,11 @@ void BinaryContext::adjustCodePadding() {
}
}
}
if (NumInvalid && opts::FailOnInvalidPadding) {
this->errs() << "BOLT-ERROR: found " << NumInvalid
<< " instance(s) of invalid code padding\n";
exit(1);
}
}
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,

View File

@@ -1427,10 +1427,9 @@ Error BinaryFunction::disassemble() {
!(BC.isAArch64() &&
BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
// Result of __builtin_unreachable().
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< " in function " << *this
<< " : replacing with nop.\n");
errs() << "BOLT-WARNING: jump past end detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr) << " in function "
<< *this << " : replacing with nop.\n";
BC.MIB->createNoop(Instruction);
if (IsCondBranch) {
// Register branch offset for profile validation.

View File

@@ -157,6 +157,10 @@ public:
MCPhysReg getStackPointer() const override { return AArch64::SP; }
MCPhysReg getFramePointer() const override { return AArch64::FP; }
bool isBreakpoint(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::BRK;
}
bool isPush(const MCInst &Inst) const override {
return isStoreToStack(Inst);
};
@@ -2153,7 +2157,7 @@ public:
--I;
Address -= 4;
if (I->getOpcode() != AArch64::ADRP ||
if (I != Begin || I->getOpcode() != AArch64::ADRP ||
MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() ||
!I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16)
return 0;

View File

@@ -93,7 +93,7 @@ Sections:
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
Address: 0x400510
AddressAlign: 0x8
Content: 1D0080D21E0080D2E50300AAE10340F9E2230091E603009100000090002015910300009063201B910400009084201D91E0FFFF97EBFFFF972F0000148000009000F047F9400000B4E2FFFF17C0035FD6800000B000000191810000B0210001913F0000EBC00000540100009021B443F9610000B4F00301AA00021FD6C0035FD6800000B000000191810000B021000191210000CB22FC7FD3410C818BFF0781EB21FC4193C00000540200009042B843F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9930000B06002413980000035DEFFFF972000805260020139F30B40F9FD7BC2A8C0035FD6E4FFFF17FF8300D1FD7B01A9FD430091BFC31FB8E1230091E8DD9752A8D5BB72E80300B9E80B00B9E0130091FF0700B9880000B00900009029C11291092500F9082540F9820080D200013FD6E90340B9E80740B90801096BA00000540100001428008052A8C31FB814000014880000B00900009029411391092900F9082940F9E0230091E1031F2A820080D200013FD6E80B40B9A80000340100001428008052A8C31FB8050000140000009000E01D9194FFFF9701000014A0C35FB8FD7B41A9FF830091C0035FD6FD7BBCA9FD030091F35301A99400009094023891F55B02A995000090B5E23791940215CBF603002AF76303A9F70301AAF80302AA5DFFFF97FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6
Content: 1D0080D21E0080D2E50300AAE10340F9E2230091E603009100000090002015910300009063201B910400009084201D91E0FFFF97EBFFFF972F0000148000009000F047F9400000B4E2FFFF17C0035FD6800000B000000191810000B0210001913F0000EBC00000540100009021B443F9610000B4F00301AA00021FD6C0035FD6800000B000000191810000B021000191210000CB22FC7FD3410C818BFF0781EB21FC4193C00000540200009042B843F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9930000B06002413980000035DEFFFF972000805260020139F30B40F9FD7BC2A8C0035FD6E4FFFF17FF8300D1FD7B01A9FD430091BFC31FB8E1230091E8DD9752A8D5BB72E80300B9E80B00B9E0130091FF0700B9880000B00900009029C11291092500F9082540F9820080D200013FD6E90340B9E80740B90801096BA00000540100001428008052A8C31FB814000014880000B00900009029411391092900F9082940F9E0230091E1031F2A820080D200013FD6E80B40B9A80000340100001428008052A8C31FB8050000140000009000E01D9194FFFF9701000014A0C35FB8FD7B41A9FF830091C0035FD6FD7BBCA9FD030091F35301A99400009094023891F55B02A995000090B5E23791940215CBF603002AF76303A9F70301AAF80302AA60003FD6FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6
- Name: .rodata
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC ]
@@ -435,6 +435,12 @@ Symbols:
Binding: STB_GLOBAL
Value: 0x400604
Size: 0xC4
- Name: post_main
Type: STT_FUNC
Section: .text
Binding: STB_GLOBAL
Value: 0x4006C8
Size: 0x7C
- Name: 'printf@@GLIBC_2.17'
Type: STT_FUNC
Binding: STB_GLOBAL

View File

@@ -0,0 +1,34 @@
# RUN: %clang %cflags %s -o %t.so -Wl,-q
# RUN: not llvm-bolt %t.so -o %t.bolt --fail-on-invalid-padding \
# RUN: 2>&1 | FileCheck %s
# CHECK: BOLT-ERROR: found 1 instance(s) of invalid code padding
.text
.align 2
.global foo
.type foo, %function
foo:
cmp x0, x1
b.eq .Ltmp1
adrp x1, jmptbl
add x1, x1, :lo12:jmptbl
ldrsw x2, [x1, x2, lsl #2]
br x2
b .Ltmp1
.Ltmp2:
add x0, x0, x1
ret
.size foo, .-foo
.Ltmp1:
add x0, x0, x1
b .Ltmp2
# Dummy relocation to force relocation mode
.reloc 0, R_AARCH64_NONE
.section .rodata, "a"
.align 2
.global jmptbl
jmptbl:
.word .text+0x28 - .

View File

@@ -1,7 +1,8 @@
// This test checks .plt.got handling by BOLT
RUN: yaml2obj %p/Inputs/plt-got.yaml &> %t.exe
RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm --print-only=_start/1 | \
RUN: FileCheck %s
RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm --print-only=_start/1 \
RUN: 2>&1 | FileCheck %s
CHECK: bl abort@PLT
CHECK: BOLT-WARNING: found unmarked AArch64 veneer