[BOLT] Validate secondary entry point (#135731)

Some functions have a size of zero in the input binary's symbol
table, such as those produced by an assembler. When figuring out
function sizes, we may create a label symbol if it doesn't point to any
constant island. However, before the function size is known, a marker
symbol cannot be correctly associated with a function, so all such
checks would fail, and we could end up adding a code label pointing
to a constant island as a secondary entry point and later mistakenly
marking the function as not simple.

Querying the global marker symbol array has a large throughput overhead.
Instead, we can run an extra check when post-processing entry points
to identify label symbols that actually point to constant islands.
This commit is contained in:
YongKang Zhu
2025-04-15 13:19:15 -07:00
committed by GitHub
parent 2271f0bebd
commit 823adc7a2d
4 changed files with 86 additions and 0 deletions

View File

@@ -1174,6 +1174,11 @@ public:
return getSecondaryEntryPointSymbol(BB.getLabel());
}
/// Drop \p Label from the secondary entry point map. Nothing is erased when
/// \p Label has no entry in the map.
void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
  auto EntryIt = SecondaryEntryPoints.find(Label);
  if (EntryIt != SecondaryEntryPoints.end())
    SecondaryEntryPoints.erase(EntryIt);
}
/// Return true if the basic block is an entry point into the function
/// (either primary or secondary).
bool isEntryPoint(const BinaryBasicBlock &BB) const {
@@ -2126,6 +2131,10 @@ public:
return Islands && !Islands->DataOffsets.empty();
}
/// Return true if this function has a constant island and \p Offset is one
/// of the offsets recorded in its island data offsets.
bool isStartOfConstantIsland(uint64_t Offset) const {
  // Without any constant island there is nothing to look up.
  if (!hasConstantIsland())
    return false;
  return Islands->DataOffsets.count(Offset) != 0;
}
/// Return true iff the symbol could be seen inside this function; otherwise,
/// it is probably another function.
bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;

View File

@@ -1896,6 +1896,15 @@ void BinaryFunction::postProcessEntryPoints() {
if (BC.isAArch64() && Offset == getSize())
continue;
// If we have grabbed a wrong code label which actually points to some
// constant island inside the function, ignore this label and remove it
// from the secondary entry point map.
if (isStartOfConstantIsland(Offset)) {
BC.SymbolToFunctionMap.erase(Label);
removeSymbolFromSecondaryEntryPointMap(Label);
continue;
}
BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
"detected in function "
<< *this << " at offset 0x" << Twine::utohexstr(Offset) << '\n';

View File

@@ -0,0 +1,34 @@
# Verify that BOLT does not take a label pointing to a constant island as a
# secondary entry point (function `_start` doesn't have its ELF size set
# originally), and that the function is not otherwise mistaken as non-simple.

# RUN: %clang %cflags -pie %s -o %t.so -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo
# RUN: llvm-bolt %t.so -o %t.bolt.so --print-cfg 2>&1 | FileCheck %s

# FileCheck has no line-continuation syntax, so the forbidden warning is
# written as one pattern on a single line.
# CHECK-NOT: BOLT-WARNING: reference in the middle of instruction detected in function _start at offset 0x{{[0-9a-f]+}}
# CHECK: Binary Function "_start" after building cfg

  .text
  .global _foo
  .type _foo, %function
_foo:
  ret

# No .size directive is emitted for `_start`.
  .global _start
  .type _start, %function
_start:
  b _foo

# Data placed right after the code of `_start`; this is the label that must
# not end up as a secondary entry point.
  .balign 16
_random_consts:
  .long 0x12345678
  .long 0x90abcdef

  .global _bar
  .type _bar, %function
_bar:
  ret

# Dummy relocation to force relocation mode
  .reloc 0, R_AARCH64_NONE

View File

@@ -0,0 +1,34 @@
# Verify that BOLT does not take a label pointing to a constant island as a
# secondary entry point (function `_start` doesn't have its ELF size set
# originally), and that the function is not otherwise mistaken as non-simple.

# RUN: %clang %cflags -pie %s -o %t.so -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo
# RUN: llvm-bolt %t.so -o %t.bolt.so --print-cfg 2>&1 | FileCheck %s

# FileCheck has no line-continuation syntax, so the forbidden warning is
# written as one pattern on a single line.
# CHECK-NOT: BOLT-WARNING: reference in the middle of instruction detected in function _start at offset 0x{{[0-9a-f]+}}
# CHECK: Binary Function "_start" after building cfg

  .text
  .global _foo
  .type _foo, %function
_foo:
  ret

# No .size directive is emitted for `_start`.
  .global _start
  .type _start, %function
_start:
  j _foo

# Data placed right after the code of `_start`; this is the label that must
# not end up as a secondary entry point.
  .balign 16
_random_consts:
  .long 0x12345678
  .long 0x90abcdef

  .global _bar
  .type _bar, %function
_bar:
  ret

# Dummy relocation to force relocation mode
  .reloc 0, R_RISCV_NONE