[BOLT] Support PIC-style exception tables

Summary:
Exception tables for PIC may contain indirect type references
that are also encoded using relative addresses.

This diff adds support for such encodings. We read the PIC-style
type info table and write it back using the new encoding.

(cherry picked from FBD5716060)
This commit is contained in:
Maksim Panchenko
2017-08-27 17:04:06 -07:00
parent 49d1f5698d
commit bd8e4b9e87
5 changed files with 151 additions and 29 deletions

View File

@@ -387,6 +387,21 @@ ErrorOr<SectionRef> BinaryContext::getSectionForAddress(uint64_t Address) const{
return std::make_error_code(std::errc::bad_address);
}
/// Read a pointer-sized value stored at \p Address in the input binary.
///
/// The section containing \p Address is located with getSectionForAddress()
/// and its contents are decoded using the byte order and pointer size
/// reported by AsmInfo.
///
/// \returns the decoded value, or the error produced while locating the
/// section or while reading its contents.
ErrorOr<uint64_t>
BinaryContext::extractPointerAtAddress(uint64_t Address) const {
  auto Section = getSectionForAddress(Address);
  if (!Section)
    return Section.getError();

  // Propagate a failure to read the section contents instead of silently
  // decoding from an empty buffer (which would yield a bogus zero pointer).
  StringRef SectionContents;
  if (std::error_code EC = Section->getContents(SectionContents))
    return EC;

  DataExtractor DE(SectionContents,
                   AsmInfo->isLittleEndian(),
                   AsmInfo->getPointerSize());
  // DataExtractor takes a 32-bit offset relative to the start of the buffer.
  uint32_t SectionOffset = Address - Section->getAddress();
  return DE.getAddress(&SectionOffset);
}
void BinaryContext::addSectionRelocation(SectionRef Section, uint64_t Offset,
MCSymbol *Symbol, uint64_t Type,
uint64_t Addend) {

View File

@@ -207,6 +207,11 @@ public:
/// Return (allocatable) section containing the given \p Address.
ErrorOr<SectionRef> getSectionForAddress(uint64_t Address) const;
/// Given \p Address in the binary, extract and return a pointer value at that
/// address. The address has to be a valid statically allocated address for
/// the binary.
///
/// The value is decoded using the pointer size and byte order reported by
/// AsmInfo.
///
/// \returns the pointer value read, or an error if no section could be found
/// for \p Address.
ErrorOr<uint64_t> extractPointerAtAddress(uint64_t Address) const;
/// Register a symbol with \p Name at a given \p Address.
MCSymbol *registerNameAtAddress(const std::string &Name, uint64_t Address) {
// Check if the Name was already registered.

View File

@@ -548,7 +548,8 @@ private:
/// Binary blobs representing action, type, and type index tables for this
/// function's LSDA (exception handling).
ArrayRef<uint8_t> LSDAActionAndTypeTables;
ArrayRef<uint8_t> LSDAActionTable;
std::vector<uint64_t> LSDATypeTable;
ArrayRef<uint8_t> LSDATypeIndexTable;
/// Marking for the beginning of language-specific data area for the function.

View File

@@ -53,6 +53,28 @@ PrintExceptions("print-exceptions",
namespace llvm {
namespace bolt {
namespace {
/// Return the number of bytes occupied by a value stored with the given
/// DWARF EH pointer \p Encoding.
///
/// Only the value-format part of the encoding (its low 4 bits) is examined;
/// application modifiers in the upper bits are ignored. Pointer-sized formats
/// take their width from \p BC's AsmInfo. Any other format is treated as
/// unreachable.
unsigned getEncodingSize(unsigned Encoding, BinaryContext &BC) {
  const unsigned Format = Encoding & 0x0f;

  switch (Format) {
  case dwarf::DW_EH_PE_udata2:
  case dwarf::DW_EH_PE_sdata2:
    return 2;
  case dwarf::DW_EH_PE_udata4:
  case dwarf::DW_EH_PE_sdata4:
    return 4;
  case dwarf::DW_EH_PE_udata8:
  case dwarf::DW_EH_PE_sdata8:
    return 8;
  case dwarf::DW_EH_PE_absptr:
  case dwarf::DW_EH_PE_signed:
    // Pointer-sized formats depend on the target.
    return BC.AsmInfo->getPointerSize();
  default:
    break;
  }

  llvm_unreachable("unknown encoding");
}
} // anonymous namespace
// Read and dump the .gcc_except_table section entry.
//
// .gcc_except_table section contains a set of Language-Specific Data Areas -
@@ -88,19 +110,20 @@ namespace bolt {
// these tables is encoded in LSDA header. Sizes for both of the tables are not
// included anywhere.
//
// For the purpose of rewriting exception handling tables, we can reuse action,
// types, and type index tables in their original binary format.
// This is only possible when type references are encoded as absolute addresses.
// We still have to parse all the tables to determine their sizes. Then we have
// We have to parse all of the tables to determine their sizes. Then we have
// to parse the call site table and associate discovered information with
// actual call instructions and landing pad blocks.
//
// For the purpose of rewriting exception handling tables, we can reuse action,
// and type index tables in their original binary format.
//
// Type table could be encoded using position-independent references, and thus
// may require relocation.
//
// Ideally we should be able to re-write LSDA in-place, without the need to
// allocate a new space for it. Sadly there's no guarantee that the new call
// site table will be the same size as GCC uses uleb encodings for PC offsets.
//
// For split function re-writing we would need to split LSDA too.
//
// Note: some functions have LSDA entries with 0 call site entries.
void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
uint64_t LSDASectionAddress) {
@@ -112,29 +135,37 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
assert(getLSDAAddress() < LSDASectionAddress + LSDASectionData.size() &&
"wrong LSDA address");
// Given an address in memory corresponding to some entity in mapped
// LSDA section return address of this entity in a binary file.
auto getFileAddress = [&](const uint8_t *InMemAddress) {
return InMemAddress - LSDASectionData.data() + LSDASectionAddress;
};
const uint8_t *Ptr =
LSDASectionData.data() + getLSDAAddress() - LSDASectionAddress;
uint8_t LPStartEncoding = *Ptr++;
uintptr_t LPStart = 0;
if (LPStartEncoding != DW_EH_PE_omit) {
LPStart = readEncodedPointer(Ptr, LPStartEncoding);
LPStart = readEncodedPointer(Ptr, LPStartEncoding, getFileAddress(Ptr));
}
assert(LPStart == 0 && "support for split functions not implemented");
uint8_t TTypeEncoding = *Ptr++;
const auto TTypeEncoding = *Ptr++;
size_t TTypeEncodingSize = 0;
uintptr_t TTypeEnd = 0;
if (TTypeEncoding != DW_EH_PE_omit) {
TTypeEnd = readULEB128(Ptr);
TTypeEncodingSize = getEncodingSize(TTypeEncoding, BC);
}
if (opts::PrintExceptions) {
outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
<< " for function " << *this << "]:\n";
outs() << "LPStart Encoding = " << (unsigned)LPStartEncoding << '\n';
outs() << "LPStart Encoding = 0x"
<< Twine::utohexstr(LPStartEncoding) << '\n';
outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
outs() << "TType Encoding = " << (unsigned)TTypeEncoding << '\n';
outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
outs() << "TType End = " << TTypeEnd << '\n';
}
@@ -144,9 +175,12 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
// Offset past the last decoded index.
intptr_t MaxTypeIndexTableOffset = 0;
// Max positive index used in type table.
unsigned MaxTypeIndex = 0;
// The actual type info table starts at the same location, but grows in
// opposite direction. TTypeEncoding is used to encode stored values.
auto TypeTableStart = reinterpret_cast<const uint32_t *>(Ptr + TTypeEnd);
const auto TypeTableStart = Ptr + TTypeEnd;
uint8_t CallSiteEncoding = *Ptr++;
uint32_t CallSiteTableLength = readULEB128(Ptr);
@@ -164,9 +198,12 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
HasEHRanges = CallSitePtr < CallSiteTableEnd;
uint64_t RangeBase = getAddress();
while (CallSitePtr < CallSiteTableEnd) {
uintptr_t Start = readEncodedPointer(CallSitePtr, CallSiteEncoding);
uintptr_t Length = readEncodedPointer(CallSitePtr, CallSiteEncoding);
uintptr_t LandingPad = readEncodedPointer(CallSitePtr, CallSiteEncoding);
uintptr_t Start = readEncodedPointer(CallSitePtr, CallSiteEncoding,
getFileAddress(CallSitePtr));
uintptr_t Length = readEncodedPointer(CallSitePtr, CallSiteEncoding,
getFileAddress(CallSitePtr));
uintptr_t LandingPad = readEncodedPointer(CallSitePtr, CallSiteEncoding,
getFileAddress(CallSitePtr));
uintptr_t ActionEntry = readULEB128(CallSitePtr);
if (opts::PrintExceptions) {
@@ -220,13 +257,24 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
if (ActionEntry != 0) {
auto printType = [&] (int Index, raw_ostream &OS) {
assert(Index > 0 && "only positive indices are valid");
assert(TTypeEncoding == DW_EH_PE_udata4 &&
"only udata4 supported for TTypeEncoding");
auto TypeAddress = *(TypeTableStart - Index);
const uint8_t *TTEntry = TypeTableStart - Index * TTypeEncodingSize;
const auto TTEntryAddress = getFileAddress(TTEntry);
auto TypeAddress = readEncodedPointer(TTEntry,
TTypeEncoding,
TTEntryAddress);
if ((TTypeEncoding & DW_EH_PE_pcrel) &&
(TypeAddress == TTEntryAddress)) {
TypeAddress = 0;
}
if (TypeAddress == 0) {
OS << "<all>";
return;
}
if (TTypeEncoding & DW_EH_PE_indirect) {
auto PointerOrErr = BC.extractPointerAtAddress(TypeAddress);
assert(PointerOrErr && "failed to decode indirect address");
TypeAddress = *PointerOrErr;
}
auto NI = BC.GlobalAddresses.find(TypeAddress);
if (NI != BC.GlobalAddresses.end()) {
OS << NI->second;
@@ -251,6 +299,8 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
outs() << "cleanup";
} else if (ActionType > 0) {
// It's an index into a type table.
MaxTypeIndex = std::max(MaxTypeIndex,
static_cast<unsigned>(ActionType));
if (opts::PrintExceptions) {
outs() << "catch type ";
printType(ActionType, outs());
@@ -265,6 +315,7 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
// encoded using uleb128 thus we cannot directly dereference them.
auto TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
while (auto Index = readULEB128(TypeIndexTablePtr)) {
MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
if (opts::PrintExceptions) {
outs() << TSep;
printType(Index, outs());
@@ -293,9 +344,27 @@ void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
if (TTypeEnd) {
// TypeIndexTableStart is a <uint8_t *> alias for TypeTableStart.
LSDAActionAndTypeTables =
ArrayRef<uint8_t>(ActionTableStart,
TypeIndexTableStart - ActionTableStart);
LSDAActionTable =
ArrayRef<uint8_t>(ActionTableStart, TypeIndexTableStart -
MaxTypeIndex * TTypeEncodingSize - ActionTableStart);
for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
const uint8_t *TTEntry = TypeTableStart - Index * TTypeEncodingSize;
const auto TTEntryAddress = getFileAddress(TTEntry);
auto TypeAddress = readEncodedPointer(TTEntry,
TTypeEncoding,
TTEntryAddress);
if ((TTypeEncoding & DW_EH_PE_pcrel) &&
(TypeAddress == TTEntryAddress)) {
TypeAddress = 0;
}
if (TypeAddress &&
(TTypeEncoding & DW_EH_PE_indirect)) {
auto PointerOrErr = BC.extractPointerAtAddress(TypeAddress);
assert(PointerOrErr && "failed to decode indirect address");
TypeAddress = *PointerOrErr;
}
LSDATypeTable.emplace_back(TypeAddress);
}
LSDATypeIndexTable =
ArrayRef<uint8_t>(TypeIndexTableStart, MaxTypeIndexTableOffset);
}
@@ -446,8 +515,8 @@ void BinaryFunction::emitLSDA(MCStreamer *Streamer, bool EmitColdPart) {
Streamer->SwitchSection(BC.MOFI->getLSDASection());
// When we read we make sure only the following encoding is supported.
constexpr unsigned TTypeEncoding = dwarf::DW_EH_PE_udata4;
const auto TTypeEncoding = BC.MOFI->getTTypeEncoding();
const auto TTypeEncodingSize = getEncodingSize(TTypeEncoding, BC);
// Type tables have to be aligned at 4 bytes.
Streamer->EmitValueToAlignment(4);
@@ -470,7 +539,8 @@ void BinaryFunction::emitLSDA(MCStreamer *Streamer, bool EmitColdPart) {
sizeof(int8_t) + // Call site format
CallSiteTableLengthSize + // Call site table length size
CallSiteTableLength + // Call site table length
LSDAActionAndTypeTables.size(); // Actions + Types size
LSDAActionTable.size() + // Actions table size
LSDATypeTable.size() * TTypeEncodingSize; // Types table size
unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset);
unsigned TotalSize =
sizeof(int8_t) + // LPStart format
@@ -514,12 +584,43 @@ void BinaryFunction::emitLSDA(MCStreamer *Streamer, bool EmitColdPart) {
// Write out action, type, and type index tables at the end.
//
// There's no need to change the original format we saw on input
// unless we are doing a function splitting in which case we can
// perhaps split and optimize the tables.
for (auto const &Byte : LSDAActionAndTypeTables) {
// For action and type index tables there's no need to change the original
// table format unless we are doing function splitting, in which case we can
// split and optimize the tables.
//
// For type table we (re-)encode the table using TTypeEncoding matching
// the current assembler mode.
for (auto const &Byte : LSDAActionTable) {
Streamer->EmitIntValue(Byte, 1);
}
assert(!(TTypeEncoding & dwarf::DW_EH_PE_indirect) &&
"indirect type info encoding is not supported yet");
for (int Index = LSDATypeTable.size() - 1; Index >= 0; --Index) {
// Note: the address could be an indirect one.
const auto TypeAddress = LSDATypeTable[Index];
switch (TTypeEncoding & 0x70) {
default:
llvm_unreachable("unsupported TTypeEncoding");
case 0:
Streamer->EmitIntValue(TypeAddress, TTypeEncodingSize);
break;
case dwarf::DW_EH_PE_pcrel: {
if (TypeAddress) {
const auto *TypeSymbol = BC.getOrCreateGlobalSymbol(TypeAddress, "TI");
auto *DotSymbol = BC.Ctx->createTempSymbol();
Streamer->EmitLabel(DotSymbol);
const auto *SubDotExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(TypeSymbol, *BC.Ctx),
MCSymbolRefExpr::create(DotSymbol, *BC.Ctx),
*BC.Ctx);
Streamer->EmitValue(SubDotExpr, TTypeEncodingSize);
} else {
Streamer->EmitIntValue(0, TTypeEncodingSize);
}
break;
}
}
}
for (auto const &Byte : LSDATypeIndexTable) {
Streamer->EmitIntValue(Byte, 1);
}

View File

@@ -559,7 +559,7 @@ createBinaryContext(ELFObjectFileBase *File, DataReader &DR,
std::unique_ptr<MCContext> Ctx =
llvm::make_unique<MCContext>(AsmInfo.get(), MRI.get(), MOFI.get());
MOFI->InitMCObjectFileInfo(*TheTriple, Reloc::Default,
CodeModel::Default, *Ctx);
CodeModel::Small, *Ctx);
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, *Ctx));