bolt/Exceptions.cpp

//===-- Exceptions.cpp - Helpers for processing C++ exceptions ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Some of the code is taken from examples/ExceptionDemo
//
//===----------------------------------------------------------------------===//

#include "Exceptions.h"
#include "BinaryFunction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#undef  DEBUG_TYPE
#define DEBUG_TYPE "flo-exceptions"

STATISTIC(NumLSDAs, "Number of all LSDAs");
STATISTIC(NumTrivialLSDAs,
          "Number of LSDAs with single call site without landing pad or action");

using namespace llvm::dwarf;

namespace llvm {
namespace flo {

namespace opts {

static cl::opt<bool>
PrintExceptions("print-exceptions",
                cl::desc("print exception handling data"),
                cl::Hidden);

} // namespace opts

// readLSDA is reading and dumping the whole .gcc_exception_table section
// at once.
//
// .gcc_except_table section contains a set of Language-Specific Data Areas
// which are basically exception handling tables. One LSDA per function.
// One important observation - you can't actually tell which function LSDA
// refers to, and most addresses are relative to the function start. So you
// have to start with parsing .eh_frame entries that refers to LSDA to obtain
// a function context.
//
// The best visual representation of the tables comprising LSDA and relationship
// between them is illustrated at:
//   http://mentorembedded.github.io/cxx-abi/exceptions.pdf
// Keep in mind that GCC implementation deviates slightly from that document.
//
// To summarize, there are 4 tables in LSDA: call site table, actions table,
// types table, and types index table (indirection). The main table contains
// call site entries. Each call site includes a range that can throw an exception,
// a handler (landing pad), and a reference to an entry in the action table.
// A handler and/or action could be 0. An action entry is in fact a head
// of a list of actions associated with a call site and an action table contains
// all such lists (it could be optimize to share list tails). Each action could be
// either to catch an exception of a given type, to perform a cleanup, or to
// propagate an exception after filtering it out (e.g. to make sure function
// exception specification is not violated). Catch action contains a reference
// to an entry in the type table, and filter action refers to an entry in the
// type index table to encode a set of types to filter.
//
// Call site table follows LSDA header. Action table immediately follows the
// call site table.
//
// Both types table and type index table start at the same location, but they
// grow in opposite directions (types go up, indices go down). The beginning of
// these tables is encoded in LSDA header. Sizes for both of the tables are not
// included anywhere.
//
// For the purpose of rewriting exception handling tables, we can reuse action
// table, types table, and type index table in a binary format when type
// references are hard-coded absolute addresses. We still have to parse all the
// table to determine their size. We have to parse call site table and associate
// discovered information with actual call instructions and landing pad blocks.
void readLSDA(ArrayRef<uint8_t> LSDAData, BinaryContext &BC) {
  const uint8_t *Ptr = LSDAData.data();

  while (Ptr < LSDAData.data() + LSDAData.size()) {
    uint8_t LPStartEncoding = *Ptr++;
    // Some of LSDAs are aligned while other are not. We use the hack below
    // to work around 0-filled alignment. However it could also mean 
    // DW_EH_PE_absptr format.
    //
    // FIXME: the proper way to parse these tables is to get the pointer
    //        from .eh_frame and parse one entry at a time.
    while (!LPStartEncoding)
      LPStartEncoding = *Ptr++;
    if (opts::PrintExceptions) {
      errs() << "[LSDA at 0x"
             << Twine::utohexstr(reinterpret_cast<uint64_t>(Ptr-1)) << "]:\n";
    }

    ++NumLSDAs;
    bool IsTrivial = true;

    uintptr_t LPStart = 0;
    if (LPStartEncoding != DW_EH_PE_omit) {
      LPStart = readEncodedPointer(Ptr, LPStartEncoding);
    }

    uint8_t TTypeEncoding = *Ptr++;
    uintptr_t TTypeEnd = 0;
    if (TTypeEncoding != DW_EH_PE_omit) {
      TTypeEnd = readULEB128(Ptr);
    }

    if (opts::PrintExceptions) {
      errs() << "LPStart Encoding = " << (unsigned)LPStartEncoding << '\n';
      errs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
      errs() << "TType Encoding = " << (unsigned)TTypeEncoding << '\n';
      errs() << "TType End = " << TTypeEnd << '\n';
    }

    // Table to store list of indices in type table. Entries are uleb128s values.
    auto TypeIndexTableStart = Ptr + TTypeEnd;

    // Offset past the last decoded index.
    intptr_t MaxTypeIndexTableOffset = 0;

    // The actual type info table starts at the same location, but grows in
    // different direction. Encoding is different too (TTypeEncoding).
    auto TypeTableStart = reinterpret_cast<const uint32_t *>(Ptr + TTypeEnd);

    uint8_t       CallSiteEncoding = *Ptr++;
    uint32_t      CallSiteTableLength = readULEB128(Ptr);
    const uint8_t *CallSiteTableStart = Ptr;
    const uint8_t *CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
    const uint8_t *CallSitePtr = CallSiteTableStart;
    const uint8_t *ActionTableStart = CallSiteTableEnd;

    if (opts::PrintExceptions) {
      errs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
      errs() << "CallSite table length = " << CallSiteTableLength << '\n';
      errs() << '\n';
    }

    unsigned NumCallSites = 0;
    while (CallSitePtr < CallSiteTableEnd) {
      ++NumCallSites;
      uintptr_t Start = readEncodedPointer(CallSitePtr, CallSiteEncoding);
      uintptr_t Length = readEncodedPointer(CallSitePtr, CallSiteEncoding);
      uintptr_t LandingPad = readEncodedPointer(CallSitePtr, CallSiteEncoding);

      uintptr_t ActionEntry = readULEB128(CallSitePtr);
      uint64_t RangeBase = 0;
      if (opts::PrintExceptions) {
        errs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
               << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
               << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
               << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
      }
      if (ActionEntry != 0) {
        auto printType = [&] (int Index, raw_ostream &OS) {
          assert(Index > 0 && "only positive indices are valid");
          assert(TTypeEncoding == DW_EH_PE_udata4 &&
                 "only udata4 supported for TTypeEncoding");
          auto TypeAddress = *(TypeTableStart - Index);
          if (TypeAddress == 0) {
            OS << "<all>";
            return;
          }
          auto NI = BC.GlobalAddresses.find(TypeAddress);
          if (NI != BC.GlobalAddresses.end()) {
            OS << NI->second;
          } else {
            OS << "0x" << Twine::utohexstr(TypeAddress);
          }
        };
        if (opts::PrintExceptions)
          errs() << "    actions: ";
        const uint8_t *ActionPtr = ActionTableStart + ActionEntry - 1;
        long long ActionType;
        long long ActionNext;
        auto Sep = "";
        do {
          ActionType = readSLEB128(ActionPtr);
          auto Self = ActionPtr;
          ActionNext = readSLEB128(ActionPtr);
          if (opts::PrintExceptions)
            errs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
          if (ActionType == 0) {
            if (opts::PrintExceptions)
              errs() << "cleanup";
          } else if (ActionType > 0) {
            // It's an index into a type table.
            if (opts::PrintExceptions) {
              errs() << "catch type ";
              printType(ActionType, errs());
            }
          } else { // ActionType < 0
            if (opts::PrintExceptions)
              errs() << "filter exception types ";
            auto TSep = "";
            // ActionType is a negative byte offset into uleb128-encoded table
            // of indices with base 1.
            // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
            // encoded using uleb128 so we cannot directly dereference them.
            auto TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
            while (auto Index = readULEB128(TypeIndexTablePtr)) {
              if (opts::PrintExceptions) {
                errs() << TSep;
                printType(Index, errs());
                TSep = ", ";
              }
            }
            MaxTypeIndexTableOffset =
                std::max(MaxTypeIndexTableOffset,
                         TypeIndexTablePtr - TypeIndexTableStart);
          }

          Sep = "; ";

          ActionPtr = Self + ActionNext;
        } while (ActionNext);
        if (opts::PrintExceptions)
          errs() << '\n';
      }

      if (LandingPad != 0 || ActionEntry != 0)
        IsTrivial = false;
    }
    Ptr = CallSiteTableEnd;

    if (NumCallSites > 1)
      IsTrivial = false;

    if (IsTrivial)
      ++NumTrivialLSDAs;

    if (opts::PrintExceptions)
      errs() << '\n';

    if (CallSiteTableLength == 0 || TTypeEnd == 0)
      continue;

    Ptr = TypeIndexTableStart + MaxTypeIndexTableOffset;
  }
}

void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
                               uint64_t LSDASectionAddress) {
  assert(CurrentState == State::Disassembled && "unexpecrted function state");

  if (!getLSDAAddress())
    return;

  assert(getLSDAAddress() < LSDASectionAddress + LSDASectionData.size() &&
         "wrong LSDA address");

  const uint8_t *Ptr =
      LSDASectionData.data() + getLSDAAddress() - LSDASectionAddress;

  uint8_t LPStartEncoding = *Ptr++;
  uintptr_t LPStart = 0;
  if (LPStartEncoding != DW_EH_PE_omit) {
    LPStart = readEncodedPointer(Ptr, LPStartEncoding);
  }

  assert(LPStart == 0 && "support for split functions not implemented");

  uint8_t TTypeEncoding = *Ptr++;
  uintptr_t TTypeEnd = 0;
  if (TTypeEncoding != DW_EH_PE_omit) {
    TTypeEnd = readULEB128(Ptr);
  }

  if (opts::PrintExceptions) {
    errs() << "LPStart Encoding = " << (unsigned)LPStartEncoding << '\n';
    errs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
    errs() << "TType Encoding = " << (unsigned)TTypeEncoding << '\n';
    errs() << "TType End = " << TTypeEnd << '\n';
  }

  // Table to store list of indices in type table. Entries are uleb128s values.
  auto TypeIndexTableStart = Ptr + TTypeEnd;

  // Offset past the last decoded index.
  intptr_t MaxTypeIndexTableOffset = 0;

  // The actual type info table starts at the same location, but grows in
  // different direction. Encoding is different too (TTypeEncoding).
  auto TypeTableStart = reinterpret_cast<const uint32_t *>(Ptr + TTypeEnd);

  uint8_t       CallSiteEncoding = *Ptr++;
  uint32_t      CallSiteTableLength = readULEB128(Ptr);
  const uint8_t *CallSiteTableStart = Ptr;
  const uint8_t *CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
  const uint8_t *CallSitePtr = CallSiteTableStart;
  const uint8_t *ActionTableStart = CallSiteTableEnd;

  if (opts::PrintExceptions) {
    errs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
    errs() << "CallSite table length = " << CallSiteTableLength << '\n';
    errs() << '\n';
  }

  unsigned NumCallSites = 0;
  while (CallSitePtr < CallSiteTableEnd) {
    ++NumCallSites;
    uintptr_t Start = readEncodedPointer(CallSitePtr, CallSiteEncoding);
    uintptr_t Length = readEncodedPointer(CallSitePtr, CallSiteEncoding);
    uintptr_t LandingPad = readEncodedPointer(CallSitePtr, CallSiteEncoding);

    uintptr_t ActionEntry = readULEB128(CallSitePtr);
    uint64_t RangeBase = getAddress();
    if (opts::PrintExceptions) {
      errs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
             << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
             << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
             << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
    }

    // Create a handler entry if necessary.
    MCSymbol *LPSymbol{nullptr};
    if (LandingPad) {
      auto Label = Labels.find(LandingPad);
      if (Label != Labels.end()) {
        LPSymbol = Label->second;
      } else {
        LPSymbol = BC.Ctx->createTempSymbol("LP");
        Labels[LandingPad] = LPSymbol;
      }
      LandingPads.insert(LPSymbol);
    }

    // Mark all call instructions in the range.
    auto II = Instructions.find(Start);
    assert(II != Instructions.end() &&
           "exception range not pointing to instruction");
    do {
      auto &Instruction = II->second;
      if (BC.MIA->isCall(Instruction)) {
        if (LPSymbol) {
          Instruction.addOperand(MCOperand::createExpr(
              MCSymbolRefExpr::create(LPSymbol,
                                      MCSymbolRefExpr::VK_None,
                                      *BC.Ctx)));
        } else {
          Instruction.addOperand(MCOperand::createImm(0));
        }
        Instruction.addOperand(MCOperand::createImm(ActionEntry));
      }
      ++II;
    } while (II->first < Start + Length);

    if (ActionEntry != 0) {
      auto printType = [&] (int Index, raw_ostream &OS) {
        assert(Index > 0 && "only positive indices are valid");
        assert(TTypeEncoding == DW_EH_PE_udata4 &&
               "only udata4 supported for TTypeEncoding");
        auto TypeAddress = *(TypeTableStart - Index);
        if (TypeAddress == 0) {
          OS << "<all>";
          return;
        }
        auto NI = BC.GlobalAddresses.find(TypeAddress);
        if (NI != BC.GlobalAddresses.end()) {
          OS << NI->second;
        } else {
          OS << "0x" << Twine::utohexstr(TypeAddress);
        }
      };
      if (opts::PrintExceptions)
        errs() << "    actions: ";
      const uint8_t *ActionPtr = ActionTableStart + ActionEntry - 1;
      long long ActionType;
      long long ActionNext;
      auto Sep = "";
      do {
        ActionType = readSLEB128(ActionPtr);
        auto Self = ActionPtr;
        ActionNext = readSLEB128(ActionPtr);
        if (opts::PrintExceptions)
          errs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
        if (ActionType == 0) {
          if (opts::PrintExceptions)
            errs() << "cleanup";
        } else if (ActionType > 0) {
          // It's an index into a type table.
          if (opts::PrintExceptions) {
            errs() << "catch type ";
            printType(ActionType, errs());
          }
        } else { // ActionType < 0
          if (opts::PrintExceptions)
            errs() << "filter exception types ";
          auto TSep = "";
          // ActionType is a negative byte offset into uleb128-encoded table
          // of indices with base 1.
          // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
          // encoded using uleb128 so we cannot directly dereference them.
          auto TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
          while (auto Index = readULEB128(TypeIndexTablePtr)) {
            if (opts::PrintExceptions) {
              errs() << TSep;
              printType(Index, errs());
              TSep = ", ";
            }
          }
          MaxTypeIndexTableOffset =
              std::max(MaxTypeIndexTableOffset,
                       TypeIndexTablePtr - TypeIndexTableStart);
        }

        Sep = "; ";

        ActionPtr = Self + ActionNext;
      } while (ActionNext);
      if (opts::PrintExceptions)
        errs() << '\n';
    }
  }
  if (opts::PrintExceptions)
    errs() << '\n';
}

const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;

void CFIReader::fillCFIInfoFor(BinaryFunction &Function) const {
  uint64_t Address = Function.getAddress();
  auto I = FDEs.find(Address);
  if (I == FDEs.end())
    return;

  const FDE &CurFDE = *I->second;
  if (Function.getSize() != CurFDE.getAddressRange()) {
    errs() << "FLO-WARNING: CFI information size mismatch for function \""
           << Function.getName() << "\""
           << format(": Function size is %dB, CFI covers "
                     "%dB\n",
                     Function.getSize(), CurFDE.getAddressRange());
  }

  Function.setLSDAAddress(CurFDE.getLSDAAddress());

  uint64_t Offset = 0;
  uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
  uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
  for (const FrameEntry::Instruction &Instr : CurFDE) {
    uint8_t Opcode = Instr.Opcode;
    if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
      Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
    switch (Instr.Opcode) {
    case DW_CFA_nop:
      break;
    case DW_CFA_advance_loc4:
    case DW_CFA_advance_loc2:
    case DW_CFA_advance_loc1:
    case DW_CFA_advance_loc:
      // Advance our current address
      Offset += CodeAlignment * int64_t(Instr.Ops[0]);
      break;
    case DW_CFA_offset_extended_sf:
      Function.addCFIInstruction(
          Offset,
          MCCFIInstruction::createOffset(
              nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
      break;
    case DW_CFA_offset_extended:
    case DW_CFA_offset:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
                                                 DataAlignment * Instr.Ops[1]));
      break;
    case DW_CFA_restore_extended:
    case DW_CFA_restore:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_set_loc:
      assert(Instr.Ops[0] < Address && "set_loc out of function bounds");
      assert(Instr.Ops[0] > Address + Function.getSize() &&
             "set_loc out of function bounds");
      Offset = Instr.Ops[0] - Address;
      break;

    case DW_CFA_undefined:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_same_value:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_register:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
                                                   Instr.Ops[1]));
      break;
    case DW_CFA_remember_state:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createRememberState(nullptr));
      break;
    case DW_CFA_restore_state:
      Function.addCFIInstruction(Offset,
                                 MCCFIInstruction::createRestoreState(nullptr));
      break;
    case DW_CFA_def_cfa:
      Function.addCFIInstruction(
          Offset,
          MCCFIInstruction::createDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
      break;
    case DW_CFA_def_cfa_sf:
      Function.addCFIInstruction(
          Offset,
          MCCFIInstruction::createDefCfa(
              nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
      break;
    case DW_CFA_def_cfa_register:
      Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
                                             nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_def_cfa_offset:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createDefCfaOffset(nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_def_cfa_offset_sf:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createDefCfaOffset(
                      nullptr, DataAlignment * int64_t(Instr.Ops[0])));
      break;
    case DW_CFA_val_offset_sf:
    case DW_CFA_val_offset:
      llvm_unreachable("DWARF val_offset() unimplemented");
      break;
    case DW_CFA_expression:
    case DW_CFA_def_cfa_expression:
    case DW_CFA_val_expression:
      llvm_unreachable("DWARF CFA expressions unimplemented");
      break;
      dbgs() << "DW_CFA_val_expression";
      break;
    case DW_CFA_MIPS_advance_loc8:
      llvm_unreachable("DW_CFA_MIPS_advance_loc unimplemented");
      break;
    case DW_CFA_GNU_args_size:
    case DW_CFA_GNU_window_save:
    case DW_CFA_lo_user:
    case DW_CFA_hi_user:
      llvm_unreachable("DW_CFA_GNU_* and DW_CFA_*_use unimplemented");
      break;
    default:
      llvm_unreachable("Unrecognized CFI instruction");
    }
  }
}

} // namespace flo
} // namespace llvm