2015-11-02 11:50:53 -07:00
|
|
|
//===-- Exceptions.cpp - Helpers for processing C++ exceptions ------------===//
|
|
|
|
|
//
|
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
|
//
|
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
//
|
|
|
|
|
// Some of the code is taken from examples/ExceptionDemo
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2015-11-03 14:26:33 -08:00
|
|
|
#include "Exceptions.h"
|
2015-11-04 16:48:47 -08:00
|
|
|
#include "BinaryFunction.h"
|
2016-09-27 19:09:38 -07:00
|
|
|
#include "RewriteInstance.h"
|
2015-11-02 11:50:53 -07:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
|
#include "llvm/ADT/STLExtras.h"
|
|
|
|
|
#include "llvm/ADT/Statistic.h"
|
|
|
|
|
#include "llvm/ADT/StringExtras.h"
|
|
|
|
|
#include "llvm/ADT/Twine.h"
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
#include "llvm/BinaryFormat/Dwarf.h"
|
|
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
|
2015-12-18 17:00:46 -08:00
|
|
|
#include "llvm/MC/MCStreamer.h"
|
2015-11-02 11:50:53 -07:00
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
|
#include "llvm/Support/Debug.h"
|
2015-12-18 17:00:46 -08:00
|
|
|
#include "llvm/Support/LEB128.h"
|
2015-11-02 11:50:53 -07:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2017-09-20 13:32:46 -07:00
|
|
|
#include <functional>
|
2016-09-27 19:09:38 -07:00
|
|
|
#include <map>
|
2015-11-02 11:50:53 -07:00
|
|
|
|
|
|
|
|
#undef DEBUG_TYPE
|
2016-02-05 14:42:04 -08:00
|
|
|
#define DEBUG_TYPE "bolt-exceptions"
|
2015-11-02 11:50:53 -07:00
|
|
|
|
|
|
|
|
using namespace llvm::dwarf;
|
|
|
|
|
|
|
|
|
|
namespace opts {
|
|
|
|
|
|
2017-03-28 14:40:20 -07:00
|
|
|
extern llvm::cl::OptionCategory BoltCategory;
|
|
|
|
|
|
2016-09-02 14:15:29 -07:00
|
|
|
extern llvm::cl::opt<unsigned> Verbosity;
|
|
|
|
|
|
|
|
|
|
static llvm::cl::opt<bool>
|
2015-11-02 11:50:53 -07:00
|
|
|
PrintExceptions("print-exceptions",
|
2017-03-28 14:40:20 -07:00
|
|
|
llvm::cl::desc("print exception handling data"),
|
|
|
|
|
llvm::cl::ZeroOrMore,
|
|
|
|
|
llvm::cl::Hidden,
|
|
|
|
|
llvm::cl::cat(BoltCategory));
|
2015-11-02 11:50:53 -07:00
|
|
|
|
|
|
|
|
} // namespace opts
|
|
|
|
|
|
2016-09-02 14:15:29 -07:00
|
|
|
namespace llvm {
|
|
|
|
|
namespace bolt {
|
|
|
|
|
|
2017-08-27 17:04:06 -07:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
unsigned getEncodingSize(unsigned Encoding, BinaryContext &BC) {
|
|
|
|
|
switch (Encoding & 0x0f) {
|
|
|
|
|
default: llvm_unreachable("unknown encoding");
|
|
|
|
|
case dwarf::DW_EH_PE_absptr:
|
|
|
|
|
case dwarf::DW_EH_PE_signed:
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
return BC.AsmInfo->getCodePointerSize();
|
2017-08-27 17:04:06 -07:00
|
|
|
case dwarf::DW_EH_PE_udata2:
|
|
|
|
|
case dwarf::DW_EH_PE_sdata2:
|
|
|
|
|
return 2;
|
|
|
|
|
case dwarf::DW_EH_PE_udata4:
|
|
|
|
|
case dwarf::DW_EH_PE_sdata4:
|
|
|
|
|
return 4;
|
|
|
|
|
case dwarf::DW_EH_PE_udata8:
|
|
|
|
|
case dwarf::DW_EH_PE_sdata8:
|
|
|
|
|
return 8;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
2015-12-17 12:59:15 -08:00
|
|
|
// Read and dump the .gcc_except_table section entry.
|
2015-11-03 14:26:33 -08:00
|
|
|
//
|
2015-12-17 12:59:15 -08:00
|
|
|
// .gcc_except_table section contains a set of Language-Specific Data Areas -
|
|
|
|
|
// a fancy name for exception handling tables. There's one LSDA entry per
|
|
|
|
|
// function. However, we can't actually tell which function LSDA refers to
|
|
|
|
|
// unless we parse .eh_frame entry that refers to the LSDA.
|
|
|
|
|
// Then inside LSDA most addresses are encoded relative to the function start,
|
|
|
|
|
// so we need the function context in order to get to real addresses.
|
2015-11-03 14:26:33 -08:00
|
|
|
//
|
2015-12-17 12:59:15 -08:00
|
|
|
// The best visual representation of the tables comprising LSDA and
|
|
|
|
|
// relationships between them is illustrated at:
|
2017-08-01 11:19:01 -07:00
|
|
|
// https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
|
2015-11-03 14:26:33 -08:00
|
|
|
// Keep in mind that GCC implementation deviates slightly from that document.
|
|
|
|
|
//
|
|
|
|
|
// To summarize, there are 4 tables in LSDA: call site table, actions table,
|
2015-12-17 12:59:15 -08:00
|
|
|
// types table, and types index table (for indirection). The main table contains
|
|
|
|
|
// call site entries. Each call site includes a PC range that can throw an
|
|
|
|
|
// exception, a handler (landing pad), and a reference to an entry in the action
|
|
|
|
|
// table. The handler and/or action could be 0. The action entry is the head
|
|
|
|
|
// of a list of actions associated with a call site. The action table contains
|
|
|
|
|
// all such lists (it could be optimized to share list tails). Each action could
|
|
|
|
|
// be either to catch an exception of a given type, to perform a cleanup, or to
|
|
|
|
|
// propagate the exception after filtering it out (e.g. to make sure function
|
2015-11-03 14:26:33 -08:00
|
|
|
// exception specification is not violated). Catch action contains a reference
|
|
|
|
|
// to an entry in the type table, and filter action refers to an entry in the
|
|
|
|
|
// type index table to encode a set of types to filter.
|
|
|
|
|
//
|
|
|
|
|
// Call site table follows LSDA header. Action table immediately follows the
|
|
|
|
|
// call site table.
|
|
|
|
|
//
|
|
|
|
|
// Both types table and type index table start at the same location, but they
|
|
|
|
|
// grow in opposite directions (types go up, indices go down). The beginning of
|
|
|
|
|
// these tables is encoded in LSDA header. Sizes for both of the tables are not
|
|
|
|
|
// included anywhere.
|
|
|
|
|
//
|
2017-08-27 17:04:06 -07:00
|
|
|
// We have to parse all of the tables to determine their sizes. Then we have
|
2015-12-17 12:59:15 -08:00
|
|
|
// to parse the call site table and associate discovered information with
|
|
|
|
|
// actual call instructions and landing pad blocks.
|
|
|
|
|
//
|
2017-08-27 17:04:06 -07:00
|
|
|
// For the purpose of rewriting exception handling tables, we can reuse action,
|
|
|
|
|
// and type index tables in their original binary format.
|
|
|
|
|
//
|
|
|
|
|
// Type table could be encoded using position-independent references, and thus
|
|
|
|
|
// may require relocation.
|
|
|
|
|
//
|
2015-12-17 12:59:15 -08:00
|
|
|
// Ideally we should be able to re-write LSDA in-place, without the need to
|
|
|
|
|
// allocate a new space for it. Sadly there's no guarantee that the new call
|
|
|
|
|
// site table will be the same size, since GCC uses uleb encodings for PC offsets.
|
|
|
|
|
//
|
|
|
|
|
// Note: some functions have LSDA entries with 0 call site entries.
|
2015-11-12 18:56:58 -08:00
|
|
|
void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
|
|
|
|
|
uint64_t LSDASectionAddress) {
|
2015-12-15 17:06:27 -08:00
|
|
|
assert(CurrentState == State::Disassembled && "unexpected function state");
|
2015-11-12 18:56:58 -08:00
|
|
|
|
|
|
|
|
if (!getLSDAAddress())
|
|
|
|
|
return;
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
DWARFDataExtractor Data(
|
|
|
|
|
StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
|
|
|
|
|
LSDASectionData.size()),
|
|
|
|
|
BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
|
|
|
|
|
uint32_t Offset = getLSDAAddress() - LSDASectionAddress;
|
|
|
|
|
assert(Data.isValidOffset(Offset) && "wrong LSDA address");
|
2015-11-12 18:56:58 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
uint8_t LPStartEncoding = Data.getU8(&Offset);
|
2018-05-04 10:10:41 -07:00
|
|
|
uint64_t LPStart = 0;
|
|
|
|
|
if (auto MaybeLPStart = Data.getEncodedPointer(&Offset, LPStartEncoding,
|
|
|
|
|
Offset + LSDASectionAddress))
|
|
|
|
|
LPStart = *MaybeLPStart;
|
2015-11-12 18:56:58 -08:00
|
|
|
|
|
|
|
|
assert(LPStart == 0 && "support for split functions not implemented");
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const auto TTypeEncoding = Data.getU8(&Offset);
|
2017-08-27 17:04:06 -07:00
|
|
|
size_t TTypeEncodingSize = 0;
|
2015-11-12 18:56:58 -08:00
|
|
|
uintptr_t TTypeEnd = 0;
|
|
|
|
|
if (TTypeEncoding != DW_EH_PE_omit) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
TTypeEnd = Data.getULEB128(&Offset);
|
2017-08-27 17:04:06 -07:00
|
|
|
TTypeEncodingSize = getEncodingSize(TTypeEncoding, BC);
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (opts::PrintExceptions) {
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
|
2016-08-07 12:35:23 -07:00
|
|
|
<< " for function " << *this << "]:\n";
|
2017-08-27 17:04:06 -07:00
|
|
|
outs() << "LPStart Encoding = 0x"
|
|
|
|
|
<< Twine::utohexstr(LPStartEncoding) << '\n';
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
|
2017-08-27 17:04:06 -07:00
|
|
|
outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "TType End = " << TTypeEnd << '\n';
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
|
2015-12-15 17:06:27 -08:00
|
|
|
// Table to store list of indices in type table. Entries are uleb128 values.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const uint32_t TypeIndexTableStart = Offset + TTypeEnd;
|
2015-11-12 18:56:58 -08:00
|
|
|
|
|
|
|
|
// Offset past the last decoded index.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
uint32_t MaxTypeIndexTableOffset = 0;
|
2015-11-12 18:56:58 -08:00
|
|
|
|
2017-08-27 17:04:06 -07:00
|
|
|
// Max positive index used in type table.
|
|
|
|
|
unsigned MaxTypeIndex = 0;
|
|
|
|
|
|
2015-11-12 18:56:58 -08:00
|
|
|
// The actual type info table starts at the same location, but grows in
|
2017-08-01 11:19:01 -07:00
|
|
|
// opposite direction. TTypeEncoding is used to encode stored values.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const auto TypeTableStart = Offset + TTypeEnd;
|
2015-11-12 18:56:58 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
uint8_t CallSiteEncoding = Data.getU8(&Offset);
|
|
|
|
|
uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
|
|
|
|
|
auto CallSiteTableStart = Offset;
|
|
|
|
|
auto CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
|
|
|
|
|
auto CallSitePtr = CallSiteTableStart;
|
|
|
|
|
auto ActionTableStart = CallSiteTableEnd;
|
2015-11-12 18:56:58 -08:00
|
|
|
|
|
|
|
|
if (opts::PrintExceptions) {
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
|
|
|
|
|
outs() << "CallSite table length = " << CallSiteTableLength << '\n';
|
|
|
|
|
outs() << '\n';
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
|
2016-01-22 16:45:39 -08:00
|
|
|
HasEHRanges = CallSitePtr < CallSiteTableEnd;
|
2015-12-17 12:59:15 -08:00
|
|
|
uint64_t RangeBase = getAddress();
|
2015-11-12 18:56:58 -08:00
|
|
|
while (CallSitePtr < CallSiteTableEnd) {
|
2018-05-04 10:10:41 -07:00
|
|
|
uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
CallSitePtr + LSDASectionAddress);
|
2018-05-04 10:10:41 -07:00
|
|
|
uint64_t Length = *Data.getEncodedPointer(
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
&CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
|
2018-05-04 10:10:41 -07:00
|
|
|
uint64_t LandingPad = *Data.getEncodedPointer(
|
|
|
|
|
&CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
|
|
|
|
|
uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
|
2015-12-17 12:59:15 -08:00
|
|
|
|
2015-11-12 18:56:58 -08:00
|
|
|
if (opts::PrintExceptions) {
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
|
2015-11-12 18:56:58 -08:00
|
|
|
<< ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
|
|
|
|
|
<< "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
|
|
|
|
|
<< "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
|
2016-09-27 19:09:38 -07:00
|
|
|
outs() << " current offset is " << (CallSitePtr - CallSiteTableStart)
|
|
|
|
|
<< '\n';
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create a handler entry if necessary.
|
|
|
|
|
MCSymbol *LPSymbol{nullptr};
|
|
|
|
|
if (LandingPad) {
|
2017-11-04 19:22:05 -07:00
|
|
|
if (Instructions.find(LandingPad) == Instructions.end()) {
|
2016-09-02 14:15:29 -07:00
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad)
|
|
|
|
|
<< " not pointing to an instruction in function "
|
|
|
|
|
<< *this << " - ignoring.\n";
|
|
|
|
|
}
|
2015-11-12 18:56:58 -08:00
|
|
|
} else {
|
2015-12-18 17:00:46 -08:00
|
|
|
auto Label = Labels.find(LandingPad);
|
|
|
|
|
if (Label != Labels.end()) {
|
|
|
|
|
LPSymbol = Label->second;
|
|
|
|
|
} else {
|
|
|
|
|
LPSymbol = BC.Ctx->createTempSymbol("LP", true);
|
|
|
|
|
Labels[LandingPad] = LPSymbol;
|
|
|
|
|
}
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Mark all call instructions in the range.
|
2017-11-04 19:22:05 -07:00
|
|
|
auto II = Instructions.find(Start);
|
|
|
|
|
auto IE = Instructions.end();
|
2015-11-17 11:02:04 -08:00
|
|
|
assert(II != IE && "exception range not pointing to an instruction");
|
2015-11-12 18:56:58 -08:00
|
|
|
do {
|
2017-11-04 19:22:05 -07:00
|
|
|
auto &Instruction = II->second;
|
2018-03-09 09:45:13 -08:00
|
|
|
if (BC.MIB->isCall(Instruction) &&
|
|
|
|
|
!BC.MIB->getConditionalTailCall(Instruction)) {
|
|
|
|
|
assert(!BC.MIB->isInvoke(Instruction) &&
|
2015-12-17 12:59:15 -08:00
|
|
|
"overlapping exception ranges detected");
|
|
|
|
|
// Add extra operands to a call instruction making it an invoke from
|
|
|
|
|
// now on.
|
2018-03-09 09:45:13 -08:00
|
|
|
BC.MIB->addEHInfo(Instruction,
|
[BOLT][Refactoring] Isolate changes to MC layer
Summary:
Changes that we made to MCInst, MCOperand, MCExpr, etc. are now all
moved into tools/llvm-bolt. That required a change to the way we handle
annotations and any extra operands for MCInst.
Any MCPlus information is now attached via an extra operand of type
MCInst with an opcode ANNOTATION_LABEL. Since this operand is MCInst, we
attach extra info as operands to this instruction. For first-level
annotations use functions to access the information, such as
getConditionalTailCall() or getEHInfo(), etc. For the rest, optional or
second-class annotations, use a general named-annotation interface such
as getAnnotationAs<uint64_t>(Inst, "Count").
I did a test on HHVM binary, and a memory consumption went down a little
bit while the runtime remained the same.
(cherry picked from FBD7405412)
2018-03-19 18:32:12 -07:00
|
|
|
MCPlus::MCLandingPad(LPSymbol, ActionEntry));
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
++II;
|
2015-11-17 11:02:04 -08:00
|
|
|
} while (II != IE && II->first < Start + Length);
|
2015-11-12 18:56:58 -08:00
|
|
|
|
|
|
|
|
if (ActionEntry != 0) {
|
|
|
|
|
auto printType = [&] (int Index, raw_ostream &OS) {
|
|
|
|
|
assert(Index > 0 && "only positive indices are valid");
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
uint32_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
|
|
|
|
|
const auto TTEntryAddress = TTEntry + LSDASectionAddress;
|
2018-05-04 10:10:41 -07:00
|
|
|
uint32_t TypeAddress =
|
|
|
|
|
*Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
|
2017-08-27 17:04:06 -07:00
|
|
|
if ((TTypeEncoding & DW_EH_PE_pcrel) &&
|
|
|
|
|
(TypeAddress == TTEntryAddress)) {
|
|
|
|
|
TypeAddress = 0;
|
|
|
|
|
}
|
2015-11-12 18:56:58 -08:00
|
|
|
if (TypeAddress == 0) {
|
|
|
|
|
OS << "<all>";
|
|
|
|
|
return;
|
|
|
|
|
}
|
2017-08-27 17:04:06 -07:00
|
|
|
if (TTypeEncoding & DW_EH_PE_indirect) {
|
2019-04-09 12:29:40 -07:00
|
|
|
auto PointerOrErr = BC.getPointerAtAddress(TypeAddress);
|
2017-08-27 17:04:06 -07:00
|
|
|
assert(PointerOrErr && "failed to decode indirect address");
|
|
|
|
|
TypeAddress = *PointerOrErr;
|
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
if (auto *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) {
|
|
|
|
|
OS << TypeSymBD->getName();
|
2015-11-12 18:56:58 -08:00
|
|
|
} else {
|
|
|
|
|
OS << "0x" << Twine::utohexstr(TypeAddress);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
if (opts::PrintExceptions)
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << " actions: ";
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
uint32_t ActionPtr = ActionTableStart + ActionEntry - 1;
|
2015-11-12 18:56:58 -08:00
|
|
|
long long ActionType;
|
|
|
|
|
long long ActionNext;
|
|
|
|
|
auto Sep = "";
|
|
|
|
|
do {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
ActionType = Data.getSLEB128(&ActionPtr);
|
2015-11-12 18:56:58 -08:00
|
|
|
auto Self = ActionPtr;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
ActionNext = Data.getSLEB128(&ActionPtr);
|
2015-11-12 18:56:58 -08:00
|
|
|
if (opts::PrintExceptions)
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
|
2015-11-12 18:56:58 -08:00
|
|
|
if (ActionType == 0) {
|
|
|
|
|
if (opts::PrintExceptions)
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "cleanup";
|
2015-11-12 18:56:58 -08:00
|
|
|
} else if (ActionType > 0) {
|
|
|
|
|
// It's an index into a type table.
|
2017-08-27 17:04:06 -07:00
|
|
|
MaxTypeIndex = std::max(MaxTypeIndex,
|
|
|
|
|
static_cast<unsigned>(ActionType));
|
2015-11-12 18:56:58 -08:00
|
|
|
if (opts::PrintExceptions) {
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "catch type ";
|
|
|
|
|
printType(ActionType, outs());
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
} else { // ActionType < 0
|
|
|
|
|
if (opts::PrintExceptions)
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "filter exception types ";
|
2015-11-12 18:56:58 -08:00
|
|
|
auto TSep = "";
|
2015-12-18 17:00:46 -08:00
|
|
|
// ActionType is a negative *byte* offset into *uleb128-encoded* table
|
2015-11-12 18:56:58 -08:00
|
|
|
// of indices with base 1.
|
|
|
|
|
// E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
|
2015-12-17 12:59:15 -08:00
|
|
|
// encoded using uleb128 thus we cannot directly dereference them.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
uint32_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
|
|
|
|
|
while (auto Index = Data.getULEB128(&TypeIndexTablePtr)) {
|
2017-08-27 17:04:06 -07:00
|
|
|
MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
|
2015-11-12 18:56:58 -08:00
|
|
|
if (opts::PrintExceptions) {
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << TSep;
|
|
|
|
|
printType(Index, outs());
|
2015-11-12 18:56:58 -08:00
|
|
|
TSep = ", ";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
MaxTypeIndexTableOffset =
|
|
|
|
|
std::max(MaxTypeIndexTableOffset,
|
|
|
|
|
TypeIndexTablePtr - TypeIndexTableStart);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Sep = "; ";
|
|
|
|
|
|
|
|
|
|
ActionPtr = Self + ActionNext;
|
|
|
|
|
} while (ActionNext);
|
|
|
|
|
if (opts::PrintExceptions)
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << '\n';
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (opts::PrintExceptions)
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << '\n';
|
2015-12-17 12:59:15 -08:00
|
|
|
|
2015-12-18 17:00:46 -08:00
|
|
|
assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Data.getData().size() &&
|
2015-12-17 12:59:15 -08:00
|
|
|
"LSDA entry has crossed section boundary");
|
|
|
|
|
|
2015-12-18 17:00:46 -08:00
|
|
|
if (TTypeEnd) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
LSDAActionTable = LSDASectionData.slice(
|
|
|
|
|
ActionTableStart, TypeIndexTableStart -
|
|
|
|
|
MaxTypeIndex * TTypeEncodingSize -
|
|
|
|
|
ActionTableStart);
|
2017-08-27 17:04:06 -07:00
|
|
|
for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
uint32_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
|
|
|
|
|
const auto TTEntryAddress = TTEntry + LSDASectionAddress;
|
2018-05-04 10:10:41 -07:00
|
|
|
uint64_t TypeAddress =
|
|
|
|
|
*Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) {
|
2017-08-27 17:04:06 -07:00
|
|
|
TypeAddress = 0;
|
|
|
|
|
}
|
2019-04-09 12:29:40 -07:00
|
|
|
if (TypeAddress && (TTypeEncoding & DW_EH_PE_indirect)) {
|
|
|
|
|
auto PointerOrErr = BC.getPointerAtAddress(TypeAddress);
|
2017-08-27 17:04:06 -07:00
|
|
|
assert(PointerOrErr && "failed to decode indirect address");
|
|
|
|
|
TypeAddress = *PointerOrErr;
|
|
|
|
|
}
|
|
|
|
|
LSDATypeTable.emplace_back(TypeAddress);
|
|
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
LSDATypeIndexTable =
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
|
2015-12-18 17:00:46 -08:00
|
|
|
}
|
2015-11-12 18:56:58 -08:00
|
|
|
}
|
|
|
|
|
|
2015-11-13 14:18:45 -08:00
|
|
|
void BinaryFunction::updateEHRanges() {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (getSize() == 0)
|
|
|
|
|
return;
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
assert(CurrentState == State::CFG_Finalized && "unexpected state");
|
2015-11-13 14:18:45 -08:00
|
|
|
|
|
|
|
|
// Build call sites table.
|
|
|
|
|
struct EHInfo {
|
|
|
|
|
const MCSymbol *LP; // landing pad
|
|
|
|
|
uint64_t Action;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// If previous call can throw, this is its exception handler.
|
|
|
|
|
EHInfo PreviousEH = {nullptr, 0};
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Marker for the beginning of exceptions range.
|
|
|
|
|
const MCSymbol *StartRange = nullptr;
|
|
|
|
|
|
|
|
|
|
// Indicates whether the start range is located in a cold part.
|
|
|
|
|
bool IsStartInCold = false;
|
|
|
|
|
|
|
|
|
|
// Have we crossed hot/cold border for split functions?
|
|
|
|
|
bool SeenCold = false;
|
|
|
|
|
|
|
|
|
|
// Sites to update - either regular or cold.
|
|
|
|
|
auto *Sites = &CallSites;
|
|
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
for (auto &BB : BasicBlocksLayout) {
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
if (BB->isCold() && !SeenCold) {
|
|
|
|
|
SeenCold = true;
|
|
|
|
|
|
|
|
|
|
// Close the range (if any) and change the target call sites.
|
|
|
|
|
if (StartRange) {
|
|
|
|
|
Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(),
|
|
|
|
|
PreviousEH.LP, PreviousEH.Action});
|
|
|
|
|
}
|
|
|
|
|
Sites = &ColdCallSites;
|
|
|
|
|
|
|
|
|
|
// Reset the range.
|
|
|
|
|
StartRange = nullptr;
|
|
|
|
|
PreviousEH = {nullptr, 0};
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-13 14:18:45 -08:00
|
|
|
for (auto II = BB->begin(); II != BB->end(); ++II) {
|
[BOLT][Refactoring] Isolate changes to MC layer
Summary:
Changes that we made to MCInst, MCOperand, MCExpr, etc. are now all
moved into tools/llvm-bolt. That required a change to the way we handle
annotations and any extra operands for MCInst.
Any MCPlus information is now attached via an extra operand of type
MCInst with an opcode ANNOTATION_LABEL. Since this operand is MCInst, we
attach extra info as operands to this instruction. For first-level
annotations use functions to access the information, such as
getConditionalTailCall() or getEHInfo(), etc. For the rest, optional or
second-class annotations, use a general named-annotation interface such
as getAnnotationAs<uint64_t>(Inst, "Count").
I did a test on HHVM binary, and a memory consumption went down a little
bit while the runtime remained the same.
(cherry picked from FBD7405412)
2018-03-19 18:32:12 -07:00
|
|
|
if (!BC.MIB->isCall(*II))
|
2015-11-13 14:18:45 -08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Instruction can throw an exception that should be handled.
|
[BOLT][Refactoring] Isolate changes to MC layer
Summary:
Changes that we made to MCInst, MCOperand, MCExpr, etc. are now all
moved into tools/llvm-bolt. That required a change to the way we handle
annotations and any extra operands for MCInst.
Any MCPlus information is now attached via an extra operand of type
MCInst with an opcode ANNOTATION_LABEL. Since this operand is MCInst, we
attach extra info as operands to this instruction. For first-level
annotations use functions to access the information, such as
getConditionalTailCall() or getEHInfo(), etc. For the rest, optional or
second-class annotations, use a general named-annotation interface such
as getAnnotationAs<uint64_t>(Inst, "Count").
I did a test on HHVM binary, and a memory consumption went down a little
bit while the runtime remained the same.
(cherry picked from FBD7405412)
2018-03-19 18:32:12 -07:00
|
|
|
const bool Throws = BC.MIB->isInvoke(*II);
|
2015-11-13 14:18:45 -08:00
|
|
|
|
|
|
|
|
// Ignore the call if it's a continuation of a no-throw gap.
|
|
|
|
|
if (!Throws && !StartRange)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Extract exception handling information from the instruction.
|
2015-12-15 17:06:27 -08:00
|
|
|
const MCSymbol *LP = nullptr;
|
|
|
|
|
uint64_t Action = 0;
|
[BOLT][Refactoring] Isolate changes to MC layer
Summary:
Changes that we made to MCInst, MCOperand, MCExpr, etc. are now all
moved into tools/llvm-bolt. That required a change to the way we handle
annotations and any extra operands for MCInst.
Any MCPlus information is now attached via an extra operand of type
MCInst with an opcode ANNOTATION_LABEL. Since this operand is MCInst, we
attach extra info as operands to this instruction. For first-level
annotations use functions to access the information, such as
getConditionalTailCall() or getEHInfo(), etc. For the rest, optional or
second-class annotations, use a general named-annotation interface such
as getAnnotationAs<uint64_t>(Inst, "Count").
I did a test on HHVM binary, and a memory consumption went down a little
bit while the runtime remained the same.
(cherry picked from FBD7405412)
2018-03-19 18:32:12 -07:00
|
|
|
if (const auto EHInfo = BC.MIB->getEHInfo(*II))
|
|
|
|
|
std::tie(LP, Action) = *EHInfo;
|
2015-11-13 14:18:45 -08:00
|
|
|
|
|
|
|
|
// No action if the exception handler has not changed.
|
|
|
|
|
if (Throws &&
|
|
|
|
|
StartRange &&
|
|
|
|
|
PreviousEH.LP == LP &&
|
|
|
|
|
PreviousEH.Action == Action)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Same symbol is used for the beginning and the end of the range.
|
2016-09-27 19:09:38 -07:00
|
|
|
const MCSymbol *EHSymbol = BC.Ctx->createTempSymbol("EH", true);
|
|
|
|
|
MCInst EHLabel;
|
2018-03-09 09:45:13 -08:00
|
|
|
BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
|
2016-09-27 19:09:38 -07:00
|
|
|
II = std::next(BB->insertPseudoInstr(II, EHLabel));
|
2015-11-13 14:18:45 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// At this point we could be in one of the following states:
|
2015-11-13 14:18:45 -08:00
|
|
|
//
|
2016-09-27 19:09:38 -07:00
|
|
|
// I. Exception handler has changed and we need to close previous range
|
|
|
|
|
// and start a new one.
|
2015-11-13 14:18:45 -08:00
|
|
|
//
|
2016-09-27 19:09:38 -07:00
|
|
|
// II. Start a new exception range after the gap.
|
2015-11-13 14:18:45 -08:00
|
|
|
//
|
2016-09-27 19:09:38 -07:00
|
|
|
// III. Close current exception range and start a new gap.
|
|
|
|
|
const MCSymbol *EndRange;
|
2015-11-13 14:18:45 -08:00
|
|
|
if (StartRange) {
|
|
|
|
|
// I, III:
|
|
|
|
|
EndRange = EHSymbol;
|
|
|
|
|
} else {
|
|
|
|
|
// II:
|
|
|
|
|
StartRange = EHSymbol;
|
2016-09-27 19:09:38 -07:00
|
|
|
IsStartInCold = SeenCold;
|
2015-11-13 14:18:45 -08:00
|
|
|
EndRange = nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Close the previous range.
|
|
|
|
|
if (EndRange) {
|
2016-09-27 19:09:38 -07:00
|
|
|
Sites->emplace_back(CallSite{StartRange, EndRange,
|
|
|
|
|
PreviousEH.LP, PreviousEH.Action});
|
2015-11-13 14:18:45 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Throws) {
|
|
|
|
|
// I, II:
|
|
|
|
|
StartRange = EHSymbol;
|
2016-09-27 19:09:38 -07:00
|
|
|
IsStartInCold = SeenCold;
|
2015-11-13 14:18:45 -08:00
|
|
|
PreviousEH = EHInfo{LP, Action};
|
|
|
|
|
} else {
|
|
|
|
|
StartRange = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check if we need to close the range.
|
|
|
|
|
if (StartRange) {
|
2016-09-27 19:09:38 -07:00
|
|
|
assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch");
|
|
|
|
|
const auto *EndRange = IsStartInCold ? getFunctionColdEndLabel()
|
|
|
|
|
: getFunctionEndLabel();
|
|
|
|
|
Sites->emplace_back(CallSite{StartRange, EndRange,
|
|
|
|
|
PreviousEH.LP, PreviousEH.Action});
|
2015-11-13 14:18:45 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-29 11:19:06 -07:00
|
|
|
// The code is based on EHStreamer::emitExceptionTable().
|
2016-09-27 19:09:38 -07:00
|
|
|
/// Emit the language-specific data area (LSDA) for one fragment of this
/// function into the LSDA section of \p Streamer.
///
/// \p EmitColdPart selects the fragment: when true, the cold call sites, the
/// cold LSDA symbol and the cold start symbol are used; otherwise the regular
/// ones are. Nothing is emitted when the selected call site list is empty.
///
/// The emitted layout is: LSDA header (LPStart format and value, TType format,
/// padded TType base offset), the call site table, the action table, the type
/// table (written in reverse index order) and finally the type index table.
void BinaryFunction::emitLSDA(MCStreamer *Streamer, bool EmitColdPart) {
  const auto &SiteList = EmitColdPart ? ColdCallSites : CallSites;
  if (SiteList.empty())
    return;

  // Size of the call site table. Every entry consists of
  //
  //   start + length + landing pad, each 4 bytes wide (DW_EH_PE_data4),
  //
  // followed by the action encoded as uleb128.
  uint64_t CallSiteTableLength = SiteList.size() * 4 * 3;
  for (const auto &Site : SiteList)
    CallSiteTableLength += getULEB128Size(Site.Action);

  Streamer->SwitchSection(BC.MOFI->getLSDASection());

  const auto TTypeEncoding = BC.MOFI->getTTypeEncoding();
  const auto TTypeEncodingSize = getEncodingSize(TTypeEncoding, BC);
  const int TTypeAlignment = 4;

  // The type table requires 4-byte alignment.
  Streamer->EmitValueToAlignment(TTypeAlignment);

  // Place the LSDA label for the selected fragment.
  auto LSDASymbol = EmitColdPart ? getColdLSDASymbol() : getLSDASymbol();
  assert(LSDASymbol && "no LSDA symbol set");
  Streamer->EmitLabel(LSDASymbol);

  // Start of the FDE that corresponds to the selected fragment.
  const auto *StartSymbol = EmitColdPart ? getColdSymbol() : getSymbol();

  // LSDA header.
  //
  // Were LPStart omitted, landing pad displacements would be taken relative
  // to the FDE start. A cold fragment that begins with a landing pad would
  // then yield a displacement of 0, which the exception handling runtime
  // interprets as "no landing pad" and therefore ignores. To avoid that, an
  // LPStart of 0 is emitted and landing pads are written to the table as
  // absolute values.
  //
  // FIXME: this may break PIEs and DSOs where the base address is not 0.
  Streamer->EmitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
  Streamer->EmitIntValue(0, 4);                      // LPStart value

  Streamer->EmitIntValue(TTypeEncoding, 1); // TType format

  // See the comment in EHStreamer::emitExceptionTable() on how uleb128
  // encoding (which may spend a variable number of bytes on the same value)
  // is used to get the type info table aligned at 4 bytes without having to
  // iteratively fix up table sizes.
  unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength);
  unsigned TTypeBaseOffset =
      sizeof(int8_t) +                          // Call site format
      CallSiteTableLengthSize +                 // Call site table length size
      CallSiteTableLength +                     // Call site table length
      LSDAActionTable.size() +                  // Actions table size
      LSDATypeTable.size() * TTypeEncodingSize; // Types table size
  unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset);
  unsigned TotalSize =
      sizeof(int8_t) +      // LPStart format
      sizeof(int8_t) +      // TType format
      TTypeBaseOffsetSize + // TType base offset size
      TTypeBaseOffset;      // TType base offset
  unsigned SizeAlign = (4 - TotalSize) & 3;

  // The alignment padding is absorbed by over-long encoding of the TType
  // base offset.
  Streamer->EmitPaddedULEB128IntValue(TTypeBaseOffset,
                                      TTypeBaseOffsetSize + SizeAlign);

  // Call site table. Signed data4 is used because a landing pad may live in
  // another part of a split function, i.e. possibly earlier in the address
  // space than LPStart.
  Streamer->EmitIntValue(dwarf::DW_EH_PE_sdata4, 1);
  Streamer->EmitULEB128IntValue(CallSiteTableLength);

  for (const auto &Site : SiteList) {
    const auto *RangeBegin = Site.Start;
    const auto *RangeEnd = Site.End;

    assert(RangeBegin && "start EH label expected");
    assert(RangeEnd && "end EH label expected");

    // The range start is expressed relative to the start of the current
    // function fragment; the second value is the range length.
    Streamer->emitAbsoluteSymbolDiff(RangeBegin, StartSymbol, 4);
    Streamer->emitAbsoluteSymbolDiff(RangeEnd, RangeBegin, 4);

    // Landing pad: 0 when the call site has none, its symbol value otherwise.
    if (Site.LP)
      Streamer->EmitSymbolValue(Site.LP, 4);
    else
      Streamer->EmitIntValue(0, 4);

    Streamer->EmitULEB128IntValue(Site.Action);
  }

  // Action, type, and type index tables follow.
  //
  // The action and type index tables are copied in their original format;
  // there is no need to change it unless function splitting is performed, in
  // which case the tables could be split and optimized.
  //
  // The type table is (re-)encoded with TTypeEncoding so that it matches the
  // current assembler mode.
  for (auto const &ActionByte : LSDAActionTable)
    Streamer->EmitIntValue(ActionByte, 1);

  assert(!(TTypeEncoding & dwarf::DW_EH_PE_indirect) &&
         "indirect type info encoding is not supported yet");

  const auto TTypeApplication = TTypeEncoding & 0x70;
  for (auto Remaining = LSDATypeTable.size(); Remaining != 0; --Remaining) {
    // Note: the address could be an indirect one.
    const auto TypeAddress = LSDATypeTable[Remaining - 1];

    if (TTypeApplication == 0) {
      // Absolute encoding: the address is written as is.
      Streamer->EmitIntValue(TypeAddress, TTypeEncodingSize);
    } else if (TTypeApplication == dwarf::DW_EH_PE_pcrel) {
      if (!TypeAddress) {
        Streamer->EmitIntValue(0, TTypeEncodingSize);
        continue;
      }
      // PC-relative encoding: emit (type symbol - current location).
      const auto *TypeSymbol =
          BC.getOrCreateGlobalSymbol(TypeAddress,
                                     "TI",
                                     TTypeEncodingSize,
                                     TTypeAlignment);
      auto *HereSymbol = BC.Ctx->createTempSymbol();
      Streamer->EmitLabel(HereSymbol);
      const auto *TypeMinusHere = MCBinaryExpr::createSub(
          MCSymbolRefExpr::create(TypeSymbol, *BC.Ctx),
          MCSymbolRefExpr::create(HereSymbol, *BC.Ctx),
          *BC.Ctx);
      Streamer->EmitValue(TypeMinusHere, TTypeEncodingSize);
    } else {
      llvm_unreachable("unsupported TTypeEncoding");
    }
  }

  for (auto const &IndexByte : LSDATypeIndexTable)
    Streamer->EmitIntValue(IndexByte, 1);
}
|
|
|
|
|
|
2015-11-04 16:48:47 -08:00
|
|
|
// Mask covering the two most significant bits of a CFI opcode byte (0xc0).
// When any of these bits is set, the opcode is reduced to just those bits
// before it is compared against DW_CFA_* values.
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
|
|
|
|
|
|
2019-02-05 15:28:19 -08:00
|
|
|
/// Build the address -> FDE lookup table from the entries of \p EHFrame.
///
/// CIEs are skipped. When several FDEs share the same initial location, the
/// first one with a non-zero address range is kept; a later conflicting
/// non-empty FDE only produces a warning (at verbosity above 0).
CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
  for (const auto &Entry : EHFrame.entries()) {
    // Only FDEs are of interest; CIEs fail the cast and are skipped.
    const auto *Candidate = dyn_cast<dwarf::FDE>(&Entry);
    if (!Candidate)
      continue;

    const auto StartAddress = Candidate->getInitialLocation();
    auto Slot = FDEs.lower_bound(StartAddress);

    // First FDE seen for this address: record it, using the lower bound as
    // the insertion hint.
    const bool IsKnownAddress =
        Slot != FDEs.end() && Slot->first == StartAddress;
    if (!IsKnownAddress) {
      FDEs.emplace_hint(Slot, StartAddress, Candidate);
      continue;
    }

    // There could be multiple FDEs with the same initial address, and perhaps
    // different sizes (address ranges). An empty duplicate never replaces
    // what is already recorded.
    if (!Candidate->getAddressRange())
      continue;

    // A non-empty FDE takes the place of a previously recorded empty one.
    if (Slot->second->getAddressRange() == 0) {
      Slot->second = Candidate;
      continue;
    }

    // Two non-empty FDEs for the same address: keep the first and report.
    if (opts::Verbosity > 0) {
      errs() << "BOLT-WARNING: different FDEs for function at 0x"
             << Twine::utohexstr(Slot->first)
             << " detected; sizes: "
             << Slot->second->getAddressRange() << " and "
             << Candidate->getAddressRange() << '\n';
    }
  }
}
|
|
|
|
|
|
2016-02-22 18:25:43 -08:00
|
|
|
/// Populate \p Function with the unwind information found in the FDE
/// registered for the function's address.
///
/// The LSDA address is set on the function (0 if the FDE has none), the
/// personality function and its encoding are set when the linked CIE carries
/// a personality address, and every call frame instruction of the linked CIE
/// followed by those of the FDE is converted into an MCCFIInstruction and
/// attached to the function at the current code offset.
///
/// \returns true if there is no FDE with a non-zero address range for the
/// function (nothing to do), or if all CFI instructions were converted;
/// false as soon as an unimplemented or unrecognized CFI instruction is
/// encountered.
bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
  uint64_t Address = Function.getAddress();
  auto I = FDEs.find(Address);
  // Ignore zero-length FDE ranges.
  if (I == FDEs.end() || !I->second->getAddressRange())
    return true;

  const FDE &CurFDE = *I->second;
  auto LSDA = CurFDE.getLSDAAddress();
  Function.setLSDAAddress(LSDA ? *LSDA : 0);

  // Offset is the running code offset (relative to the function start) that
  // subsequent CFI instructions apply to; it is advanced by the
  // DW_CFA_advance_loc* and DW_CFA_set_loc instructions below.
  uint64_t Offset = 0;
  uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
  uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
  if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
    Function.setPersonalityFunction(
        *CurFDE.getLinkedCIE()->getPersonalityAddress());
    Function.setPersonalityEncoding(
        *CurFDE.getLinkedCIE()->getPersonalityEncoding());
  }

  // Converts a single CFI instruction. Returns false for instructions that
  // cannot be represented, true otherwise.
  auto decodeFrameInstruction =
      [&Function, &Offset, Address, CodeAlignment, DataAlignment](
          const CFIProgram::Instruction &Instr) {
        // Opcode holds the opcode reduced to its primary bits, if any are
        // set. The dispatch below uses the raw Instr.Opcode; Opcode is only
        // consulted to tell the expression flavours apart.
        uint8_t Opcode = Instr.Opcode;
        if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
          Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
        switch (Instr.Opcode) {
        case DW_CFA_nop:
          break;
        case DW_CFA_advance_loc4:
        case DW_CFA_advance_loc2:
        case DW_CFA_advance_loc1:
        case DW_CFA_advance_loc:
          // Advance our current address
          Offset += CodeAlignment * int64_t(Instr.Ops[0]);
          break;
        case DW_CFA_offset_extended_sf:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createOffset(
                          nullptr, Instr.Ops[0],
                          DataAlignment * int64_t(Instr.Ops[1])));
          break;
        case DW_CFA_offset_extended:
        case DW_CFA_offset:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createOffset(
                          nullptr, Instr.Ops[0], DataAlignment * Instr.Ops[1]));
          break;
        case DW_CFA_restore_extended:
        case DW_CFA_restore:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
          break;
        case DW_CFA_set_loc:
          // The operand is an absolute address; convert it into an offset
          // from the function start.
          assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
          assert(Instr.Ops[0] <= Address + Function.getSize() &&
                 "set_loc out of function bounds");
          Offset = Instr.Ops[0] - Address;
          break;

        case DW_CFA_undefined:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
          break;
        case DW_CFA_same_value:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
          break;
        case DW_CFA_register:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
                                                       Instr.Ops[1]));
          break;
        case DW_CFA_remember_state:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createRememberState(nullptr));
          break;
        case DW_CFA_restore_state:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createRestoreState(nullptr));
          break;
        case DW_CFA_def_cfa:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createDefCfa(nullptr, Instr.Ops[0],
                                                     Instr.Ops[1]));
          break;
        case DW_CFA_def_cfa_sf:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createDefCfa(
                          nullptr, Instr.Ops[0],
                          DataAlignment * int64_t(Instr.Ops[1])));
          break;
        case DW_CFA_def_cfa_register:
          Function.addCFIInstruction(
              Offset,
              MCCFIInstruction::createDefCfaRegister(nullptr, Instr.Ops[0]));
          break;
        case DW_CFA_def_cfa_offset:
          Function.addCFIInstruction(
              Offset,
              MCCFIInstruction::createDefCfaOffset(nullptr, Instr.Ops[0]));
          break;
        case DW_CFA_def_cfa_offset_sf:
          Function.addCFIInstruction(
              Offset, MCCFIInstruction::createDefCfaOffset(
                          nullptr, DataAlignment * int64_t(Instr.Ops[0])));
          break;
        case DW_CFA_GNU_args_size:
          Function.addCFIInstruction(
              Offset,
              MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
          Function.setUsesGnuArgsSize();
          break;
        case DW_CFA_val_offset_sf:
        case DW_CFA_val_offset:
          if (opts::Verbosity >= 1) {
            errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
          }
          return false;
        case DW_CFA_expression:
        case DW_CFA_def_cfa_expression:
        case DW_CFA_val_expression: {
          // Re-encode the DWARF expression operation by operation; the
          // number of operands appended depends on how many the operation's
          // description declares.
          MCDwarfExprBuilder Builder;
          for (auto &ExprOp : *Instr.Expression) {
            const DWARFExpression::Operation::Description &Desc =
                ExprOp.getDescription();
            if (Desc.Op[0] == DWARFExpression::Operation::SizeNA) {
              Builder.appendOperation(ExprOp.getCode());
            } else if (Desc.Op[1] == DWARFExpression::Operation::SizeNA) {
              Builder.appendOperation(ExprOp.getCode(),
                                      ExprOp.getRawOperand(0));
            } else {
              Builder.appendOperation(ExprOp.getCode(), ExprOp.getRawOperand(0),
                                      ExprOp.getRawOperand(1));
            }
          }
          if (Opcode == DW_CFA_expression) {
            Function.addCFIInstruction(
                Offset, MCCFIInstruction::createExpression(
                            nullptr, Instr.Ops[0], Builder.take()));
          } else if (Opcode == DW_CFA_def_cfa_expression) {
            Function.addCFIInstruction(Offset,
                                       MCCFIInstruction::createDefCfaExpression(
                                           nullptr, Builder.take()));
          } else {
            assert(Opcode == DW_CFA_val_expression && "Unexpected opcode");
            Function.addCFIInstruction(
                Offset, MCCFIInstruction::createValExpression(
                            nullptr, Instr.Ops[0], Builder.take()));
          }
          break;
        }
        case DW_CFA_MIPS_advance_loc8:
          if (opts::Verbosity >= 1) {
            errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
          }
          return false;
        case DW_CFA_GNU_window_save:
        case DW_CFA_lo_user:
        case DW_CFA_hi_user:
          if (opts::Verbosity >= 1) {
            errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
                      "unimplemented\n";
          }
          return false;
        default:
          if (opts::Verbosity >= 1) {
            // The opcode is a uint8_t; widen it so that the stream prints its
            // numeric value instead of a raw (possibly unprintable) character.
            errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
                   << static_cast<unsigned>(Instr.Opcode) << '\n';
          }
          return false;
        }

        return true;
      };

  // Instructions of the CIE come first, followed by those of the FDE itself.
  for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) {
    if (!decodeFrameInstruction(Instr))
      return false;
  }

  for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) {
    if (!decodeFrameInstruction(Instr))
      return false;
  }

  return true;
}
|
2015-11-04 16:48:47 -08:00
|
|
|
|
2016-11-14 16:39:55 -08:00
|
|
|
std::vector<char> CFIReaderWriter::generateEHFrameHeader(
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const DWARFDebugFrame &OldEHFrame,
|
|
|
|
|
const DWARFDebugFrame &NewEHFrame,
|
2016-11-14 16:39:55 -08:00
|
|
|
uint64_t EHFrameHeaderAddress,
|
|
|
|
|
std::vector<uint64_t> &FailedAddresses) const {
|
2016-11-11 14:33:34 -08:00
|
|
|
// Common PC -> FDE map to be written into .eh_frame_hdr.
|
2015-11-10 15:20:50 -08:00
|
|
|
std::map<uint64_t, uint64_t> PCToFDE;
|
|
|
|
|
|
2016-11-14 16:39:55 -08:00
|
|
|
// Presort array for binary search.
|
|
|
|
|
std::sort(FailedAddresses.begin(), FailedAddresses.end());
|
2015-11-19 17:59:41 -08:00
|
|
|
|
2016-11-14 16:39:55 -08:00
|
|
|
// Initialize PCToFDE using NewEHFrame.
|
2016-11-11 14:33:34 -08:00
|
|
|
NewEHFrame.for_each_FDE([&](const dwarf::FDE *FDE) {
|
|
|
|
|
const auto FuncAddress = FDE->getInitialLocation();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const auto FDEAddress = NewEHFrame.getEHFrameAddress() + FDE->getOffset();
|
2016-11-09 11:19:02 -08:00
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
// Ignore unused FDEs.
|
2016-11-09 11:19:02 -08:00
|
|
|
if (FuncAddress == 0)
|
2016-11-11 14:33:34 -08:00
|
|
|
return;
|
2015-11-10 15:20:50 -08:00
|
|
|
|
2016-11-14 16:39:55 -08:00
|
|
|
// Add the address to the map unless we failed to write it.
|
|
|
|
|
if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
|
|
|
|
|
FuncAddress)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
|
|
|
|
|
<< Twine::utohexstr(FuncAddress) << " is at 0x"
|
|
|
|
|
<< Twine::utohexstr(FDEAddress) << '\n');
|
|
|
|
|
PCToFDE[FuncAddress] = FDEAddress;
|
|
|
|
|
}
|
2016-11-11 14:33:34 -08:00
|
|
|
});
|
2015-11-10 15:20:50 -08:00
|
|
|
|
2016-11-14 16:39:55 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
<< std::distance(NewEHFrame.entries().begin(),
|
|
|
|
|
NewEHFrame.entries().end())
|
|
|
|
|
<< " entries\n");
|
2016-11-14 16:39:55 -08:00
|
|
|
|
|
|
|
|
// Add entries from the original .eh_frame corresponding to the functions
|
|
|
|
|
// that we did not update.
|
2016-11-11 14:33:34 -08:00
|
|
|
OldEHFrame.for_each_FDE([&](const dwarf::FDE *FDE) {
|
|
|
|
|
const auto FuncAddress = FDE->getInitialLocation();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const auto FDEAddress = OldEHFrame.getEHFrameAddress() + FDE->getOffset();
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2016-11-14 16:39:55 -08:00
|
|
|
// Add the old FDE only if no new FDE was recorded for this function, i.e. the function was not updated or we failed to write it.
|
|
|
|
|
if (PCToFDE.count(FuncAddress) == 0) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
|
|
|
|
|
<< Twine::utohexstr(FuncAddress) << " is at 0x"
|
|
|
|
|
<< Twine::utohexstr(FDEAddress) << '\n');
|
|
|
|
|
PCToFDE[FuncAddress] = FDEAddress;
|
|
|
|
|
}
|
2016-11-11 14:33:34 -08:00
|
|
|
});
|
2015-11-19 17:59:41 -08:00
|
|
|
|
2016-11-14 16:39:55 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
<< std::distance(OldEHFrame.entries().begin(),
|
|
|
|
|
OldEHFrame.entries().end())
|
|
|
|
|
<< " entries\n");
|
2016-11-14 16:39:55 -08:00
|
|
|
|
|
|
|
|
// Generate a new .eh_frame_hdr based on the new map.
|
|
|
|
|
|
|
|
|
|
// 12-byte header followed by a table of 8-byte entries (4-byte initial PC offset + 4-byte FDE offset), one per function.
|
|
|
|
|
std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
|
|
|
|
|
|
|
|
|
|
// Version is 1.
|
|
|
|
|
EHFrameHeader[0] = 1;
|
|
|
|
|
// Encoding of the eh_frame pointer.
|
|
|
|
|
EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
|
|
|
|
|
// Encoding of the count field to follow.
|
|
|
|
|
EHFrameHeader[2] = DW_EH_PE_udata4;
|
|
|
|
|
// Encoding of the table entries - 4-byte offset from the start of the header.
|
|
|
|
|
EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
|
|
|
|
|
|
|
|
|
|
// Address of eh_frame. Use the new one.
|
|
|
|
|
support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
|
2016-11-14 16:39:55 -08:00
|
|
|
|
|
|
|
|
// Number of entries in the table (FDE count).
|
|
|
|
|
support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
|
|
|
|
|
|
|
|
|
|
// Write the table at offset 12.
|
|
|
|
|
auto *Ptr = EHFrameHeader.data();
|
|
|
|
|
uint32_t Offset = 12;
|
|
|
|
|
for (const auto &PCI : PCToFDE) {
|
|
|
|
|
int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
|
|
|
|
|
assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
|
|
|
|
|
support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
|
|
|
|
|
Offset += 4;
|
|
|
|
|
int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
|
|
|
|
|
assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
|
|
|
|
|
support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
|
2015-11-19 17:59:41 -08:00
|
|
|
Offset += 4;
|
2015-11-04 16:48:47 -08:00
|
|
|
}
|
2016-11-14 16:39:55 -08:00
|
|
|
|
|
|
|
|
return EHFrameHeader;
|
2015-11-04 16:48:47 -08:00
|
|
|
}
|
|
|
|
|
|
2016-02-05 14:42:04 -08:00
|
|
|
} // namespace bolt
|
2015-11-02 11:50:53 -07:00
|
|
|
} // namespace llvm
|