//===--- BinaryFunction.cpp - Interface for machine-level function --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "BinaryFunction.h"
#include "BinaryBasicBlock.h"
#include "DynoStats.h"
#include "MCPlusBuilder.h"
#include "NameResolver.h"
#include "NameShortener.h"
#include "Utils.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/edit_distance.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <cxxabi.h>
#include <functional>
#include <limits>
#include <numeric>
#include <string>

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
extern cl::OptionCategory BoltRelocCategory;

extern cl::opt<bool> EnableBAT;
extern cl::opt<bool> Instrument;
extern cl::opt<bool> StrictMode;
extern cl::opt<bool> UpdateDebugSections;
extern cl::opt<unsigned> Verbosity;
extern bool processAllFunctions();

cl::opt<bool>
CheckEncoding("check-encoding",
  cl::desc("perform verification of LLVM instruction encoding/decoding. "
           "Every instruction in the input is decoded and re-encoded. "
           "If the resulting bytes do not match the input, a warning message "
           "is printed."),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::opt<bool>
DotToolTipCode("dot-tooltip-code",
  cl::desc("add basic block instructions as tool tips on nodes"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<JumpTableSupportLevel>
JumpTables("jump-tables",
  cl::desc("jump tables support (default=basic)"),
  cl::init(JTS_BASIC),
  cl::values(
      clEnumValN(JTS_NONE, "none",
                 "do not optimize functions with jump tables"),
      clEnumValN(JTS_BASIC, "basic",
                 "optimize functions with jump tables"),
      clEnumValN(JTS_MOVE, "move",
                 "move jump tables to a separate section"),
      clEnumValN(JTS_SPLIT, "split",
                 "split jump tables section into hot and cold based on "
                 "function execution frequency"),
      clEnumValN(JTS_AGGRESSIVE, "aggressive",
                 "aggressively split jump tables section based on usage "
                 "of the tables")),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
NoScan("no-scan",
  cl::desc("do not scan cold functions for external references (may result in "
           "slower binary)"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

cl::opt<bool>
PreserveBlocksAlignment("preserve-blocks-alignment",
  cl::desc("try to preserve basic block alignment"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

cl::opt<bool>
PrintDynoStats("dyno-stats",
  cl::desc("print execution info based on profile"),
  cl::cat(BoltCategory));

static cl::opt<bool>
PrintDynoStatsOnly("print-dyno-stats-only",
  cl::desc("while printing functions output dyno-stats and skip instructions"),
  cl::init(false),
  cl::Hidden,
  cl::cat(BoltCategory));

static cl::list<std::string>
PrintOnly("print-only",
  cl::CommaSeparated,
  cl::desc("list of functions to print"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TimeBuild("time-build",
  cl::desc("print time spent constructing binary functions"),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

cl::opt<bool>
TrapOnAVX512("trap-avx512",
  cl::desc("in relocation mode trap upon entry to any function that uses "
           "AVX-512 instructions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltCategory));

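// Return true if the function should be included in BOLT's verbose output,
// honoring the -print-only function name filter.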
bool shouldPrint(const BinaryFunction &Function) {
  if (Function.isIgnored())
    return false;

  if (PrintOnly.empty())
    return true;

  for (std::string &Name : opts::PrintOnly) {
    if (Function.hasNameRegex(Name)) {
      return true;
    }
  }

  return false;
}

} // namespace opts

namespace llvm {
namespace bolt {

constexpr unsigned BinaryFunction::MinAlign;

namespace {

template <typename R>
bool emptyRange(const R &Range) {
  return Range.begin() == Range.end();
}

/// Gets debug line information for the instruction located at the given
/// address in the original binary. The SMLoc's pointer is used
/// to point to this information, which is represented by a
/// DebugLineTableRowRef. The returned pointer is null if no debug line
/// information for this instruction was found.
SMLoc findDebugLineInformationForInstructionAt(uint64_t Address,
    DWARFUnit *Unit, const DWARFDebugLine::LineTable *LineTable) {
  // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
  // which occupies 64 bits. Thus, we can only proceed if the struct fits into
  // the pointer itself.
  assert(
      sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef) &&
      "Cannot fit instruction debug line information into SMLoc's pointer");

  SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
  uint32_t RowIndex = LineTable->lookupAddress(
      {Address, object::SectionedAddress::UndefSection});
  if (RowIndex == LineTable->UnknownRowIndex)
    return NullResult;

  assert(RowIndex < LineTable->Rows.size() &&
         "Line Table lookup returned invalid index.");

  decltype(SMLoc().getPointer()) Ptr;
  DebugLineTableRowRef *InstructionLocation =
      reinterpret_cast<DebugLineTableRowRef *>(&Ptr);

  InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
  InstructionLocation->RowIndex = RowIndex + 1;

  return SMLoc::getFromPointer(Ptr);
}

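// Build a section name from the given prefix and name. Non-ELF binaries use a
// compact numeric ID from NameShortener in place of the name.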
std::string buildSectionName(StringRef Prefix, StringRef Name,
                             const BinaryContext &BC) {
  if (BC.isELF())
    return (Prefix + Name).str();
  static NameShortener NS;
  return (Prefix + Twine(NS.getID(Name))).str();
}

} // namespace

std::string BinaryFunction::buildCodeSectionName(StringRef Name,
                                                 const BinaryContext &BC) {
  return buildSectionName(BC.isELF() ? ".local.text." : ".l.text.", Name, BC);
}

std::string BinaryFunction::buildColdCodeSectionName(StringRef Name,
                                                     const BinaryContext &BC) {
  return buildSectionName(BC.isELF() ? ".local.cold.text." : ".l.c.text.", Name,
                          BC);
}

uint64_t BinaryFunction::Count = 0;

Optional<StringRef>
BinaryFunction::hasNameRegex(const StringRef Name) const {
  const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
  Regex MatchName(RegexName);
  Optional<StringRef> Match = forEachName(
      [&MatchName](StringRef Name) { return MatchName.match(Name); });

  return Match;
}

Optional<StringRef>
BinaryFunction::hasRestoredNameRegex(const StringRef Name) const {
  const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str();
  Regex MatchName(RegexName);
  Optional<StringRef> Match = forEachName([&MatchName](StringRef Name) {
    return MatchName.match(NameResolver::restore(Name));
  });

  return Match;
}

std::string BinaryFunction::getDemangledName() const {
  StringRef MangledName = NameResolver::restore(getOneName());
  int Status = 0;
  char *const Name =
      abi::__cxa_demangle(MangledName.str().c_str(), 0, 0, &Status);
  const std::string NameStr(Status == 0 ? Name : MangledName);
  ::free(Name);
  return NameStr;
}

BinaryBasicBlock *
BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
  if (Offset > Size)
    return nullptr;

  if (BasicBlockOffsets.empty())
    return nullptr;

  /*
   * This is commented out because it makes BOLT too slow.
   * assert(std::is_sorted(BasicBlockOffsets.begin(),
   *                       BasicBlockOffsets.end(),
   *                       CompareBasicBlockOffsets())));
   */
  auto I = std::upper_bound(BasicBlockOffsets.begin(),
                            BasicBlockOffsets.end(),
                            BasicBlockOffset(Offset, nullptr),
                            CompareBasicBlockOffsets());
  assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
  --I;
  BinaryBasicBlock *BB = I->second;
  return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr;
}

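// Mark as valid every basic block reachable from an entry point, a landing
// pad, or a block containing a jump-table branch; all other blocks remain
// marked invalid.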
void BinaryFunction::markUnreachableBlocks() {
  std::stack<BinaryBasicBlock *> Stack;

  for (BinaryBasicBlock *BB : layout()) {
    BB->markValid(false);
  }

  // Add all entries and landing pads as roots.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (isEntryPoint(*BB) || BB->isLandingPad()) {
      Stack.push(BB);
      BB->markValid(true);
      continue;
    }
    // FIXME:
    // Also mark BBs with indirect jumps as reachable, since we do not
    // support removing unused jump tables yet (T29418024 / GH-issue20)
    for (const MCInst &Inst : *BB) {
      if (BC.MIB->getJumpTable(Inst)) {
        Stack.push(BB);
        BB->markValid(true);
        break;
      }
    }
  }

  // Determine reachable BBs from the entry point
  while (!Stack.empty()) {
    BinaryBasicBlock *BB = Stack.top();
    Stack.pop();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      if (Succ->isValid())
        continue;
      Succ->markValid(true);
      Stack.push(Succ);
    }
  }
}

// Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
// will be cleaned up by fixBranches().
std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
  BasicBlockOrderType NewLayout;
  unsigned Count = 0;
  uint64_t Bytes = 0;
  for (BinaryBasicBlock *BB : layout()) {
    if (BB->isValid()) {
      NewLayout.push_back(BB);
    } else {
      assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
      ++Count;
      Bytes += BC.computeCodeSize(BB->begin(), BB->end());
    }
  }
  BasicBlocksLayout = std::move(NewLayout);

  BasicBlockListType NewBasicBlocks;
  for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
    BinaryBasicBlock *BB = *I;
    if (BB->isValid()) {
      NewBasicBlocks.push_back(BB);
    } else {
      // Make sure the block is removed from the list of predecessors.
      BB->removeAllSuccessors();
      DeletedBasicBlocks.push_back(BB);
    }
  }
  BasicBlocks = std::move(NewBasicBlocks);

  assert(BasicBlocks.size() == BasicBlocksLayout.size());

  // Update CFG state if needed
  if (Count > 0)
    recomputeLandingPads();

  return std::make_pair(Count, Bytes);
}

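// Return true if a call to CalleeSymbol is a forward call, i.e. the callee is
// expected to be placed after this function in the output layout.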
bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
  // This function should work properly before and after function reordering.
  // In order to accomplish this, we use the function index (if it is valid).
  // If the function indices are not valid, we fall back to the original
  // addresses. This should be ok because the functions without valid indices
  // should have been ordered with a stable sort.
  const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
  if (CalleeBF) {
    if (CalleeBF->isInjected())
      return true;

    if (hasValidIndex() && CalleeBF->hasValidIndex()) {
      return getIndex() < CalleeBF->getIndex();
    } else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
      return true;
    } else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
      return false;
    } else {
      return getAddress() < CalleeBF->getAddress();
    }
  } else {
    // Absolute symbol.
    ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(*CalleeSymbol);
    assert(CalleeAddressOrError && "unregistered symbol found");
    return *CalleeAddressOrError > getAddress();
  }
}

void BinaryFunction::dump(bool PrintInstructions) const {
  print(dbgs(), "", PrintInstructions);
}

void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
                           bool PrintInstructions) const {
  if (!opts::shouldPrint(*this))
    return;

  StringRef SectionName =
      OriginSection ? OriginSection->getName() : "<no origin section>";
  OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
  std::vector<StringRef> AllNames = getNames();
  if (AllNames.size() > 1) {
    OS << "\n  All names   : ";
    const char *Sep = "";
    for (const StringRef Name : AllNames) {
      OS << Sep << Name;
      Sep = "\n                ";
    }
  }
  OS << "\n  Number      : " << FunctionNumber
     << "\n  State       : " << CurrentState
     << "\n  Address     : 0x" << Twine::utohexstr(Address)
     << "\n  Size        : 0x" << Twine::utohexstr(Size)
     << "\n  MaxSize     : 0x" << Twine::utohexstr(MaxSize)
     << "\n  Offset      : 0x" << Twine::utohexstr(FileOffset)
     << "\n  Section     : " << SectionName
     << "\n  Orc Section : " << getCodeSectionName()
     << "\n  LSDA        : 0x" << Twine::utohexstr(getLSDAAddress())
     << "\n  IsSimple    : " << IsSimple
     << "\n  IsMultiEntry: " << isMultiEntry()
     << "\n  IsSplit     : " << isSplit()
     << "\n  BB Count    : " << size();

  if (HasFixedIndirectBranch) {
    OS << "\n  HasFixedIndirectBranch : true";
  }
  if (HasUnknownControlFlow) {
    OS << "\n  Unknown CF  : true";
  }
  if (getPersonalityFunction()) {
    OS << "\n  Personality : " << getPersonalityFunction()->getName();
  }
  if (IsFragment) {
    OS << "\n  IsFragment  : true";
  }
  if (isFolded()) {
    OS << "\n  FoldedInto  : " << *getFoldedIntoFunction();
  }
  if (ParentFragment) {
    OS << "\n  Parent      : " << *ParentFragment;
  }
  if (!Fragments.empty()) {
    OS << "\n  Fragments   : ";
    const char *Sep = "";
    for (BinaryFunction *Frag : Fragments) {
      OS << Sep << *Frag;
      Sep = ", ";
    }
  }
  if (hasCFG()) {
    OS << "\n  Hash        : " << Twine::utohexstr(computeHash());
  }
  if (isMultiEntry()) {
    OS << "\n  Secondary Entry Points : ";
    const char *Sep = "";
    for (const std::pair<const MCSymbol *const, MCSymbol *> &KV :
         SecondaryEntryPoints) {
      OS << Sep << KV.second->getName();
      Sep = ", ";
    }
  }
  if (FrameInstructions.size()) {
    OS << "\n  CFI Instrs  : " << FrameInstructions.size();
  }
  if (BasicBlocksLayout.size()) {
    OS << "\n  BB Layout   : ";
    const char *Sep = "";
    for (BinaryBasicBlock *BB : BasicBlocksLayout) {
      OS << Sep << BB->getName();
      Sep = ", ";
    }
  }
  if (ImageAddress)
    OS << "\n  Image       : 0x" << Twine::utohexstr(ImageAddress);
  if (ExecutionCount != COUNT_NO_PROFILE) {
    OS << "\n  Exec Count  : " << ExecutionCount;
    OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
  }

  if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
    OS << '\n';
    DynoStats dynoStats = getDynoStats(*this);
    OS << dynoStats;
  }

  OS << "\n}\n";

  if (opts::PrintDynoStatsOnly || !PrintInstructions || !BC.InstPrinter)
    return;

  // Offset of the instruction in function.
  uint64_t Offset{0};

  if (BasicBlocks.empty() && !Instructions.empty()) {
    // Print before CFG was built.
    for (const std::pair<const uint32_t, MCInst> &II : Instructions) {
      Offset = II.first;

      // Print label if exists at this offset.
      auto LI = Labels.find(Offset);
      if (LI != Labels.end()) {
        if (const MCSymbol *EntrySymbol =
                getSecondaryEntryPointSymbol(LI->second))
          OS << EntrySymbol->getName() << " (Entry Point):\n";
        OS << LI->second->getName() << ":\n";
      }

      BC.printInstruction(OS, II.second, Offset, this);
    }
  }

  for (uint32_t I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
    BinaryBasicBlock *BB = BasicBlocksLayout[I];
    if (I != 0 &&
        BB->isCold() != BasicBlocksLayout[I - 1]->isCold())
      OS << "------- HOT-COLD SPLIT POINT -------\n\n";

    OS << BB->getName() << " ("
       << BB->size() << " instructions, align : " << BB->getAlignment()
       << ")\n";

    if (isEntryPoint(*BB)) {
      if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
        OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
      else
        OS << "  Entry Point\n";
    }

    if (BB->isLandingPad())
      OS << "  Landing Pad\n";

    uint64_t BBExecCount = BB->getExecutionCount();
    if (hasValidProfile()) {
      OS << "  Exec Count : ";
      if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
        OS << BBExecCount << '\n';
      else
        OS << "<unknown>\n";
    }
    if (BB->getCFIState() >= 0) {
      OS << "  CFI State : " << BB->getCFIState() << '\n';
    }
    if (opts::EnableBAT) {
      OS << "  Input offset: " << Twine::utohexstr(BB->getInputOffset())
         << "\n";
    }
    if (!BB->pred_empty()) {
      OS << "  Predecessors: ";
      const char *Sep = "";
      for (BinaryBasicBlock *Pred : BB->predecessors()) {
        OS << Sep << Pred->getName();
        Sep = ", ";
      }
      OS << '\n';
    }
    if (!BB->throw_empty()) {
      OS << "  Throwers: ";
      const char *Sep = "";
      for (BinaryBasicBlock *Throw : BB->throwers()) {
        OS << Sep << Throw->getName();
        Sep = ", ";
      }
      OS << '\n';
    }

    Offset = alignTo(Offset, BB->getAlignment());

    // Note: offsets are imprecise since this is happening prior to relaxation.
    Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);

    if (!BB->succ_empty()) {
      OS << "  Successors: ";
      // For more than 2 successors, sort them based on frequency.
      std::vector<uint64_t> Indices(BB->succ_size());
      std::iota(Indices.begin(), Indices.end(), 0);
      if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) {
        std::stable_sort(Indices.begin(), Indices.end(),
                         [&](const uint64_t A, const uint64_t B) {
                           return BB->BranchInfo[B] < BB->BranchInfo[A];
                         });
      }
      const char *Sep = "";
      for (unsigned I = 0; I < Indices.size(); ++I) {
        BinaryBasicBlock *Succ = BB->Successors[Indices[I]];
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->BranchInfo[Indices[I]];
        OS << Sep << Succ->getName();
        if (ExecutionCount != COUNT_NO_PROFILE &&
            BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
          OS << " (mispreds: " << BI.MispredictedCount
             << ", count: " << BI.Count << ")";
        } else if (ExecutionCount != COUNT_NO_PROFILE &&
                   BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
          OS << " (inferred count: " << BI.Count << ")";
        }
        Sep = ", ";
      }
      OS << '\n';
    }

    if (!BB->lp_empty()) {
      OS << "  Landing Pads: ";
      const char *Sep = "";
      for (BinaryBasicBlock *LP : BB->landing_pads()) {
        OS << Sep << LP->getName();
        if (ExecutionCount != COUNT_NO_PROFILE) {
          OS << " (count: " << LP->getExecutionCount() << ")";
        }
        Sep = ", ";
      }
      OS << '\n';
    }

    // In CFG_Finalized state we can miscalculate CFI state at exit.
    if (CurrentState == State::CFG) {
      const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
      if (CFIStateAtExit >= 0)
        OS << "  CFI State: " << CFIStateAtExit << '\n';
    }

    OS << '\n';
  }

  // Dump new exception ranges for the function.
  if (!CallSites.empty()) {
    OS << "EH table:\n";
    for (const CallSite &CSI : CallSites) {
      OS << "  [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
      if (CSI.LP)
        OS << *CSI.LP;
      else
        OS << "0";
      OS << ", action : " << CSI.Action << '\n';
    }
    OS << '\n';
  }

  // Print all jump tables.
  for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables) {
    JTI.second->print(OS);
  }

  OS << "DWARF CFI Instructions:\n";
  if (OffsetToCFI.size()) {
    // Pre-buildCFG information
    for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) {
      OS << format("    %08x:\t", Elmt.first);
      assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
      BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
      OS << "\n";
    }
  } else {
    // Post-buildCFG information
    for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
      const MCCFIInstruction &CFI = FrameInstructions[I];
      OS << format("    %d:\t", I);
      BinaryContext::printCFI(OS, CFI);
      OS << "\n";
    }
  }
  if (FrameInstructions.empty())
    OS << "    <empty>\n";

  OS << "End of Function \"" << *this << "\"\n\n";
}

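// Print both regular and move relocations that fall into the
// [Offset, Offset + Size) range of the function body.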
void BinaryFunction::printRelocations(raw_ostream &OS,
                                      uint64_t Offset,
                                      uint64_t Size) const {
  const char *Sep = " # Relocs: ";

  auto RI = Relocations.lower_bound(Offset);
  while (RI != Relocations.end() && RI->first < Offset + Size) {
    OS << Sep << "(R: " << RI->second << ")";
    Sep = ", ";
    ++RI;
  }

  RI = MoveRelocations.lower_bound(Offset);
  while (RI != MoveRelocations.end() && RI->first < Offset + Size) {
    OS << Sep << "(M: " << RI->second << ")";
    Sep = ", ";
    ++RI;
  }
}

namespace {
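// Rewrite the ULEB128-encoded target register of a DW_CFA_expression or
// DW_CFA_val_expression CFI so it refers to NewReg, leaving the rest of the
// DWARF expression bytes unchanged.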
std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr,
                                           MCPhysReg NewReg) {
  StringRef ExprBytes = Instr.getValues();
  assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short");
  uint8_t Opcode = ExprBytes[0];
  assert((Opcode == dwarf::DW_CFA_expression ||
          Opcode == dwarf::DW_CFA_val_expression) &&
         "invalid DWARF expression CFI");
  const uint8_t *const Start =
      reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(1).data());
  const uint8_t *const End =
      reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1);
  unsigned Size = 0;
  decodeULEB128(Start, &Size, End);
  assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI");
  SmallString<8> Tmp;
  raw_svector_ostream OSE(Tmp);
  encodeULEB128(NewReg, OSE);
  return Twine(ExprBytes.slice(0, 1))
      .concat(OSE.str())
      .concat(ExprBytes.drop_front(1 + Size))
      .str();
}
} // namespace

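// Replace the register referenced by the CFI instruction associated with
// Instr with NewReg, preserving the CFI operation and any offset.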
void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr,
                                          MCPhysReg NewReg) {
  const MCCFIInstruction *OldCFI = getCFIFor(Instr);
  assert(OldCFI && "invalid CFI instr");
  switch (OldCFI->getOperation()) {
  default:
    llvm_unreachable("Unexpected instruction");
  case MCCFIInstruction::OpDefCfa:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, NewReg,
                                                 OldCFI->getOffset()));
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    setCFIFor(Instr, MCCFIInstruction::createDefCfaRegister(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpOffset:
    setCFIFor(Instr, MCCFIInstruction::createOffset(nullptr, NewReg,
                                                    OldCFI->getOffset()));
    break;
  case MCCFIInstruction::OpRegister:
    setCFIFor(Instr, MCCFIInstruction::createRegister(nullptr, NewReg,
                                                      OldCFI->getRegister2()));
    break;
  case MCCFIInstruction::OpSameValue:
    setCFIFor(Instr, MCCFIInstruction::createSameValue(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpEscape:
    setCFIFor(Instr,
              MCCFIInstruction::createEscape(
                  nullptr,
                  StringRef(mutateDWARFExpressionTargetReg(*OldCFI, NewReg))));
    break;
  case MCCFIInstruction::OpRestore:
    setCFIFor(Instr, MCCFIInstruction::createRestore(nullptr, NewReg));
    break;
  case MCCFIInstruction::OpUndefined:
    setCFIFor(Instr, MCCFIInstruction::createUndefined(nullptr, NewReg));
    break;
  }
}

const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr,
                                                           int64_t NewOffset) {
  const MCCFIInstruction *OldCFI = getCFIFor(Instr);
  assert(OldCFI && "invalid CFI instr");
  switch (OldCFI->getOperation()) {
  default:
    llvm_unreachable("Unexpected instruction");
  case MCCFIInstruction::OpDefCfaOffset:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfaOffset(nullptr, NewOffset));
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    setCFIFor(Instr,
              MCCFIInstruction::createAdjustCfaOffset(nullptr, NewOffset));
    break;
  case MCCFIInstruction::OpDefCfa:
    setCFIFor(Instr, MCCFIInstruction::cfiDefCfa(nullptr, OldCFI->getRegister(),
                                                 NewOffset));
    break;
  case MCCFIInstruction::OpOffset:
    setCFIFor(Instr, MCCFIInstruction::createOffset(
                         nullptr, OldCFI->getRegister(), NewOffset));
    break;
  }
  return getCFIFor(Instr);
}

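// Analyze an indirect branch at the given offset and classify it as a jump
// table branch, a fixed indirect branch, or a possible tail call, registering
// any discovered jump table with the BinaryContext.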
IndirectBranchType
BinaryFunction::processIndirectBranch(MCInst &Instruction,
                                      unsigned Size,
                                      uint64_t Offset,
                                      uint64_t &TargetAddress) {
  const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();

  // The instruction referencing memory used by the branch instruction.
  // It could be the branch instruction itself or one of the instructions
  // setting the value of the register used by the branch.
  MCInst *MemLocInstr;

  // Address of the table referenced by MemLocInstr. Could be either an
  // array of function pointers, or a jump table.
  uint64_t ArrayStart = 0;

  unsigned BaseRegNum, IndexRegNum;
  int64_t DispValue;
  const MCExpr *DispExpr;

  // In AArch, identify the instruction adding the PC-relative offset to
  // jump table entries to correctly decode it.
  MCInst *PCRelBaseInstr;
  uint64_t PCRelAddr = 0;

  auto Begin = Instructions.begin();
  if (BC.isAArch64()) {
    PreserveNops = BC.HasRelocations;
    // Start at the last label as an approximation of the current basic block.
    // This is a heuristic, since the full set of labels have yet to be
    // determined
    for (auto LI = Labels.rbegin(); LI != Labels.rend(); ++LI) {
      auto II = Instructions.find(LI->first);
      if (II != Instructions.end()) {
        Begin = II;
        break;
      }
    }
  }

  IndirectBranchType BranchType =
      BC.MIB->analyzeIndirectBranch(Instruction,
                                    Begin,
                                    Instructions.end(),
                                    PtrSize,
                                    MemLocInstr,
                                    BaseRegNum,
                                    IndexRegNum,
                                    DispValue,
                                    DispExpr,
                                    PCRelBaseInstr);

  if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
    return BranchType;

  if (MemLocInstr != &Instruction)
    IndexRegNum = BC.MIB->getNoRegister();
if (BC.isAArch64()) {
|
2021-03-23 13:41:41 -07:00
|
|
|
const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
|
|
|
|
|
assert(Sym && "Symbol extraction failed");
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*Sym);
|
2019-06-04 15:30:22 -07:00
|
|
|
if (SymValueOrError) {
|
|
|
|
|
PCRelAddr = *SymValueOrError;
|
2017-10-16 11:12:22 -07:00
|
|
|
} else {
|
2021-04-08 00:19:26 -07:00
|
|
|
for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) {
|
2017-10-16 11:12:22 -07:00
|
|
|
if (Elmt.second == Sym) {
|
|
|
|
|
PCRelAddr = Elmt.first + getAddress();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
uint64_t InstrAddr = 0;
|
2017-11-04 19:22:05 -07:00
|
|
|
for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) {
|
|
|
|
|
if (&II->second == PCRelBaseInstr) {
|
2017-10-16 11:12:22 -07:00
|
|
|
InstrAddr = II->first + getAddress();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(InstrAddr != 0 && "instruction not found");
|
|
|
|
|
// We do this to avoid spurious references to code locations outside this
|
|
|
|
|
// function (for example, if the indirect jump lives in the last basic
|
|
|
|
|
// block of the function, it will create a reference to the next function).
|
|
|
|
|
// This replaces a symbol reference with an immediate.
|
2018-03-09 09:45:13 -08:00
|
|
|
BC.MIB->replaceMemOperandDisp(*PCRelBaseInstr,
|
2017-10-16 11:12:22 -07:00
|
|
|
MCOperand::createImm(PCRelAddr - InstrAddr));
|
|
|
|
|
// FIXME: Disable full jump table processing for AArch64 until we have a
|
|
|
|
|
// proper way of determining the jump table limits.
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// RIP-relative addressing should be converted to symbol form by now
|
|
|
|
|
// in processed instructions (but not in jump).
|
|
|
|
|
if (DispExpr) {
|
2017-11-14 20:05:11 -08:00
|
|
|
const MCSymbol *TargetSym;
|
|
|
|
|
uint64_t TargetOffset;
|
2018-03-09 09:45:13 -08:00
|
|
|
std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
|
2019-06-04 15:30:22 -07:00
|
|
|
assert(SymValueOrError && "global symbol needs a value");
|
|
|
|
|
ArrayStart = *SymValueOrError + TargetOffset;
|
2019-06-28 09:21:27 -07:00
|
|
|
BaseRegNum = BC.MIB->getNoRegister();
|
2018-03-20 14:34:58 -07:00
|
|
|
if (BC.isAArch64()) {
      ArrayStart &= ~0xFFFULL;
      ArrayStart += DispValue & 0xFFFULL;
    }
  } else {
    ArrayStart = static_cast<uint64_t>(DispValue);
  }

  if (BaseRegNum == BC.MRI->getProgramCounter())
    ArrayStart += getAddress() + Offset + Size;

  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
                    << Twine::utohexstr(ArrayStart) << '\n');

  ErrorOr<BinarySection &> Section = BC.getSectionForAddress(ArrayStart);
  if (!Section) {
    // No section - possibly an absolute address. Since we don't allow
    // internal function addresses to escape the function scope - we
    // consider it a tail call.
    if (opts::Verbosity >= 1) {
      errs() << "BOLT-WARNING: no section for address 0x"
             << Twine::utohexstr(ArrayStart) << " referenced from function "
             << *this << '\n';
    }
    return IndirectBranchType::POSSIBLE_TAIL_CALL;
  }
  if (Section->isVirtual()) {
    // The contents are filled at runtime.
    return IndirectBranchType::POSSIBLE_TAIL_CALL;
  }

  if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
    ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
    if (!Value)
      return IndirectBranchType::UNKNOWN;

    if (!BC.getSectionForAddress(ArrayStart)->isReadOnly())
      return IndirectBranchType::UNKNOWN;

    outs() << "BOLT-INFO: fixed indirect branch detected in " << *this
           << " at 0x" << Twine::utohexstr(getAddress() + Offset)
           << " referencing data at 0x" << Twine::utohexstr(ArrayStart)
           << " the destination value is 0x" << Twine::utohexstr(*Value)
           << '\n';

    TargetAddress = *Value;
    return BranchType;
  }

  // Check if there's already a jump table registered at this address.
  MemoryContentsType MemType;
  if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
    switch (JT->Type) {
    case JumpTable::JTT_NORMAL:
      MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
      break;
    case JumpTable::JTT_PIC:
      MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
      break;
    }
  } else {
    MemType = BC.analyzeMemoryAt(ArrayStart, *this);
  }

  // Check that jump table type in instruction pattern matches memory contents.
  JumpTable::JumpTableType JTType;
  if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
    if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
      return IndirectBranchType::UNKNOWN;
    JTType = JumpTable::JTT_PIC;
  } else {
    if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
      return IndirectBranchType::UNKNOWN;

    if (MemType == MemoryContentsType::UNKNOWN)
      return IndirectBranchType::POSSIBLE_TAIL_CALL;

    BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
    JTType = JumpTable::JTT_NORMAL;
  }
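
  // NOTE: a brief sketch of the two jump table flavors distinguished above
  // (entry layout given for illustration, following the usual BOLT
  // convention): a JTT_NORMAL table holds absolute code pointers,
  //   .LJTI:  .quad .LBB1
  //           .quad .LBB2
  // while a JTT_PIC table holds 32-bit offsets relative to the table start,
  //   .LJTI:  .long .LBB1 - .LJTI
  //           .long .LBB2 - .LJTI
  // which is why a PIC-style dispatch sequence must not be paired with
  // non-PIC memory contents, and vice versa.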

  // Convert the instruction into jump table branch.
  const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
  BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
  BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);

  JTSites.emplace_back(Offset, ArrayStart);

  return BranchType;
}

MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
                                                bool CreatePastEnd) {
  const uint64_t Offset = Address - getAddress();

  if ((Offset == getSize()) && CreatePastEnd)
    return getFunctionEndLabel();

  auto LI = Labels.find(Offset);
  if (LI != Labels.end())
    return LI->second;

  // For AArch64, check if this address is part of a constant island.
  if (BC.isAArch64()) {
    if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) {
      return IslandSym;
    }
  }

  MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
  Labels[Offset] = Label;

  return Label;
}

ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const {
  BinarySection &Section = *getOriginSection();
  assert(Section.containsRange(getAddress(), getMaxSize()) &&
         "wrong section for function");

  if (!Section.isText() || Section.isVirtual() || !Section.getSize()) {
    return std::make_error_code(std::errc::bad_address);
  }

  StringRef SectionContents = Section.getContents();

  assert(SectionContents.size() == Section.getSize() &&
         "section size mismatch");

  // Function offset from the section start.
  uint64_t Offset = getAddress() - Section.getAddress();
  auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
  return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize());
}

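// NOTE: a hedged worked example for the lookup below (offsets invented for
// illustration): if Islands.DataOffsets contains 0x40 and Islands.CodeOffsets
// contains 0x60, then getSizeOfDataInCodeAt(0x40) returns 0x20 -- the data
// spans from the island start to where code resumes; if no code follows the
// island, the data extends to the end of the function.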
size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
  if (Islands.DataOffsets.find(Offset) == Islands.DataOffsets.end())
    return 0;

  auto Iter = Islands.CodeOffsets.upper_bound(Offset);
  if (Iter != Islands.CodeOffsets.end()) {
    return *Iter - Offset;
  }
  return getSize() - Offset;
}

bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const {
  ArrayRef<uint8_t> FunctionData = *getData();
  uint64_t EndOfCode = getSize();
  auto Iter = Islands.DataOffsets.upper_bound(Offset);
  if (Iter != Islands.DataOffsets.end())
    EndOfCode = *Iter;
  for (uint64_t I = Offset; I < EndOfCode; ++I) {
    if (FunctionData[I] != 0) {
      return false;
    }
  }

  return true;
}

bool BinaryFunction::disassemble() {
  NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                     "Build Binary Functions", opts::TimeBuild);
  ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
  assert(ErrorOrFunctionData && "function data is not available");
  ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
  assert(FunctionData.size() == getMaxSize() &&
         "function size does not match raw data size");

  auto &Ctx = BC.Ctx;
  auto &MIB = BC.MIB;

  // Insert a label at the beginning of the function. This will be our first
  // basic block.
  Labels[0] = Ctx->createNamedTempSymbol("BB0");

  auto handlePCRelOperand =
      [&](MCInst &Instruction, uint64_t Address, uint64_t Size) {
    uint64_t TargetAddress{0};
    if (!MIB->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
                                       Size)) {
      errs() << "BOLT-ERROR: PC-relative operand can't be evaluated:\n";
      BC.InstPrinter->printInst(&Instruction, 0, "", *BC.STI, errs());
      errs() << '\n';
      Instruction.dump_pretty(errs(), BC.InstPrinter.get());
      errs() << '\n';
      return false;
    }
    if (TargetAddress == 0 && opts::Verbosity >= 1) {
      outs() << "BOLT-INFO: PC-relative operand is zero in function " << *this
             << '\n';
    }

    const MCSymbol *TargetSymbol;
    uint64_t TargetOffset;
    std::tie(TargetSymbol, TargetOffset) =
        BC.handleAddressRef(TargetAddress, *this, /*IsPCRel*/ true);
    const MCExpr *Expr = MCSymbolRefExpr::create(TargetSymbol,
                                                 MCSymbolRefExpr::VK_None,
                                                 *BC.Ctx);
    if (TargetOffset) {
      const MCConstantExpr *Offset =
          MCConstantExpr::create(TargetOffset, *BC.Ctx);
      Expr = MCBinaryExpr::createAdd(Expr, Offset, *BC.Ctx);
    }
    MIB->replaceMemOperandDisp(
        Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
                         Instruction,
                         Expr,
                         *BC.Ctx, 0)));
    return true;
  };
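
  // NOTE: a hedged illustration of what handlePCRelOperand achieves (the
  // address, displacement, and symbol name are invented for the example): an
  // x86-64 instruction such as
  //   mov 0x2186(%rip), %rax   # raw displacement resolving to 0x601040
  // gets its displacement replaced with a symbolic expression, conceptually
  //   mov DATAat601040(%rip), %rax
  // so the reference stays correct after BOLT moves the code.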

  // Used to fix the target of linker-generated AArch64 stubs with no relocation
  // info
  auto fixStubTarget = [&](MCInst &LoadLowBits, MCInst &LoadHiBits,
                           uint64_t Target) {
    const MCSymbol *TargetSymbol;
    uint64_t Addend{0};
    std::tie(TargetSymbol, Addend) = BC.handleAddressRef(Target, *this, true);

    int64_t Val;
    MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(),
                                 Val, ELF::R_AARCH64_ADR_PREL_PG_HI21);
    MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
                                 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
  };
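
  // NOTE: a hedged sketch of the stub shape fixStubTarget expects (register
  // choice and symbol are illustrative; linker veneers commonly use x16/x17):
  //   adrp x16, dest            // LoadHiBits, retargeted via ADR_PREL_PG_HI21
  //   add  x16, x16, :lo12:dest // LoadLowBits, retargeted via ADD_ABS_LO12_NC
  //   br   x16
  // Rewriting both halves against the destination symbol keeps the veneer
  // valid once the destination function is moved.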

  uint64_t Size = 0; // instruction size
  for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
    MCInst Instruction;
    const uint64_t AbsoluteInstrAddr = getAddress() + Offset;

    // Check for data inside code and ignore it
    if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
      Size = DataInCodeSize;
      continue;
    }

    if (!BC.DisAsm->getInstruction(Instruction,
                                   Size,
                                   FunctionData.slice(Offset),
                                   AbsoluteInstrAddr,
                                   nulls())) {
      // Functions with "soft" boundaries, e.g. coming from assembly source,
      // can have 0-byte padding at the end.
      if (isZeroPaddingAt(Offset))
        break;

      errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
             << Twine::utohexstr(Offset) << " (address 0x"
             << Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
             << *this << '\n';
      // Some AVX-512 instructions could not be disassembled at all.
      if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
      } else {
        setIgnored();
      }

      break;
    }

    // Check integrity of LLVM assembler/disassembler.
    if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
        !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
      if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
        errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
               << "function " << *this << " for instruction :\n";
        BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
        errs() << '\n';
      }
    }

    // Special handling for AVX-512 instructions.
    if (MIB->hasEVEXEncoding(Instruction)) {
      if (BC.HasRelocations && opts::TrapOnAVX512) {
        setTrapOnEntry();
        BC.TrappedFunctions.push_back(this);
        break;
      }

      // Check if our disassembly is correct and matches the assembler output.
      if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
        if (opts::Verbosity >= 1) {
          errs() << "BOLT-WARNING: internal assembler/disassembler error "
                    "detected for AVX512 instruction:\n";
          BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
          errs() << " in function " << *this << '\n';
        }

        setIgnored();
        break;
      }
    }

    // Check if there's a relocation associated with this instruction.
    bool UsedReloc{false};
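
    // NOTE: a hedged example of the rewrite performed in the loop below
    // (symbol name and values are invented): for a 32-bit absolute relocation
    // such as R_X86_64_32S against GlobalArray applied to
    //   mov $0x601040, %eax
    // the immediate 0x601040 is replaced with a reference to GlobalArray plus
    // the addend, so the operand stays valid if data or code is relocated.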
    for (auto Itr = Relocations.lower_bound(Offset),
              ItrE = Relocations.lower_bound(Offset + Size); Itr != ItrE; ++Itr) {
      const Relocation &Relocation = Itr->second;

      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: replacing immediate 0x"
                        << Twine::utohexstr(Relocation.Value)
                        << " with relocation"
                           " against "
                        << Relocation.Symbol << "+" << Relocation.Addend
                        << " in function " << *this
                        << " for instruction at offset 0x"
                        << Twine::utohexstr(Offset) << '\n');

      // Process reference to the primary symbol.
      if (!Relocation.isPCRelative())
        BC.handleAddressRef(Relocation.Value - Relocation.Addend,
                            *this,
                            /*IsPCRel*/ false);

      int64_t Value = Relocation.Value;
      const bool Result = BC.MIB->replaceImmWithSymbolRef(
          Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(), Value,
          Relocation.Type);
      (void)Result;
      assert(Result && "cannot replace immediate with relocation");

      // For AArch64, if we replaced an immediate with a symbol from a
      // relocation, we mark it so we do not try to further process a
      // PC-relative operand. All we need is the symbol.
      if (BC.isAArch64())
        UsedReloc = true;

      // Make sure we replaced the correct immediate (instruction
      // can have multiple immediate operands).
      if (BC.isX86()) {
        assert(truncateToSize(static_cast<uint64_t>(Value),
                              Relocation::getSizeForType(Relocation.Type)) ==
                   truncateToSize(Relocation.Value,
                                  Relocation::getSizeForType(Relocation.Type)) &&
               "immediate value mismatch in function");
      }
    }

    // Convert instruction to a shorter version that could be relaxed if
    // needed.
    MIB->shortenInstruction(Instruction);

    if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
      uint64_t TargetAddress = 0;
      if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                              TargetAddress)) {
        // Check if the target is within the same function. Otherwise it's
        // a call, possibly a tail call.
        //
        // If the target *is* the function address it could be either a branch
        // or a recursive call.
        bool IsCall = MIB->isCall(Instruction);
        const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
        MCSymbol *TargetSymbol = nullptr;

        if (IsCall && containsAddress(TargetAddress)) {
          if (TargetAddress == getAddress()) {
            // Recursive call.
            TargetSymbol = getSymbol();
          } else {
            if (BC.isX86()) {
              // Dangerous old-style x86 PIC code. We may need to freeze this
              // function, so preserve the function as is for now.
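              // (A hedged illustration: 32-bit PIC code historically
              // materialized the program counter with an intra-function call,
              //   call .Lnext
              //   .Lnext: pop %ebx
              // so a call target inside the current function is not
              // necessarily a real call site, and rewriting it blindly would
              // be unsafe.)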
              PreserveNops = true;
            } else {
              errs() << "BOLT-WARNING: internal call detected at 0x"
                     << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
                     << *this << ". Skipping.\n";
              IsSimple = false;
            }
          }
        }

        if (!TargetSymbol) {
          // Create either local label or external symbol.
          if (containsAddress(TargetAddress)) {
            TargetSymbol = getOrCreateLocalLabel(TargetAddress);
          } else {
            if (TargetAddress == getAddress() + getSize() &&
                TargetAddress < getAddress() + getMaxSize()) {
              // Result of __builtin_unreachable().
              LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
                                << Twine::utohexstr(AbsoluteInstrAddr)
                                << " in function " << *this
                                << " : replacing with nop.\n");
              BC.MIB->createNoop(Instruction);
              if (IsCondBranch) {
                // Register branch offset for profile validation.
                IgnoredBranches.emplace_back(Offset, Offset + Size);
              }
              goto add_instruction;
            }
            InterproceduralReferences.insert(TargetAddress);
            if (opts::Verbosity >= 2 && !IsCall && Size == 2 &&
                !BC.HasRelocations) {
              errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
                     << Twine::utohexstr(AbsoluteInstrAddr) << " in function "
                     << *this << ". Code size will be increased.\n";
            }

            assert(!MIB->isTailCall(Instruction) &&
                   "synthetic tail call instruction found");

            // This is a call regardless of the opcode.
            // Assign proper opcode for tail calls, so that they could be
            // treated as calls.
            if (!IsCall) {
              if (!MIB->convertJmpToTailCall(Instruction)) {
                assert(IsCondBranch && "unknown tail call instruction");
                if (opts::Verbosity >= 2) {
                  errs() << "BOLT-WARNING: conditional tail call detected in "
                         << "function " << *this << " at 0x"
                         << Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
                }
              }
              IsCall = true;
            }

            TargetSymbol =
                BC.getOrCreateGlobalSymbol(TargetAddress, "FUNCat");
            if (TargetAddress == 0) {
              // We actually see calls to address 0 in the presence of weak
              // symbols originating from libraries. This code is never meant
              // to be executed.
              if (opts::Verbosity >= 2) {
                outs() << "BOLT-INFO: Function " << *this
                       << " has a call to address zero.\n";
              }
            }

            if (BC.HasRelocations) {
              // Check if we need to create relocation to move this function's
              // code without re-assembly.
              size_t RelSize = (Size < 5) ? 1 : 4;
              uint64_t RelOffset = Offset + Size - RelSize;
              if (BC.isAArch64()) {
                RelSize = 0;
                RelOffset = Offset;
              }
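
              // NOTE: a hedged worked example of the sizing above (x86-64
              // encodings; offsets are illustrative): a 5-byte `call rel32`
              // (e8 xx xx xx xx) at offset O gets a 4-byte R_X86_64_PC32 at
              // O + 1, while a 2-byte `jmp rel8` (eb xx) gets a 1-byte
              // R_X86_64_PC8 at O + 1. On AArch64 the relocation covers the
              // whole instruction, hence RelSize = 0 and RelOffset = Offset.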
              auto RI = MoveRelocations.find(RelOffset);
              if (RI == MoveRelocations.end()) {
                uint64_t RelType =
                    (RelSize == 1) ? ELF::R_X86_64_PC8 : ELF::R_X86_64_PC32;
                if (BC.isAArch64())
                  RelType = ELF::R_AARCH64_CALL26;
                LLVM_DEBUG(dbgs()
                           << "BOLT-DEBUG: creating relocation for static"
                           << " function call to " << TargetSymbol->getName()
                           << " at offset 0x" << Twine::utohexstr(RelOffset)
                           << " with size " << RelSize << " for function "
                           << *this << '\n');
                addRelocation(getAddress() + RelOffset, TargetSymbol, RelType,
                              -RelSize, 0);
              }
            }
          }
        }

        if (!IsCall) {
          // Add taken branch info.
          TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
        }
        BC.MIB->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);

        // Mark CTC.
        if (IsCondBranch && IsCall) {
          MIB->setConditionalTailCall(Instruction, TargetAddress);
        }
      } else {
        // Could not evaluate branch. Should be an indirect call or an
        // indirect branch. Bail out on the latter case.
        if (MIB->isIndirectBranch(Instruction)) {
          uint64_t IndirectTarget{0};
          IndirectBranchType Result =
              processIndirectBranch(Instruction, Size, Offset, IndirectTarget);
          switch (Result) {
          default:
            llvm_unreachable("unexpected result");
          case IndirectBranchType::POSSIBLE_TAIL_CALL: {
            bool Result = MIB->convertJmpToTailCall(Instruction);
            (void)Result;
            assert(Result);
            break;
          }
          case IndirectBranchType::POSSIBLE_JUMP_TABLE:
          case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
            if (opts::JumpTables == JTS_NONE)
              IsSimple = false;
            break;
          case IndirectBranchType::POSSIBLE_FIXED_BRANCH: {
            if (containsAddress(IndirectTarget)) {
              const MCSymbol *TargetSymbol =
                  getOrCreateLocalLabel(IndirectTarget);
              Instruction.clear();
              MIB->createUncondBranch(Instruction, TargetSymbol, BC.Ctx.get());
              TakenBranches.emplace_back(Offset, IndirectTarget - getAddress());
              HasFixedIndirectBranch = true;
            } else {
              MIB->convertJmpToTailCall(Instruction);
              InterproceduralReferences.insert(IndirectTarget);
            }
            break;
          }
          case IndirectBranchType::UNKNOWN:
            // Keep processing. We'll do more checks and fixes in
            // postProcessIndirectBranches().
            UnknownIndirectBranchOffsets.emplace(Offset);
            break;
          };
        }

        // Indirect call. We only need to fix it if the operand is RIP-relative.
        if (IsSimple && MIB->hasPCRelOperand(Instruction)) {
          if (!handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size)) {
            errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
                   << Twine::utohexstr(AbsoluteInstrAddr)
                   << ". Skipping function " << *this << ".\n";
            if (BC.HasRelocations)
              exit(1);
            IsSimple = false;
          }
        }
        // AArch64 indirect call - check for linker veneers, which lack
        // relocations and need manual adjustments
        MCInst *TargetHiBits, *TargetLowBits;
        uint64_t TargetAddress;
        if (BC.isAArch64() &&
            MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
                                   AbsoluteInstrAddr, Instruction, TargetHiBits,
                                   TargetLowBits, TargetAddress)) {
          MIB->addAnnotation(Instruction, "AArch64Veneer", true);

          uint8_t Counter = 0;
          for (auto It = std::prev(Instructions.end()); Counter != 2;
               --It, ++Counter) {
            MIB->addAnnotation(It->second, "AArch64Veneer", true);
          }

          fixStubTarget(*TargetLowBits, *TargetHiBits, TargetAddress);
        }
      }
    } else {
      if (MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
        if (!handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size)) {
          errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
                 << Twine::utohexstr(AbsoluteInstrAddr)
                 << ". Skipping function " << *this << ".\n";
          if (BC.HasRelocations)
            exit(1);
          IsSimple = false;
        }
      }
    }

add_instruction:
    if (getDWARFLineTable()) {
      Instruction.setLoc(
          findDebugLineInformationForInstructionAt(AbsoluteInstrAddr,
                                                   getDWARFUnit(),
                                                   getDWARFLineTable()));
    }

    // Record offset of the instruction for profile matching.
    if (BC.keepOffsetForInstruction(Instruction)) {
      MIB->addAnnotation(Instruction, "Offset", static_cast<uint32_t>(Offset));
    }

    addInstruction(Offset, std::move(Instruction));
  }

  clearList(Relocations);

  if (!IsSimple) {
    clearList(Instructions);
    return false;
  }

  updateState(State::Disassembled);

  return true;
}
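
// A hedged summary of this function's role, taken from the change
// description that introduced lite relocation mode: BOLT skips full
// processing of functions without a profile and instead runs
// scanExternalRefs() on them to discover external references and to create
// relocations that redirect those references to the optimized (moved)
// versions of functions.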
|
|
|
|
|
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
bool BinaryFunction::scanExternalRefs() {
|
|
|
|
|
bool Success = true;
|
|
|
|
|
bool DisassemblyFailed = false;
|
|
|
|
|
|
|
|
|
|
// Ignore pseudo functions.
|
|
|
|
|
if (isPseudo())
|
|
|
|
|
return Success;
|
|
|
|
|
|
2020-12-30 12:23:58 -08:00
|
|
|
if (opts::NoScan) {
|
|
|
|
|
clearList(Relocations);
|
|
|
|
|
clearList(ExternallyReferencedOffsets);
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
// List of external references for this function.
|
|
|
|
|
std::vector<Relocation> FunctionRelocations;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
static BinaryContext::IndependentCodeEmitter Emitter =
|
|
|
|
|
BC.createIndependentMCCodeEmitter();
|
2020-05-03 15:49:58 -07:00
|
|
|
|
|
|
|
|
ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData();
|
|
|
|
|
assert(ErrorOrFunctionData && "function data is not available");
|
|
|
|
|
ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData;
|
|
|
|
|
assert(FunctionData.size() == getMaxSize() &&
|
|
|
|
|
"function size does not match raw data size");
|
|
|
|
|
|
2020-07-16 17:35:55 -07:00
|
|
|
uint64_t Size = 0; // instruction size
|
2020-05-03 15:49:58 -07:00
|
|
|
for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
|
|
|
|
|
// Check for data inside code and ignore it
|
2021-04-08 00:19:26 -07:00
|
|
|
if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) {
|
2020-05-03 15:49:58 -07:00
|
|
|
Size = DataInCodeSize;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
|
|
|
|
|
MCInst Instruction;
|
2020-07-16 17:35:55 -07:00
|
|
|
if (!BC.DisAsm->getInstruction(Instruction,
|
|
|
|
|
Size,
|
2020-05-03 15:49:58 -07:00
|
|
|
FunctionData.slice(Offset),
|
2020-07-16 17:35:55 -07:00
|
|
|
AbsoluteInstrAddr,
|
|
|
|
|
nulls())) {
|
2020-05-03 15:49:58 -07:00
|
|
|
if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) {
|
|
|
|
|
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(Offset) << " (address 0x"
|
|
|
|
|
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
|
|
|
|
|
<< *this << '\n';
|
|
|
|
|
}
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
Success = false;
|
|
|
|
|
DisassemblyFailed = true;
|
2020-05-03 15:49:58 -07:00
|
|
|
break;
|
|
|
|
|
}

    // Return true if we can skip handling the Target function reference.
    auto ignoreFunctionRef = [&](const BinaryFunction &Target) {
      if (&Target == this)
        return true;

      // Note that later we may decide not to emit Target function. In that
      // case, we conservatively create references that will be ignored or
      // resolved to the same function.
      if (!BC.shouldEmit(Target))
        return true;

      return false;
    };

    // Return true if we can ignore reference to the symbol.
    auto ignoreReference = [&](const MCSymbol *TargetSymbol) {
      if (!TargetSymbol)
        return true;

      if (BC.forceSymbolRelocations(TargetSymbol->getName()))
        return false;

      BinaryFunction *TargetFunction = BC.getFunctionForSymbol(TargetSymbol);
      if (!TargetFunction)
        return true;

      return ignoreFunctionRef(*TargetFunction);
    };

    // Detect if the instruction references an address.
    // Without relocations, we can only trust PC-relative address modes.
    uint64_t TargetAddress = 0;
    bool IsPCRel = false;
    bool IsBranch = false;
    if (BC.MIB->hasPCRelOperand(Instruction)) {
      if (BC.MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
                                           AbsoluteInstrAddr, Size)) {
        IsPCRel = true;
      }
    } else if (BC.MIB->isCall(Instruction) || BC.MIB->isBranch(Instruction)) {
      if (BC.MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                                 TargetAddress)) {
        IsBranch = true;
      }
    }
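
    // Symbol to reference if the target address falls inside a function known
    // to BOLT: either the target function's main symbol or an entry point
    // created at the corresponding offset.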
    MCSymbol *TargetSymbol = nullptr;

    // Create an entry point at reference address if needed.
    BinaryFunction *TargetFunction =
        BC.getBinaryFunctionContainingAddress(TargetAddress);
    if (TargetFunction && !ignoreFunctionRef(*TargetFunction)) {
      const uint64_t FunctionOffset =
          TargetAddress - TargetFunction->getAddress();
      TargetSymbol = FunctionOffset
                         ? TargetFunction->addEntryPointAtOffset(FunctionOffset)
                         : TargetFunction->getSymbol();
    }

    // Can't find more references and not creating relocations.
    if (!BC.HasRelocations)
      continue;

    // Create a relocation against the TargetSymbol as the symbol might get
    // moved.
    if (TargetSymbol) {
      if (IsBranch) {
        BC.MIB->replaceBranchTarget(Instruction, TargetSymbol,
                                    Emitter.LocalCtx.get());
      } else if (IsPCRel) {
        const MCExpr *Expr = MCSymbolRefExpr::create(
            TargetSymbol, MCSymbolRefExpr::VK_None, *Emitter.LocalCtx.get());
        BC.MIB->replaceMemOperandDisp(
            Instruction, MCOperand::createExpr(BC.MIB->getTargetExprFor(
                             Instruction, Expr, *Emitter.LocalCtx.get(), 0)));
      }
    }

    // Create more relocations based on input file relocations.
    bool HasRel = false;
    for (auto Itr = Relocations.lower_bound(Offset),
              ItrE = Relocations.lower_bound(Offset + Size);
         Itr != ItrE; ++Itr) {
      Relocation &Relocation = Itr->second;
      if (ignoreReference(Relocation.Symbol))
        continue;

      int64_t Value = Relocation.Value;
      const bool Result = BC.MIB->replaceImmWithSymbolRef(
          Instruction, Relocation.Symbol, Relocation.Addend,
          Emitter.LocalCtx.get(), Value, Relocation.Type);
      (void)Result;
      assert(Result && "cannot replace immediate with relocation");

      HasRel = true;
    }
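
    // Nothing in this instruction was updated, so there is no need to
    // re-encode it and collect fixups.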
    if (!TargetSymbol && !HasRel)
      continue;

    // Emit the instruction using temp emitter and generate relocations.
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    raw_svector_ostream VecOS(Code);
    Emitter.MCE->encodeInstruction(Instruction, VecOS, Fixups, *BC.STI);

    // Create relocation for every fixup.
    for (const MCFixup &Fixup : Fixups) {
      Optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
      if (!Rel) {
        Success = false;
        continue;
      }

      if (Relocation::getSizeForType(Rel->Type) < 4) {
        // If the instruction uses a short form, then we might not be able
        // to handle the rewrite without relaxation, and hence cannot reliably
        // create an external reference relocation.
        Success = false;
        continue;
      }
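
      // The fixup offset is relative to the start of the encoded instruction.
      // Convert it into an offset from the beginning of the origin section.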
      Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
      FunctionRelocations.push_back(*Rel);
    }

    if (!Success)
      break;
  }

  // Add relocations unless disassembly failed for this function.
  if (!DisassemblyFailed) {
    for (Relocation &Rel : FunctionRelocations) {
      getOriginSection()->addPendingRelocation(Rel);
    }
  }

  // Inform BinaryContext that this function's symbols will not be defined
  // and relocations should not be created against them.
  if (BC.HasRelocations) {
    for (std::pair<const uint32_t, MCSymbol *> &LI : Labels) {
      BC.UndefinedSymbols.insert(LI.second);
    }
    if (FunctionEndLabel) {
      BC.UndefinedSymbols.insert(FunctionEndLabel);
    }
  }
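
  // The original relocations and referenced offsets are no longer needed.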
  clearList(Relocations);
  clearList(ExternallyReferencedOffsets);

  if (Success && BC.HasRelocations) {
    HasExternalRefRelocations = true;
  }

  if (opts::Verbosity >= 1 && !Success) {
    outs() << "BOLT-INFO: failed to scan refs for " << *this << '\n';
  }

  return Success;
}

void BinaryFunction::postProcessEntryPoints() {
  if (!isSimple())
    return;

  for (auto &KV : Labels) {
    MCSymbol *Label = KV.second;
    if (!getSecondaryEntryPointSymbol(Label))
      continue;

    // In non-relocation mode there's potentially an external undetectable
    // reference to the entry point and hence we cannot move this entry
    // point. Optimizing without moving could be difficult.
    if (!BC.HasRelocations)
      setSimple(false);

    const uint32_t Offset = KV.first;

    // If we are at Offset 0 and there is no instruction associated with it,
    // this means this is an empty function. Just ignore. If we find an
    // instruction at this offset, this entry point is valid.
    if (!Offset || getInstructionAtOffset(Offset)) {
      continue;
    }

    // On AArch64 there are legitimate reasons to have references past the
    // end of the function, e.g. jump tables.
    if (BC.isAArch64() && Offset == getSize()) {
      continue;
    }

    errs() << "BOLT-WARNING: reference in the middle of instruction "
              "detected in function " << *this
           << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
    if (BC.HasRelocations) {
      setIgnored();
    }
    setSimple(false);
    return;
  }
}

void BinaryFunction::postProcessJumpTables() {
  // Create labels for all entries.
  for (auto &JTI : JumpTables) {
    JumpTable &JT = *JTI.second;
    if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
      opts::JumpTables = JTS_MOVE;
      outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
                "detected in function " << *this << '\n';
    }
    for (unsigned I = 0; I < JT.OffsetEntries.size(); ++I) {
      MCSymbol *Label =
          getOrCreateLocalLabel(getAddress() + JT.OffsetEntries[I],
                                /*CreatePastEnd*/ true);
      JT.Entries.push_back(Label);
    }
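
    // Make sure the jump table size is recorded as binary data and that the
    // recorded object is large enough to hold all of its entries.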
    const uint64_t BDSize =
        BC.getBinaryDataAtAddress(JT.getAddress())->getSize();
    if (!BDSize) {
      BC.setBinaryDataSize(JT.getAddress(), JT.getSize());
    } else {
      assert(BDSize >= JT.getSize() &&
             "jump table cannot be larger than the containing object");
    }
  }

  // Add TakenBranches from JumpTables.
  //
  // We want to do it after initial processing since we don't know jump
  // tables' boundaries until we process them all.
  for (auto &JTSite : JTSites) {
    const uint64_t JTSiteOffset = JTSite.first;
    const uint64_t JTAddress = JTSite.second;
    const JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    assert(JT && "cannot find jump table for address");

    uint64_t EntryOffset = JTAddress - JT->getAddress();
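    // Walk the entries starting from the referenced one; stop at the end of
    // the table or at a label that marks the start of the next jump table.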
    while (EntryOffset < JT->getSize()) {
      uint64_t TargetOffset = JT->OffsetEntries[EntryOffset / JT->EntrySize];
      if (TargetOffset < getSize()) {
        TakenBranches.emplace_back(JTSiteOffset, TargetOffset);

        if (opts::StrictMode)
          registerReferencedOffset(TargetOffset);
      }

      // Take ownership of jump table relocations.
      if (BC.HasRelocations) {
        uint64_t EntryAddress = JT->getAddress() + EntryOffset;
        bool Res = BC.removeRelocationAt(EntryAddress);
        (void)Res;
        LLVM_DEBUG(ErrorOr<BinarySection &> Section =
                       BC.getSectionForAddress(EntryAddress);
                   uint64_t Offset = EntryAddress - Section->getAddress();
                   dbgs() << "BOLT-DEBUG: removing relocation from section "
                          << Section->getName() << " at offset 0x"
                          << Twine::utohexstr(Offset) << " = " << Res << '\n');
      }

      EntryOffset += JT->EntrySize;

      // A label at the next entry means the end of this jump table.
      if (JT->Labels.count(EntryOffset))
        break;
    }
  }
  clearList(JTSites);

  // Free memory used by jump table offsets.
  for (auto &JTI : JumpTables) {
    JumpTable &JT = *JTI.second;
    clearList(JT.OffsetEntries);
  }

  // Conservatively populate all possible destinations for unknown indirect
  // branches.
  if (opts::StrictMode && hasInternalReference()) {
    for (uint64_t Offset : UnknownIndirectBranchOffsets) {
      for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
        // Ignore __builtin_unreachable().
        if (PossibleDestination == getSize())
          continue;
        TakenBranches.emplace_back(Offset, PossibleDestination);
      }
    }
  }

  // Remove duplicate branches. We can get a bunch of them from jump tables.
  // Without doing jump table value profiling we don't have any use for extra
  // (duplicate) branches.
  std::sort(TakenBranches.begin(), TakenBranches.end());
  auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
  TakenBranches.erase(NewEnd, TakenBranches.end());
}

bool BinaryFunction::postProcessIndirectBranches(
    MCPlusBuilder::AllocatorIdTy AllocId) {
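  // Record that the function has unknown control flow and conservatively add
  // every externally referenced offset in the function as a possible successor
  // of the block.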
  auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) {
    HasUnknownControlFlow = true;
    BB.removeAllSuccessors();
    for (uint64_t PossibleDestination : ExternallyReferencedOffsets) {
      if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(PossibleDestination))
        BB.addSuccessor(SuccBB);
    }
  };
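
  // Keep track of the last indirect jump and its jump table: if the function
  // ends up with a single indirect jump and a single jump table, we can
  // re-associate the two below even when pattern matching fails.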
  uint64_t NumIndirectJumps{0};
  MCInst *LastIndirectJump = nullptr;
  BinaryBasicBlock *LastIndirectJumpBB{nullptr};
  uint64_t LastJT{0};
  uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
  for (BinaryBasicBlock *BB : layout()) {
    for (MCInst &Instr : *BB) {
      if (!BC.MIB->isIndirectBranch(Instr))
        continue;

      // If there's an indirect branch in a single-block function -
      // it must be a tail call.
      if (layout_size() == 1) {
        BC.MIB->convertJmpToTailCall(Instr);
        return true;
      }

      ++NumIndirectJumps;

      if (opts::StrictMode && !hasInternalReference()) {
        BC.MIB->convertJmpToTailCall(Instr);
        break;
      }

      // Validate the tail call or jump table assumptions now that we know
      // basic block boundaries.
      if (BC.MIB->isTailCall(Instr) || BC.MIB->getJumpTable(Instr)) {
        const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();
        MCInst *MemLocInstr;
        unsigned BaseRegNum, IndexRegNum;
        int64_t DispValue;
        const MCExpr *DispExpr;
        MCInst *PCRelBaseInstr;
        IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
            Instr, BB->begin(), BB->end(), PtrSize, MemLocInstr, BaseRegNum,
            IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
        if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
          continue;

        if (!opts::StrictMode)
          return false;

        if (BC.MIB->isTailCall(Instr)) {
          BC.MIB->convertTailCallToJmp(Instr);
        } else {
          LastIndirectJump = &Instr;
          LastIndirectJumpBB = BB;
          LastJT = BC.MIB->getJumpTable(Instr);
          LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Instr);
          BC.MIB->unsetJumpTable(Instr);

          JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
          if (JT->Type == JumpTable::JTT_NORMAL) {
            // Invalidating the jump table may also invalidate other jump table
            // boundaries. Until we have/need support for this, mark the
            // function as non-simple.
            LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference "
                              << JT->getName() << " in " << *this << '\n');
            return false;
          }
        }

        addUnknownControlFlow(*BB);
        continue;
      }

      // If this block contains epilogue code and has an indirect branch,
      // then most likely it's a tail call. Otherwise, we cannot tell for sure
      // what it is and conservatively reject the function's CFG.
      bool IsEpilogue = false;
      for (const MCInst &Instr : *BB) {
        if (BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr)) {
          IsEpilogue = true;
          break;
        }
      }
      if (IsEpilogue) {
        BC.MIB->convertJmpToTailCall(Instr);
        BB->removeAllSuccessors();
        continue;
      }

      if (opts::Verbosity >= 2) {
        outs() << "BOLT-INFO: rejected potential indirect tail call in "
               << "function " << *this << " in basic block "
               << BB->getName() << ".\n";
        LLVM_DEBUG(BC.printInstructions(dbgs(), BB->begin(), BB->end(),
                                        BB->getOffset(), this, true));
      }

      if (!opts::StrictMode)
        return false;

      addUnknownControlFlow(*BB);
    }
  }

  if (HasInternalLabelReference)
    return false;

  // If there's only one jump table, and one indirect jump, and no other
  // references, then we should be able to derive the jump table even if we
  // fail to match the pattern.
  if (HasUnknownControlFlow && NumIndirectJumps == 1 &&
      JumpTables.size() == 1 && LastIndirectJump) {
    BC.MIB->setJumpTable(*LastIndirectJump, LastJT, LastJTIndexReg, AllocId);
    HasUnknownControlFlow = false;

    // Re-populate successors based on the jump table.
    std::set<const MCSymbol *> JTLabels;
    LastIndirectJumpBB->removeAllSuccessors();
    const JumpTable *JT = getJumpTableContainingAddress(LastJT);
    for (const MCSymbol *Label : JT->Entries) {
      JTLabels.emplace(Label);
    }
    for (const MCSymbol *Label : JTLabels) {
      BinaryBasicBlock *BB = getBasicBlockForLabel(Label);
      // Ignore __builtin_unreachable().
      if (!BB) {
        assert(Label == getFunctionEndLabel() && "if no BB found, must be end");
        continue;
      }
      LastIndirectJumpBB->addSuccessor(BB);
    }
  }

  if (HasFixedIndirectBranch)
    return false;

  if (HasUnknownControlFlow && !BC.HasRelocations)
    return false;

  return true;
}

void BinaryFunction::recomputeLandingPads() {
  updateBBIndices(0);

  for (BinaryBasicBlock *BB : BasicBlocks) {
    BB->LandingPads.clear();
    BB->Throwers.clear();
  }
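
  // Re-create the landing pad and thrower links using the EH info attached to
  // invoke instructions.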
  for (BinaryBasicBlock *BB : BasicBlocks) {
    std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
    for (MCInst &Instr : *BB) {
      if (!BC.MIB->isInvoke(Instr))
        continue;

      const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(Instr);
      if (!EHInfo || !EHInfo->first)
        continue;

      BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first);
      if (!BBLandingPads.count(LPBlock)) {
        BBLandingPads.insert(LPBlock);
        BB->LandingPads.emplace_back(LPBlock);
        LPBlock->Throwers.emplace_back(BB);
      }
    }
  }
}
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2019-07-12 07:25:50 -07:00
|
|
|
bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
2018-03-09 09:45:13 -08:00
|
|
|
auto &MIB = BC.MIB;
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!isSimple()) {
|
2017-12-09 21:40:39 -08:00
|
|
|
assert(!BC.HasRelocations &&
|
2016-09-27 19:09:38 -07:00
|
|
|
"cannot process file with non-simple function in relocs mode");
|
2015-10-09 17:21:14 -07:00
|
|
|
return false;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2019-04-03 22:31:12 -07:00
|
|
|
if (CurrentState != State::Disassembled)
|
2015-10-09 17:21:14 -07:00
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
assert(BasicBlocks.empty() && "basic block list should be empty");
|
|
|
|
|
assert((Labels.find(0) != Labels.end()) &&
|
|
|
|
|
"first instruction should always have a label");
|
|
|
|
|
|
|
|
|
|
// Create basic blocks in the original layout order:
|
|
|
|
|
//
|
|
|
|
|
// * Every instruction with associated label marks
|
|
|
|
|
// the beginning of a basic block.
|
|
|
|
|
// * A conditional branch instruction marks the end of a basic block,
|
|
|
|
|
// except when the following instruction is an
|
|
|
|
|
// unconditional branch, and the unconditional branch is not
|
|
|
|
|
// a destination of another branch. In the latter case, the
|
|
|
|
|
// basic block will consist of a single unconditional branch
|
2017-10-12 14:57:11 -07:00
|
|
|
// (missed "double-jump" optimization).
|
2015-10-09 17:21:14 -07:00
|
|
|
//
|
|
|
|
|
// Created basic blocks are sorted in layout order since they are
|
|
|
|
|
// created in the same order as instructions, and instructions are
|
|
|
|
|
// sorted by offsets.
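//
// As an illustrative sketch (offsets and mnemonics below are hypothetical,
// not taken from any particular binary), the rules above would split this
// sequence into three blocks:
//
//   0x00: test %eax, %eax   <- labeled (function entry): starts BB0
//   0x02: je   0x10         <- conditional branch: ends BB0
//   0x04: mov  $0x1, %eax   <- fall-through: starts BB1
//   0x09: jmp  0x14         <- unconditional branch: ends BB1
//   0x10: xor  %eax, %eax   <- branch destination label: starts BB2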
|
|
|
|
|
BinaryBasicBlock *InsertBB{nullptr};
|
|
|
|
|
BinaryBasicBlock *PrevBB{nullptr};
|
2016-07-13 18:57:40 -07:00
|
|
|
bool IsLastInstrNop{false};
|
2018-02-13 11:21:59 -08:00
|
|
|
uint64_t LastInstrOffset{0};
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2020-07-16 17:35:55 -07:00
|
|
|
auto addCFIPlaceholders =
|
|
|
|
|
[this](uint64_t CFIOffset, BinaryBasicBlock *InsertBB) {
|
|
|
|
|
for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
|
|
|
|
|
FE = OffsetToCFI.upper_bound(CFIOffset);
|
|
|
|
|
FI != FE; ++FI) {
|
|
|
|
|
addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
|
|
|
|
|
}
|
|
|
|
|
};
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2018-02-13 11:21:59 -08:00
|
|
|
// For profiling purposes we need to save the offset of the last instruction
|
|
|
|
|
// in the basic block. But in certain cases we don't know if the instruction was
|
|
|
|
|
// the last one, and we have to go back and update its offset.
|
|
|
|
|
auto updateOffset = [&](uint64_t Offset) {
|
|
|
|
|
assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
|
2021-04-08 00:19:26 -07:00
|
|
|
MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
|
2018-03-09 09:45:13 -08:00
|
|
|
if (PrevInstr && !MIB->hasAnnotation(*PrevInstr, "Offset"))
|
2019-07-12 07:25:50 -07:00
|
|
|
MIB->addAnnotation(*PrevInstr, "Offset", static_cast<uint32_t>(Offset),
|
|
|
|
|
AllocatorId);
|
2018-02-13 11:21:59 -08:00
|
|
|
};
|
|
|
|
|
|
2017-11-04 19:22:05 -07:00
|
|
|
for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint32_t Offset = I->first;
|
|
|
|
|
MCInst &Instr = I->second;
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
auto LI = Labels.find(Offset);
|
2015-10-09 17:21:14 -07:00
|
|
|
if (LI != Labels.end()) {
|
|
|
|
|
// Always create new BB at branch destination.
|
|
|
|
|
PrevBB = InsertBB;
|
2015-10-20 10:51:17 -07:00
|
|
|
InsertBB = addBasicBlock(LI->first, LI->second,
|
2017-11-07 15:42:28 -08:00
|
|
|
opts::PreserveBlocksAlignment && IsLastInstrNop);
|
2018-02-13 11:21:59 -08:00
|
|
|
if (PrevBB)
|
|
|
|
|
updateOffset(LastInstrOffset);
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
2019-05-16 12:46:32 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t InstrInputAddr = I->first + Address;
|
2019-05-16 12:46:32 -07:00
|
|
|
bool IsSDTMarker =
|
2019-05-17 07:58:27 -07:00
|
|
|
MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);
|
2020-08-04 13:50:00 -07:00
|
|
|
bool IsLKMarker = BC.LKMarkers.count(InstrInputAddr);
|
|
|
|
|
if (IsSDTMarker || IsLKMarker) {
|
2019-05-16 12:46:32 -07:00
|
|
|
HasSDTMarker = true;
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "SDTMarker or LKMarker detected in the input at : "
|
|
|
|
|
<< utohexstr(InstrInputAddr) << "\n");
|
2019-11-03 21:57:15 -08:00
|
|
|
if (!MIB->hasAnnotation(Instr, "Offset")) {
|
|
|
|
|
MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset),
|
|
|
|
|
AllocatorId);
|
2019-07-12 07:25:50 -07:00
|
|
|
}
|
2019-05-17 07:58:27 -07:00
|
|
|
}
|
2019-05-16 12:46:32 -07:00
|
|
|
|
|
|
|
|
// Ignore nops except SDT and LK markers. We use nops to derive alignment of the
|
|
|
|
|
// next basic block. It will not always work, as some blocks are naturally
|
|
|
|
|
// aligned, but it's just part of the heuristic for block alignment.
|
2020-08-04 13:50:00 -07:00
|
|
|
if (MIB->isNoop(Instr) && !PreserveNops && !IsSDTMarker && !IsLKMarker) {
|
2016-01-19 00:20:06 -08:00
|
|
|
IsLastInstrNop = true;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
if (!InsertBB) {
|
2015-10-20 10:17:38 -07:00
|
|
|
// It must be a fallthrough or unreachable code. Create a new block unless
|
2017-10-12 14:57:11 -07:00
|
|
|
// we see an unconditional branch following a conditional one. The latter
|
|
|
|
|
// should not be a conditional tail call.
|
2015-10-09 17:21:14 -07:00
|
|
|
assert(PrevBB && "no previous basic block for a fall through");
|
2021-04-08 00:19:26 -07:00
|
|
|
MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr();
|
2015-10-20 10:17:38 -07:00
|
|
|
assert(PrevInstr && "no previous instruction for a fall through");
|
2018-03-09 09:45:13 -08:00
|
|
|
if (MIB->isUnconditionalBranch(Instr) &&
|
|
|
|
|
!MIB->isUnconditionalBranch(*PrevInstr) &&
|
|
|
|
|
!MIB->getConditionalTailCall(*PrevInstr)) {
|
2015-10-09 17:21:14 -07:00
|
|
|
// Temporarily restore inserter basic block.
|
|
|
|
|
InsertBB = PrevBB;
|
|
|
|
|
} else {
|
2019-07-12 07:25:50 -07:00
|
|
|
MCSymbol *Label;
|
|
|
|
|
{
|
2019-08-07 16:09:50 -07:00
|
|
|
auto L = BC.scopeLock();
|
2020-12-01 16:29:39 -08:00
|
|
|
Label = BC.Ctx->createNamedTempSymbol("FT");
|
2019-07-12 07:25:50 -07:00
|
|
|
}
|
|
|
|
|
InsertBB = addBasicBlock(
|
|
|
|
|
Offset, Label, opts::PreserveBlocksAlignment && IsLastInstrNop);
|
2018-02-13 11:21:59 -08:00
|
|
|
updateOffset(LastInstrOffset);
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
}
|
2016-07-13 18:57:40 -07:00
|
|
|
if (Offset == 0) {
|
2015-11-08 12:23:54 -08:00
|
|
|
// Add associated CFI pseudos at the first offset (0).
|
|
|
|
|
addCFIPlaceholders(0, InsertBB);
|
|
|
|
|
}
|
2015-10-20 10:51:17 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const bool IsBlockEnd = MIB->isTerminator(Instr);
|
2018-03-09 09:45:13 -08:00
|
|
|
IsLastInstrNop = MIB->isNoop(Instr);
|
2018-02-13 11:21:59 -08:00
|
|
|
LastInstrOffset = Offset;
|
|
|
|
|
InsertBB->addInstruction(std::move(Instr));
|
2016-07-13 18:57:40 -07:00
|
|
|
|
2015-11-08 12:23:54 -08:00
|
|
|
// Add associated CFI instrs. We always add the CFI instruction that is
|
|
|
|
|
// located immediately after this instruction, since the next CFI
|
|
|
|
|
// instruction reflects the change in state caused by this instruction.
|
2016-08-29 21:11:22 -07:00
|
|
|
auto NextInstr = std::next(I);
|
2015-11-08 12:23:54 -08:00
|
|
|
uint64_t CFIOffset;
|
|
|
|
|
if (NextInstr != E)
|
|
|
|
|
CFIOffset = NextInstr->first;
|
|
|
|
|
else
|
|
|
|
|
CFIOffset = getSize();
|
2018-02-13 11:21:59 -08:00
|
|
|
|
|
|
|
|
// Note: this potentially invalidates instruction pointers/iterators.
|
2015-11-08 12:23:54 -08:00
|
|
|
addCFIPlaceholders(CFIOffset, InsertBB);
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2018-02-13 11:21:59 -08:00
|
|
|
if (IsBlockEnd) {
|
2015-10-09 17:21:14 -07:00
|
|
|
PrevBB = InsertBB;
|
|
|
|
|
InsertBB = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-12 11:30:05 -07:00
|
|
|
if (BasicBlocks.empty()) {
|
|
|
|
|
setSimple(false);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
// Intermediate dump.
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(print(dbgs(), "after creating basic blocks"));
|
2015-10-09 17:21:14 -07:00
|
|
|
|
|
|
|
|
// TODO: properly handle calls to no-return functions,
|
|
|
|
|
// e.g. exit(3), etc. Otherwise we'll see false fall-through
|
|
|
|
|
// blocks.
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "registering branch [0x"
|
|
|
|
|
<< Twine::utohexstr(Branch.first) << "] -> [0x"
|
|
|
|
|
<< Twine::utohexstr(Branch.second) << "]\n");
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
|
|
|
|
|
BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
|
2018-06-20 12:03:24 -07:00
|
|
|
if (!FromBB || !ToBB) {
|
|
|
|
|
if (!FromBB)
|
|
|
|
|
errs() << "BOLT-ERROR: cannot find BB containing the branch.\n";
|
2019-10-21 15:57:36 -07:00
|
|
|
if (!ToBB)
|
2018-06-20 12:03:24 -07:00
|
|
|
errs() << "BOLT-ERROR: cannot find BB containing branch destination.\n";
|
|
|
|
|
BC.exitWithBugReport("disassembly failed - inconsistent branch found.",
|
|
|
|
|
*this);
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
FromBB->addSuccessor(ToBB);
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
// Add fall-through branches.
|
2015-10-09 17:21:14 -07:00
|
|
|
PrevBB = nullptr;
|
|
|
|
|
bool IsPrevFT = false; // Is previous block a fall-through.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : BasicBlocks) {
|
2015-10-09 17:21:14 -07:00
|
|
|
if (IsPrevFT) {
|
2017-11-28 09:57:21 -08:00
|
|
|
PrevBB->addSuccessor(BB);
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
2016-06-07 16:27:52 -07:00
|
|
|
if (BB->empty()) {
|
2015-10-20 10:51:17 -07:00
|
|
|
IsPrevFT = true;
|
2016-06-07 16:27:52 -07:00
|
|
|
PrevBB = BB;
|
2015-11-08 12:23:54 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
MCInst *LastInstr = BB->getLastNonPseudoInstr();
|
2017-10-23 23:32:40 -07:00
|
|
|
assert(LastInstr &&
|
|
|
|
|
"should have non-pseudo instruction in non-empty block");
|
2016-07-13 18:57:40 -07:00
|
|
|
|
2016-06-07 16:27:52 -07:00
|
|
|
if (BB->succ_size() == 0) {
|
2017-11-28 09:57:21 -08:00
|
|
|
// Since there are no existing successors, we know the last instruction is
|
|
|
|
|
// not a conditional branch. Thus if it's a terminator, it shouldn't be a
|
|
|
|
|
// fall-through.
|
|
|
|
|
//
|
|
|
|
|
// Conditional tail call is a special case since we don't add a taken
|
|
|
|
|
// branch successor for it.
|
2018-03-09 09:45:13 -08:00
|
|
|
IsPrevFT = !MIB->isTerminator(*LastInstr) ||
|
|
|
|
|
MIB->getConditionalTailCall(*LastInstr);
|
2016-06-07 16:27:52 -07:00
|
|
|
} else if (BB->succ_size() == 1) {
|
2018-03-09 09:45:13 -08:00
|
|
|
IsPrevFT = MIB->isConditionalBranch(*LastInstr);
|
2015-10-09 17:21:14 -07:00
|
|
|
} else {
|
|
|
|
|
IsPrevFT = false;
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-07 16:27:52 -07:00
|
|
|
PrevBB = BB;
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!IsPrevFT) {
|
|
|
|
|
// Possibly a call that does not return.
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "last block was marked as a fall-through in " << *this
|
|
|
|
|
<< '\n');
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
2019-07-02 10:48:43 -07:00
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
// Assign landing pads and throwers info.
|
2017-10-26 18:36:30 -07:00
|
|
|
recomputeLandingPads();
|
2016-05-24 09:26:25 -07:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
// Assign CFI information to each BB entry.
|
|
|
|
|
annotateCFIState();
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
// Annotate invoke instructions with GNU_args_size data.
|
2019-07-12 07:25:50 -07:00
|
|
|
propagateGnuArgsSizeInfo(AllocatorId);
|
2017-11-28 09:57:21 -08:00
|
|
|
|
|
|
|
|
// Set the basic block layout to the original order and set end offsets.
|
2017-05-16 09:27:34 -07:00
|
|
|
PrevBB = nullptr;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : BasicBlocks) {
|
2016-07-13 18:57:40 -07:00
|
|
|
BasicBlocksLayout.emplace_back(BB);
|
2017-05-16 09:27:34 -07:00
|
|
|
if (PrevBB)
|
|
|
|
|
PrevBB->setEndOffset(BB->getOffset());
|
|
|
|
|
PrevBB = BB;
|
2016-07-13 18:57:40 -07:00
|
|
|
}
|
2017-05-16 09:27:34 -07:00
|
|
|
PrevBB->setEndOffset(getSize());
|
2016-07-13 18:57:40 -07:00
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
updateLayoutIndices();
|
2017-10-23 23:32:40 -07:00
|
|
|
|
2019-01-31 11:23:02 -08:00
|
|
|
normalizeCFIState();
|
|
|
|
|
|
2018-02-02 14:46:21 -08:00
|
|
|
// Clean-up memory taken by intermediate structures.
|
|
|
|
|
//
|
|
|
|
|
// NB: don't clear Labels list as we may need them if we mark the function
|
|
|
|
|
// as non-simple later in the process of discovering extra entry points.
|
|
|
|
|
clearList(Instructions);
|
|
|
|
|
clearList(OffsetToCFI);
|
|
|
|
|
clearList(TakenBranches);
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
// Update the state.
|
|
|
|
|
CurrentState = State::CFG;
|
2016-08-22 14:24:09 -07:00
|
|
|
|
2019-05-22 11:26:58 -07:00
|
|
|
// Make any necessary adjustments for indirect branches.
|
2019-07-12 07:25:50 -07:00
|
|
|
if (!postProcessIndirectBranches(AllocatorId)) {
|
2019-05-22 11:26:58 -07:00
|
|
|
if (opts::Verbosity) {
|
|
|
|
|
errs() << "BOLT-WARNING: failed to post-process indirect branches for "
|
|
|
|
|
<< *this << '\n';
|
|
|
|
|
}
|
|
|
|
|
// In relocation mode we want to keep processing the function but avoid
|
|
|
|
|
// optimizing it.
|
|
|
|
|
setSimple(false);
|
|
|
|
|
}
|
|
|
|
|
|
2019-06-28 09:21:27 -07:00
|
|
|
clearList(ExternallyReferencedOffsets);
|
|
|
|
|
clearList(UnknownIndirectBranchOffsets);
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
return true;
|
|
|
|
|
}
|
2017-03-03 11:35:41 -08:00
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
void BinaryFunction::postProcessCFG() {
|
|
|
|
|
if (isSimple() && !BasicBlocks.empty()) {
|
|
|
|
|
// Convert conditional tail call branches to conditional branches that jump
|
|
|
|
|
// to a tail call.
|
|
|
|
|
removeConditionalTailCalls();
|
|
|
|
|
|
2019-05-22 11:26:58 -07:00
|
|
|
postProcessProfile();
|
2017-11-28 09:57:21 -08:00
|
|
|
|
2019-05-22 11:26:58 -07:00
|
|
|
// Eliminate inconsistencies between branch instructions and CFG.
|
|
|
|
|
postProcessBranches();
|
2017-11-28 09:57:21 -08:00
|
|
|
}
|
2017-08-02 10:59:33 -07:00
|
|
|
|
2019-05-22 11:26:58 -07:00
|
|
|
calculateMacroOpFusionStats();
|
|
|
|
|
|
2018-02-02 14:46:21 -08:00
|
|
|
// The final cleanup of intermediate structures.
|
2017-04-18 23:32:11 -07:00
|
|
|
clearList(IgnoredBranches);
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2019-11-03 21:57:15 -08:00
|
|
|
// Remove "Offset" annotations, unless we need an address-translation table
|
|
|
|
|
// later. This has no cost, since annotations are allocated by a bump-pointer
|
|
|
|
|
// allocator and won't be released anyway until late in the pipeline.
|
|
|
|
|
if (!requiresAddressTranslation() && !opts::Instrument)
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : layout())
|
|
|
|
|
for (MCInst &Inst : *BB)
|
2019-04-12 17:33:46 -07:00
|
|
|
BC.MIB->removeAnnotation(Inst, "Offset");
|
2017-02-27 21:44:38 -08:00
|
|
|
|
2018-04-13 15:46:19 -07:00
|
|
|
assert((!isSimple() || validateCFG()) &&
|
|
|
|
|
"invalid CFG detected after post-processing");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::calculateMacroOpFusionStats() {
|
|
|
|
|
if (!getBinaryContext().isX86())
|
|
|
|
|
return;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : layout()) {
|
2018-04-13 15:46:19 -07:00
|
|
|
auto II = BB->getMacroOpFusionPair();
|
|
|
|
|
if (II == BB->end())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Check offset of the second instruction.
|
|
|
|
|
// FIXME: arch-specific.
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint32_t Offset =
|
|
|
|
|
BC.MIB->getAnnotationWithDefault<uint32_t>(*std::next(II), "Offset", 0);
|
2018-04-13 15:46:19 -07:00
|
|
|
if (!Offset || (getAddress() + Offset) % 64)
|
|
|
|
|
continue;
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
|
|
|
|
|
<< Twine::utohexstr(getAddress() + Offset)
|
|
|
|
|
<< " in function " << *this << "; executed "
|
|
|
|
|
<< BB->getKnownExecutionCount() << " times.\n");
|
2018-04-13 15:46:19 -07:00
|
|
|
++BC.MissedMacroFusionPairs;
|
|
|
|
|
BC.MissedMacroFusionExecCount += BB->getKnownExecutionCount();
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
|
2017-08-02 10:59:33 -07:00
|
|
|
void BinaryFunction::removeTagsFromProfile() {
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : BasicBlocks) {
|
2017-08-02 10:59:33 -07:00
|
|
|
if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
|
|
|
|
|
BB->ExecutionCount = 0;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
|
2017-08-02 10:59:33 -07:00
|
|
|
if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
|
|
|
|
|
BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE)
|
|
|
|
|
continue;
|
|
|
|
|
BI.Count = 0;
|
|
|
|
|
BI.MispredictedCount = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
void BinaryFunction::removeConditionalTailCalls() {
|
2017-10-23 23:32:40 -07:00
|
|
|
// Blocks to be appended at the end.
|
|
|
|
|
std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;
|
|
|
|
|
|
|
|
|
|
for (auto BBI = begin(); BBI != end(); ++BBI) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryBasicBlock &BB = *BBI;
|
|
|
|
|
MCInst *CTCInstr = BB.getLastNonPseudoInstr();
|
2017-10-23 23:32:40 -07:00
|
|
|
if (!CTCInstr)
|
2016-07-13 18:57:40 -07:00
|
|
|
continue;
|
2017-10-23 23:32:40 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
Optional<uint64_t> TargetAddressOrNone =
|
|
|
|
|
BC.MIB->getConditionalTailCall(*CTCInstr);
|
2017-10-23 23:32:40 -07:00
|
|
|
if (!TargetAddressOrNone)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Gather all necessary information about CTC instruction before
|
|
|
|
|
// annotations are destroyed.
|
2021-04-08 00:19:26 -07:00
|
|
|
const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(CTCInstr);
|
2017-10-23 23:32:40 -07:00
|
|
|
uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE;
|
|
|
|
|
uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE;
|
|
|
|
|
if (hasValidProfile()) {
|
2020-07-16 17:35:55 -07:00
|
|
|
CTCTakenCount =
|
|
|
|
|
BC.MIB->getAnnotationWithDefault<uint64_t>(*CTCInstr, "CTCTakenCount");
|
|
|
|
|
CTCMispredCount =
|
|
|
|
|
BC.MIB->getAnnotationWithDefault<uint64_t>(*CTCInstr,
|
|
|
|
|
"CTCMispredCount");
|
2016-07-13 18:57:40 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Assert that the tail call does not throw.
|
|
|
|
assert(!BC.MIB->getEHInfo(*CTCInstr) &&
|
|
|
|
|
"found tail call with associated landing pad");
|
2016-07-13 18:57:40 -07:00
|
|
|
|
2017-10-23 23:32:40 -07:00
|
|
|
// Create a basic block with an unconditional tail call instruction using
|
|
|
|
|
// the same destination.
|
2021-04-08 00:19:26 -07:00
|
|
|
const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(*CTCInstr);
|
2017-10-23 23:32:40 -07:00
|
|
|
assert(CTCTargetLabel && "symbol expected for conditional tail call");
|
|
|
|
|
MCInst TailCallInstr;
|
2018-03-09 09:45:13 -08:00
|
|
|
BC.MIB->createTailCall(TailCallInstr, CTCTargetLabel, BC.Ctx.get());
|
2019-04-12 17:33:46 -07:00
|
|
|
// Link new BBs to the original input offset of the BB where the CTC
|
|
|
|
|
// is, so we can map samples recorded in new BBs back to the original BB
|
|
|
|
|
// seen in the input binary (if using BAT).
|
2021-04-08 00:19:26 -07:00
|
|
|
std::unique_ptr<BinaryBasicBlock> TailCallBB = createBasicBlock(
|
|
|
|
|
BB.getInputOffset(), BC.Ctx->createNamedTempSymbol("TC"));
|
2017-10-23 23:32:40 -07:00
|
|
|
TailCallBB->addInstruction(TailCallInstr);
|
|
|
|
|
TailCallBB->setCFIState(CFIStateBeforeCTC);
|
2016-08-29 21:11:22 -07:00
|
|
|
|
2017-10-23 23:32:40 -07:00
|
|
|
// Add CFG edge with profile info from BB to TailCallBB.
|
|
|
|
|
BB.addSuccessor(TailCallBB.get(), CTCTakenCount, CTCMispredCount);
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
// Add execution count for the block.
|
2017-10-23 23:32:40 -07:00
|
|
|
TailCallBB->setExecutionCount(CTCTakenCount);
|
|
|
|
|
|
2018-03-09 09:45:13 -08:00
|
|
|
BC.MIB->convertTailCallToJmp(*CTCInstr);
|
2017-11-22 16:17:36 -08:00
|
|
|
|
2018-03-09 09:45:13 -08:00
|
|
|
BC.MIB->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
|
2018-02-13 11:21:59 -08:00
|
|
|
BC.Ctx.get());
|
2017-10-23 23:32:40 -07:00
|
|
|
|
2018-02-13 11:21:59 -08:00
|
|
|
// Add basic block to the list that will be added to the end.
|
|
|
|
|
NewBlocks.emplace_back(std::move(TailCallBB));
|
|
|
|
|
|
|
|
|
|
// Swap edges as the TailCallBB corresponds to the taken branch.
|
|
|
|
|
BB.swapConditionalSuccessors();
|
2017-11-28 09:57:21 -08:00
|
|
|
|
|
|
|
|
// This branch is no longer a conditional tail call.
|
2018-03-09 09:45:13 -08:00
|
|
|
BC.MIB->unsetConditionalTailCall(*CTCInstr);
|
2016-07-13 18:57:40 -07:00
|
|
|
}
|
2017-10-23 23:32:40 -07:00
|
|
|
|
2020-07-16 17:35:55 -07:00
|
|
|
insertBasicBlocks(std::prev(end()),
|
|
|
|
|
std::move(NewBlocks),
|
2017-10-23 23:32:40 -07:00
|
|
|
/* UpdateLayout */ true,
|
|
|
|
|
/* UpdateCFIState */ false);
|
2016-07-13 18:57:40 -07:00
|
|
|
}
|
|
|
|
|
|
2017-12-07 15:00:41 -08:00
|
|
|
uint64_t BinaryFunction::getFunctionScore() const {
|
2015-11-19 17:59:41 -08:00
|
|
|
if (FunctionScore != -1)
|
|
|
|
|
return FunctionScore;
|
|
|
|
|
|
2017-12-13 23:12:01 -08:00
|
|
|
if (!isSimple() || !hasValidProfile()) {
|
|
|
|
|
FunctionScore = 0;
|
|
|
|
|
return FunctionScore;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-13 15:27:59 -08:00
|
|
|
uint64_t TotalScore = 0ULL;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : layout()) {
|
2015-11-13 15:27:59 -08:00
|
|
|
uint64_t BBExecCount = BB->getExecutionCount();
|
|
|
|
|
if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
|
|
|
|
|
continue;
|
|
|
|
|
TotalScore += BBExecCount;
|
|
|
|
|
}
|
2015-11-19 17:59:41 -08:00
|
|
|
FunctionScore = TotalScore;
|
|
|
|
|
return FunctionScore;
|
2015-11-13 15:27:59 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
void BinaryFunction::annotateCFIState() {
|
|
|
|
|
assert(CurrentState == State::Disassembled && "unexpected function state");
|
2015-11-08 12:23:54 -08:00
|
|
|
assert(!BasicBlocks.empty() && "basic block list should not be empty");
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
// This is an index of the last processed CFI in FDE CFI program.
|
2017-10-23 23:32:40 -07:00
|
|
|
uint32_t State = 0;
|
2017-02-24 21:59:33 -08:00
|
|
|
|
|
|
|
|
// This is an index of RememberState CFI reflecting effective state right
|
|
|
|
|
// after execution of RestoreState CFI.
|
|
|
|
|
//
|
|
|
|
|
// It differs from State iff the CFI at (State-1)
|
|
|
|
|
// was RestoreState (modulo GNU_args_size CFIs, which are ignored).
|
|
|
|
|
//
|
|
|
|
|
// This allows us to generate shorter replay sequences when producing new
|
|
|
|
|
// CFI programs.
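//
// A minimal sketch with a hypothetical FDE CFI sequence (0-based indices
// into FrameInstructions):
//   [0] def_cfa_offset 16
//   [1] remember_state
//   [2] def_cfa_offset 24
//   [3] restore_state
// After processing all four, State == 4 while EffectiveState == 1, i.e. the
// state right after [0], so a replay from this point only needs CFI [0]
// rather than the whole remember/restore pair.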
|
2017-10-23 23:32:40 -07:00
|
|
|
uint32_t EffectiveState = 0;
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
// For tracking RememberState/RestoreState sequences.
|
2017-10-23 23:32:40 -07:00
|
|
|
std::stack<uint32_t> StateStack;
|
2017-02-24 21:59:33 -08:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : BasicBlocks) {
|
2017-02-24 21:59:33 -08:00
|
|
|
BB->setCFIState(EffectiveState);
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const MCInst &Instr : *BB) {
|
|
|
|
|
const MCCFIInstruction *CFI = getCFIFor(Instr);
|
2017-02-24 21:59:33 -08:00
|
|
|
if (!CFI)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
++State;
|
|
|
|
|
|
2017-10-23 23:32:40 -07:00
|
|
|
switch (CFI->getOperation()) {
|
|
|
|
|
case MCCFIInstruction::OpRememberState:
|
2017-02-24 21:59:33 -08:00
|
|
|
StateStack.push(EffectiveState);
|
2017-11-13 11:05:47 -08:00
|
|
|
EffectiveState = State;
|
2017-10-23 23:32:40 -07:00
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpRestoreState:
|
2017-02-24 21:59:33 -08:00
|
|
|
assert(!StateStack.empty() && "corrupt CFI stack");
|
|
|
|
|
EffectiveState = StateStack.top();
|
2015-11-08 12:23:54 -08:00
|
|
|
StateStack.pop();
|
2017-10-23 23:32:40 -07:00
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpGnuArgsSize:
|
2017-02-24 21:59:33 -08:00
|
|
|
// OpGnuArgsSize CFIs do not affect the CFI state.
|
2017-10-23 23:32:40 -07:00
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
// Any other CFI updates the state.
|
2017-02-24 21:59:33 -08:00
|
|
|
EffectiveState = State;
|
2017-10-23 23:32:40 -07:00
|
|
|
break;
|
2016-09-07 18:59:23 -07:00
|
|
|
}
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
assert(StateStack.empty() && "corrupt CFI stack");
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
|
|
|
|
|
2018-09-05 14:36:52 -07:00
|
|
|
namespace {
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2018-09-05 14:36:52 -07:00
|
|
|
/// Our full interpretation of a DWARF CFI machine state at a given point
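///
/// As a rough illustration (a hypothetical CIE/FDE, with symbolic register
/// names standing in for DWARF register numbers): after interpreting
///   CIE[0]: def_cfa %rsp, 8
///   FDE[0]: def_cfa_offset 16
///   FDE[1]: offset %rbp, -16
/// a snapshot built for State == 2 holds CFAReg == %rsp, CFAOffset == 16 and
/// RegRule[%rbp] == 1, the FDE index of the CFI defining %rbp's location.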
|
|
|
|
|
struct CFISnapshot {
|
|
|
|
|
/// CFA register number and offset defining the canonical frame at this
|
|
|
|
|
/// point, or the number of a rule (CFI state) that computes it with a
|
|
|
|
|
/// DWARF expression. This number will be negative if it refers to a CFI
|
|
|
|
|
/// located in the CIE instead of the FDE.
|
|
|
|
|
uint32_t CFAReg;
|
|
|
|
|
int32_t CFAOffset;
|
|
|
|
|
int32_t CFARule;
|
|
|
|
|
/// Mapping of rules (CFI states) that define the location of each
|
|
|
|
|
/// register. If absent, no rule defining the location of such register
|
|
|
|
|
/// was ever read. This number will be negative if it refers to a CFI
|
|
|
|
|
/// located in the CIE instead of the FDE.
|
|
|
|
|
DenseMap<int32_t, int32_t> RegRule;
|
|
|
|
|
|
|
|
|
|
/// References to CIE, FDE and expanded instructions after a restore state
|
|
|
|
|
const std::vector<MCCFIInstruction> &CIE;
|
|
|
|
|
const std::vector<MCCFIInstruction> &FDE;
|
|
|
|
|
const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents;
|
|
|
|
|
|
|
|
|
|
/// Current FDE CFI number representing the state where the snapshot is at
|
|
|
|
|
int32_t CurState;
|
|
|
|
|
|
|
|
|
|
/// Used when we don't have information about which state/rule to apply
|
|
|
|
|
/// to recover the location of either the CFA or a specific register
|
|
|
|
|
constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min();
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
/// Update our snapshot by executing a single CFI
|
|
|
|
|
void update(const MCCFIInstruction &Instr, int32_t RuleNumber) {
|
|
|
|
|
switch (Instr.getOperation()) {
|
|
|
|
|
case MCCFIInstruction::OpSameValue:
|
|
|
|
|
case MCCFIInstruction::OpRelOffset:
|
|
|
|
|
case MCCFIInstruction::OpOffset:
|
|
|
|
|
case MCCFIInstruction::OpRestore:
|
|
|
|
|
case MCCFIInstruction::OpUndefined:
|
|
|
|
|
case MCCFIInstruction::OpRegister:
|
|
|
|
|
RegRule[Instr.getRegister()] = RuleNumber;
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpDefCfaRegister:
|
|
|
|
|
CFAReg = Instr.getRegister();
|
|
|
|
|
CFARule = UNKNOWN;
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpDefCfaOffset:
|
|
|
|
|
CFAOffset = Instr.getOffset();
|
|
|
|
|
CFARule = UNKNOWN;
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpDefCfa:
|
|
|
|
|
CFAReg = Instr.getRegister();
|
|
|
|
|
CFAOffset = Instr.getOffset();
|
|
|
|
|
CFARule = UNKNOWN;
|
|
|
|
|
break;
|
2020-12-01 16:29:39 -08:00
|
|
|
case MCCFIInstruction::OpEscape: {
|
|
|
|
|
Optional<uint8_t> Reg = readDWARFExpressionTargetReg(Instr.getValues());
|
|
|
|
|
// Handle DW_CFA_def_cfa_expression
|
|
|
|
|
if (!Reg) {
|
|
|
|
|
CFARule = RuleNumber;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
RegRule[*Reg] = RuleNumber;
|
2018-09-05 14:36:52 -07:00
|
|
|
break;
|
2020-12-01 16:29:39 -08:00
|
|
|
}
|
2018-09-05 14:36:52 -07:00
|
|
|
case MCCFIInstruction::OpAdjustCfaOffset:
|
|
|
|
|
case MCCFIInstruction::OpWindowSave:
|
2020-12-01 16:29:39 -08:00
|
|
|
case MCCFIInstruction::OpNegateRAState:
|
2018-09-05 14:36:52 -07:00
|
|
|
llvm_unreachable("unsupported CFI opcode");
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpRememberState:
|
|
|
|
|
case MCCFIInstruction::OpRestoreState:
|
|
|
|
|
case MCCFIInstruction::OpGnuArgsSize:
|
|
|
|
|
// do not affect CFI state
|
|
|
|
|
break;
|
2017-02-24 21:59:33 -08:00
|
|
|
}
|
2018-09-05 14:36:52 -07:00
|
|
|
}
|
2016-01-16 14:58:22 -08:00
|
|
|
|
2018-09-05 14:36:52 -07:00
|
|
|
public:
|
|
|
|
|
/// Advance state reading FDE CFI instructions up to State number
|
|
|
|
|
void advanceTo(int32_t State) {
|
|
|
|
|
for (int32_t I = CurState, E = State; I != E; ++I) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const MCCFIInstruction &Instr = FDE[I];
|
2018-09-05 14:36:52 -07:00
|
|
|
if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
|
|
|
|
|
update(Instr, I);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// If restore state instruction, fetch the equivalent CFIs that have
|
|
|
|
|
// the same effect as this restore. This is used to ensure remember-
|
|
|
|
|
// restore pairs are completely removed.
|
|
|
|
|
auto Iter = FrameRestoreEquivalents.find(I);
|
|
|
|
|
if (Iter == FrameRestoreEquivalents.end())
|
|
|
|
|
continue;
|
|
|
|
|
for (int32_t RuleNumber : Iter->second) {
|
|
|
|
|
update(FDE[RuleNumber], RuleNumber);
|
2017-02-24 21:59:33 -08:00
|
|
|
}
|
|
|
|
|
}
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2018-09-05 14:36:52 -07:00
|
|
|
assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) ||
|
|
|
|
|
CFARule != UNKNOWN) &&
|
|
|
|
|
"CIE did not define default CFA?");
|
|
|
|
|
|
|
|
|
|
CurState = State;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Interpret all CIE and FDE instructions up until CFI State number and
|
|
|
|
|
/// populate this snapshot
|
|
|
|
|
CFISnapshot(
|
|
|
|
|
const std::vector<MCCFIInstruction> &CIE,
|
|
|
|
|
const std::vector<MCCFIInstruction> &FDE,
|
|
|
|
|
const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
|
|
|
|
|
int32_t State)
|
|
|
|
|
: CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) {
|
|
|
|
|
CFAReg = UNKNOWN;
|
|
|
|
|
CFAOffset = UNKNOWN;
|
|
|
|
|
CFARule = UNKNOWN;
|
|
|
|
|
CurState = 0;
|
|
|
|
|
|
|
|
|
|
for (int32_t I = 0, E = CIE.size(); I != E; ++I) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const MCCFIInstruction &Instr = CIE[I];
|
2018-09-05 14:36:52 -07:00
|
|
|
update(Instr, -I);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
advanceTo(State);
|
|
|
|
|
}
|
2020-07-16 17:35:55 -07:00
|
|
|
|
2018-09-05 14:36:52 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/// A CFI snapshot with the capability of checking if incremental additions to
|
|
|
|
|
/// it are redundant. This is used to ensure we do not emit two CFI instructions
|
|
|
|
|
/// back-to-back that are doing the same state change, or to avoid emitting a
|
|
|
|
|
/// CFI at all when the state at that point would not be modified after that CFI.
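///
/// For instance (an illustrative case, with a symbolic register name): if the
/// snapshot already places the CFA at %rsp+16, a later def_cfa %rsp, 16 is
/// reported as redundant by isRedundant(), while a def_cfa_offset 24 is not,
/// since it changes the tracked offset.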
|
|
|
|
|
struct CFISnapshotDiff : public CFISnapshot {
|
|
|
|
|
bool RestoredCFAReg{false};
|
|
|
|
|
bool RestoredCFAOffset{false};
|
|
|
|
|
DenseMap<int32_t, bool> RestoredRegs;
|
|
|
|
|
|
|
|
|
|
CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {}
|
|
|
|
|
|
|
|
|
|
CFISnapshotDiff(
|
|
|
|
|
const std::vector<MCCFIInstruction> &CIE,
|
|
|
|
|
const std::vector<MCCFIInstruction> &FDE,
|
|
|
|
|
const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents,
|
|
|
|
|
int32_t State)
|
|
|
|
|
: CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {}
|
|
|
|
|
|
|
|
|
|
/// Return true if applying Instr to this state is redundant and can be
|
|
|
|
|
/// dismissed.
|
|
|
|
|
bool isRedundant(const MCCFIInstruction &Instr) {
|
|
|
|
|
switch (Instr.getOperation()) {
|
|
|
|
|
case MCCFIInstruction::OpSameValue:
|
|
|
|
|
case MCCFIInstruction::OpRelOffset:
|
|
|
|
|
case MCCFIInstruction::OpOffset:
|
|
|
|
|
case MCCFIInstruction::OpRestore:
|
|
|
|
|
case MCCFIInstruction::OpUndefined:
|
|
|
|
|
case MCCFIInstruction::OpRegister:
|
2020-12-01 16:29:39 -08:00
|
|
|
case MCCFIInstruction::OpEscape: {
|
|
|
|
|
uint32_t Reg;
|
|
|
|
|
if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
|
|
|
|
|
Reg = Instr.getRegister();
|
|
|
|
|
} else {
|
|
|
|
|
Optional<uint8_t> R = readDWARFExpressionTargetReg(Instr.getValues());
|
|
|
|
|
// Handle DW_CFA_def_cfa_expression
|
|
|
|
|
if (!R) {
|
|
|
|
|
if (RestoredCFAReg && RestoredCFAOffset)
|
|
|
|
|
return true;
|
|
|
|
|
RestoredCFAReg = true;
|
|
|
|
|
RestoredCFAOffset = true;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
Reg = *R;
|
|
|
|
|
}
|
|
|
|
|
if (RestoredRegs[Reg])
|
2018-09-05 14:36:52 -07:00
|
|
|
return true;
|
2020-12-01 16:29:39 -08:00
|
|
|
RestoredRegs[Reg] = true;
|
2018-09-05 14:36:52 -07:00
|
|
|
const int32_t CurRegRule =
|
2020-12-01 16:29:39 -08:00
|
|
|
RegRule.find(Reg) != RegRule.end() ? RegRule[Reg] : UNKNOWN;
|
2018-09-05 14:36:52 -07:00
|
|
|
if (CurRegRule == UNKNOWN) {
|
|
|
|
|
if (Instr.getOperation() == MCCFIInstruction::OpRestore ||
|
|
|
|
|
Instr.getOperation() == MCCFIInstruction::OpSameValue)
|
|
|
|
|
return true;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
const MCCFIInstruction &LastDef =
|
|
|
|
|
CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule];
|
|
|
|
|
return LastDef == Instr;
|
|
|
|
|
}
|
|
|
|
|
case MCCFIInstruction::OpDefCfaRegister:
|
|
|
|
|
if (RestoredCFAReg)
|
|
|
|
|
return true;
|
|
|
|
|
RestoredCFAReg = true;
|
|
|
|
|
return CFAReg == Instr.getRegister();
|
|
|
|
|
case MCCFIInstruction::OpDefCfaOffset:
|
|
|
|
|
if (RestoredCFAOffset)
|
|
|
|
|
return true;
|
|
|
|
|
RestoredCFAOffset = true;
|
|
|
|
|
return CFAOffset == Instr.getOffset();
|
|
|
|
|
case MCCFIInstruction::OpDefCfa:
|
|
|
|
|
if (RestoredCFAReg && RestoredCFAOffset)
|
|
|
|
|
return true;
|
|
|
|
|
RestoredCFAReg = true;
|
|
|
|
|
RestoredCFAOffset = true;
|
|
|
|
|
return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset();
|
|
|
|
|
case MCCFIInstruction::OpAdjustCfaOffset:
|
|
|
|
|
case MCCFIInstruction::OpWindowSave:
|
2020-12-01 16:29:39 -08:00
|
|
|
case MCCFIInstruction::OpNegateRAState:
|
2018-09-05 14:36:52 -07:00
|
|
|
llvm_unreachable("unsupported CFI opcode");
|
|
|
|
|
return false;
|
|
|
|
|
case MCCFIInstruction::OpRememberState:
|
|
|
|
|
case MCCFIInstruction::OpRestoreState:
|
|
|
|
|
case MCCFIInstruction::OpGnuArgsSize:
|
|
|
|
|
// do not affect CFI state
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
|
|
|
|
|
bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState,
|
|
|
|
|
BinaryBasicBlock *InBB,
|
|
|
|
|
BinaryBasicBlock::iterator InsertIt) {
|
|
|
|
|
if (FromState == ToState)
|
2017-02-24 21:59:33 -08:00
|
|
|
return true;
|
2018-09-05 14:36:52 -07:00
|
|
|
assert(FromState < ToState && "can only replay CFIs forward");
|
|
|
|
|
|
|
|
|
|
CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions,
|
|
|
|
|
FrameRestoreEquivalents, FromState);
|
|
|
|
|
|
|
|
|
|
std::vector<uint32_t> NewCFIs;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (int32_t CurState = FromState; CurState < ToState; ++CurState) {
|
2018-09-05 14:36:52 -07:00
|
|
|
MCCFIInstruction *Instr = &FrameInstructions[CurState];
|
|
|
|
|
if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) {
|
|
|
|
|
auto Iter = FrameRestoreEquivalents.find(CurState);
|
|
|
|
|
assert(Iter != FrameRestoreEquivalents.end());
|
2020-07-16 17:35:55 -07:00
|
|
|
NewCFIs.insert(NewCFIs.end(), Iter->second.begin(),
|
|
|
|
|
Iter->second.end());
|
2018-09-05 14:36:52 -07:00
|
|
|
// RestoreState / Remember will be filtered out later by CFISnapshotDiff,
|
|
|
|
|
// so we might as well fall-through here.
|
|
|
|
|
}
|
|
|
|
|
NewCFIs.push_back(CurState);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Replay instructions while avoiding duplicates
|
|
|
|
|
for (auto I = NewCFIs.rbegin(), E = NewCFIs.rend(); I != E; ++I) {
|
|
|
|
|
if (CFIDiff.isRedundant(FrameInstructions[*I]))
|
|
|
|
|
continue;
|
|
|
|
|
InsertIt = addCFIPseudo(InBB, InsertIt, *I);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
SmallVector<int32_t, 4>
|
|
|
|
|
BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
|
|
|
|
|
BinaryBasicBlock *InBB,
|
|
|
|
|
BinaryBasicBlock::iterator &InsertIt) {
|
|
|
|
|
SmallVector<int32_t, 4> NewStates;
|
|
|
|
|
|
|
|
|
|
CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions,
|
2020-07-16 17:35:55 -07:00
|
|
|
FrameRestoreEquivalents, ToState);
|
2018-09-05 14:36:52 -07:00
|
|
|
CFISnapshotDiff FromCFITable(ToCFITable);
|
|
|
|
|
FromCFITable.advanceTo(FromState);
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
auto undoStateDefCfa = [&]() {
|
|
|
|
|
if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) {
|
|
|
|
|
FrameInstructions.emplace_back(MCCFIInstruction::cfiDefCfa(
|
|
|
|
|
nullptr, ToCFITable.CFAReg, ToCFITable.CFAOffset));
|
|
|
|
|
if (FromCFITable.isRedundant(FrameInstructions.back())) {
|
|
|
|
|
FrameInstructions.pop_back();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
NewStates.push_back(FrameInstructions.size() - 1);
|
|
|
|
|
InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
|
|
|
|
|
++InsertIt;
|
|
|
|
|
} else if (ToCFITable.CFARule < 0) {
|
|
|
|
|
if (FromCFITable.isRedundant(CIEFrameInstructions[-ToCFITable.CFARule]))
|
|
|
|
|
return;
|
|
|
|
|
NewStates.push_back(FrameInstructions.size());
|
|
|
|
|
InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
|
|
|
|
|
++InsertIt;
|
|
|
|
|
FrameInstructions.emplace_back(CIEFrameInstructions[-ToCFITable.CFARule]);
|
|
|
|
|
} else if (!FromCFITable.isRedundant(
|
|
|
|
|
FrameInstructions[ToCFITable.CFARule])) {
|
|
|
|
|
NewStates.push_back(ToCFITable.CFARule);
|
|
|
|
|
InsertIt = addCFIPseudo(InBB, InsertIt, ToCFITable.CFARule);
|
|
|
|
|
++InsertIt;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2018-09-05 14:36:52 -07:00
|
|
|
auto undoState = [&](const MCCFIInstruction &Instr) {
|
|
|
|
|
switch (Instr.getOperation()) {
|
|
|
|
|
case MCCFIInstruction::OpRememberState:
|
|
|
|
|
case MCCFIInstruction::OpRestoreState:
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpSameValue:
|
|
|
|
|
case MCCFIInstruction::OpRelOffset:
|
|
|
|
|
case MCCFIInstruction::OpOffset:
|
|
|
|
|
case MCCFIInstruction::OpRestore:
|
|
|
|
|
case MCCFIInstruction::OpUndefined:
|
2020-12-01 16:29:39 -08:00
|
|
|
case MCCFIInstruction::OpEscape:
|
|
|
|
|
case MCCFIInstruction::OpRegister: {
|
|
|
|
|
uint32_t Reg;
|
|
|
|
|
if (Instr.getOperation() != MCCFIInstruction::OpEscape) {
|
|
|
|
|
Reg = Instr.getRegister();
|
|
|
|
|
} else {
|
|
|
|
|
Optional<uint8_t> R = readDWARFExpressionTargetReg(Instr.getValues());
|
|
|
|
|
// Handle DW_CFA_def_cfa_expression
|
|
|
|
|
if (!R) {
|
|
|
|
|
undoStateDefCfa();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
Reg = *R;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ToCFITable.RegRule.find(Reg) == ToCFITable.RegRule.end()) {
|
2018-09-05 14:36:52 -07:00
|
|
|
FrameInstructions.emplace_back(
|
2020-12-01 16:29:39 -08:00
|
|
|
MCCFIInstruction::createRestore(nullptr, Reg));
|
2018-09-05 14:36:52 -07:00
|
|
|
if (FromCFITable.isRedundant(FrameInstructions.back())) {
|
|
|
|
|
FrameInstructions.pop_back();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
NewStates.push_back(FrameInstructions.size() - 1);
|
|
|
|
|
InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size() - 1);
|
|
|
|
|
++InsertIt;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2020-12-01 16:29:39 -08:00
|
|
|
const int32_t Rule = ToCFITable.RegRule[Reg];
|
2018-09-05 14:36:52 -07:00
|
|
|
if (Rule < 0) {
|
|
|
|
|
if (FromCFITable.isRedundant(CIEFrameInstructions[-Rule]))
|
|
|
|
|
break;
|
|
|
|
|
NewStates.push_back(FrameInstructions.size());
|
|
|
|
|
InsertIt = addCFIPseudo(InBB, InsertIt, FrameInstructions.size());
|
|
|
|
|
++InsertIt;
|
|
|
|
|
FrameInstructions.emplace_back(CIEFrameInstructions[-Rule]);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (FromCFITable.isRedundant(FrameInstructions[Rule]))
|
|
|
|
|
break;
|
|
|
|
|
NewStates.push_back(Rule);
|
|
|
|
|
InsertIt = addCFIPseudo(InBB, InsertIt, Rule);
|
|
|
|
|
++InsertIt;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case MCCFIInstruction::OpDefCfaRegister:
|
|
|
|
|
case MCCFIInstruction::OpDefCfaOffset:
|
|
|
|
|
case MCCFIInstruction::OpDefCfa:
|
2020-12-01 16:29:39 -08:00
|
|
|
undoStateDefCfa();
|
2018-09-05 14:36:52 -07:00
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpAdjustCfaOffset:
|
|
|
|
|
case MCCFIInstruction::OpWindowSave:
|
2020-12-01 16:29:39 -08:00
|
|
|
case MCCFIInstruction::OpNegateRAState:
|
2018-09-05 14:36:52 -07:00
|
|
|
llvm_unreachable("unsupported CFI opcode");
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpGnuArgsSize:
|
|
|
|
|
// do not affect CFI state
|
|
|
|
|
break;
|
|
|
|
|
}
|
2017-02-24 21:59:33 -08:00
|
|
|
};
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2018-09-05 14:36:52 -07:00
|
|
|
// Undo all modifications from ToState to FromState
|
|
|
|
|
for (int32_t I = ToState, E = FromState; I != E; ++I) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const MCCFIInstruction &Instr = FrameInstructions[I];
|
2018-09-05 14:36:52 -07:00
|
|
|
if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
|
|
|
|
|
undoState(Instr);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
auto Iter = FrameRestoreEquivalents.find(I);
|
|
|
|
|
if (Iter == FrameRestoreEquivalents.end())
|
|
|
|
|
continue;
|
|
|
|
|
for (int32_t State : Iter->second)
|
|
|
|
|
undoState(FrameInstructions[State]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NewStates;
|
|
|
|
|
}
|
|
|
|
|
|
2019-01-31 11:23:02 -08:00
|
|
|
void BinaryFunction::normalizeCFIState() {
|
2018-09-05 14:36:52 -07:00
|
|
|
// Reordering blocks with remember-restore state instructions can be especially
|
|
|
|
|
// tricky. When rewriting the CFI, we omit remember-restore state instructions
|
|
|
|
|
// entirely. For restore state, we build a map expanding each restore to the
|
|
|
|
|
// equivalent unwindCFIState sequence required at that point to achieve the
|
|
|
|
|
// same effect as the restore. All remember-state CFIs are then just ignored.
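//
// A rough sketch with hypothetical FrameInstructions indices:
//   [4] remember_state
//   [5] def_cfa_offset 24
//   [6] restore_state
// FrameRestoreEquivalents[6] then receives the indices of CFIs produced by
// unwindCFIState() that recreate the state remembered at [4] (e.g. a def_cfa
// re-establishing the previous CFA), so later passes can replay the restore
// without the remember/restore pair itself.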
|
2019-01-31 11:23:02 -08:00
|
|
|
std::stack<int32_t> Stack;
|
|
|
|
|
for (BinaryBasicBlock *CurBB : BasicBlocksLayout) {
|
2018-09-05 14:36:52 -07:00
|
|
|
for (auto II = CurBB->begin(); II != CurBB->end(); ++II) {
|
2020-12-01 16:29:39 -08:00
|
|
|
if (const MCCFIInstruction *CFI = getCFIFor(*II)) {
|
2018-09-05 14:36:52 -07:00
|
|
|
if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
|
|
|
|
|
Stack.push(II->getOperand(0).getImm());
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
|
|
|
|
|
const int32_t RememberState = Stack.top();
|
|
|
|
|
const int32_t CurState = II->getOperand(0).getImm();
|
|
|
|
|
FrameRestoreEquivalents[CurState] =
|
|
|
|
|
unwindCFIState(CurState, RememberState, CurBB, II);
|
|
|
|
|
Stack.pop();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-01-31 11:23:02 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool BinaryFunction::finalizeCFIState() {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
|
dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
|
|
|
|
|
LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
|
|
|
|
|
<< ": ");
|
2019-07-02 10:48:43 -07:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
int32_t State = 0;
|
|
|
|
|
bool SeenCold = false;
|
2021-04-08 00:19:26 -07:00
|
|
|
const char *Sep = "";
|
2017-05-25 10:29:38 -07:00
|
|
|
(void)Sep;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : BasicBlocksLayout) {
|
|
|
|
|
const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2015-11-19 17:59:41 -08:00
|
|
|
// Hot-cold border: check if this is the first BB to be allocated in a cold
|
2018-09-05 14:36:52 -07:00
|
|
|
// region (with a different FDE). If yes, we need to reset the CFI state.
|
2017-02-24 21:59:33 -08:00
|
|
|
if (!SeenCold && BB->isCold()) {
|
2015-11-19 17:59:41 -08:00
|
|
|
State = 0;
|
2017-02-24 21:59:33 -08:00
|
|
|
SeenCold = true;
|
2016-08-24 14:25:33 -07:00
|
|
|
}
|
2015-11-19 17:59:41 -08:00
|
|
|
|
2016-01-16 14:58:22 -08:00
|
|
|
// We need to recover the correct state if it doesn't match the expected
|
|
|
|
|
// state at BB entry point.
|
2017-02-24 21:59:33 -08:00
|
|
|
if (BB->getCFIState() < State) {
|
2016-01-16 14:58:22 -08:00
|
|
|
// In this case, State is currently higher than what this BB expects it
|
2018-09-05 14:36:52 -07:00
|
|
|
// to be. To solve this, we need to insert CFI instructions to undo
|
|
|
|
|
// the effect of all CFI from BB's state to current State.
|
|
|
|
|
auto InsertIt = BB->begin();
|
|
|
|
|
unwindCFIState(State, BB->getCFIState(), BB, InsertIt);
|
2017-02-24 21:59:33 -08:00
|
|
|
} else if (BB->getCFIState() > State) {
|
|
|
|
|
// If BB's CFI state is greater than State, it means we are behind in the
|
2016-01-16 14:58:22 -08:00
|
|
|
// state. Just emit all instructions to reach this state at the
|
|
|
|
|
// beginning of this BB. If this sequence of instructions involve
|
|
|
|
|
// remember state or restore state, bail out.
|
2017-02-24 21:59:33 -08:00
|
|
|
if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
|
2016-01-16 14:58:22 -08:00
|
|
|
return false;
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
State = CFIStateAtExit;
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << Sep << State; Sep = ", ");
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "\n");
|
2018-09-05 14:36:52 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : BasicBlocksLayout) {
|
2020-07-16 17:35:55 -07:00
|
|
|
for (auto II = BB->begin(); II != BB->end(); ) {
|
2020-12-01 16:29:39 -08:00
|
|
|
const MCCFIInstruction *CFI = getCFIFor(*II);
|
2020-07-16 17:35:55 -07:00
|
|
|
if (CFI &&
|
|
|
|
|
(CFI->getOperation() == MCCFIInstruction::OpRememberState ||
|
|
|
|
|
CFI->getOperation() == MCCFIInstruction::OpRestoreState)) {
|
2019-01-31 11:23:02 -08:00
|
|
|
II = BB->eraseInstruction(II);
|
|
|
|
|
} else {
|
|
|
|
|
++II;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-09-05 14:36:52 -07:00
|
|
|
|
2015-11-08 12:23:54 -08:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-03 21:57:15 -08:00
|
|
|
bool BinaryFunction::requiresAddressTranslation() const {
|
2020-07-16 17:35:55 -07:00
|
|
|
return opts::EnableBAT || hasSDTMarker();
|
2019-11-03 21:57:15 -08:00
|
|
|
}
|
|
|
|
|
|
2017-05-22 11:04:01 -07:00
|
|
|
uint64_t BinaryFunction::getInstructionCount() const {
|
|
|
|
|
uint64_t Count = 0;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *const &Block : BasicBlocksLayout) {
|
2017-05-22 11:04:01 -07:00
|
|
|
Count += Block->getNumNonPseudos();
|
|
|
|
|
}
|
|
|
|
|
return Count;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-16 17:35:55 -07:00
|
|
|
bool BinaryFunction::hasLayoutChanged() const {
|
|
|
|
|
return ModifiedLayout;
|
|
|
|
|
}
|
2017-05-22 11:04:01 -07:00
|
|
|
|
|
|
|
|
uint64_t BinaryFunction::getEditDistance() const {
|
2017-11-08 14:29:20 -08:00
|
|
|
return ComputeEditDistance<BinaryBasicBlock *>(BasicBlocksPreviousLayout,
|
|
|
|
|
BasicBlocksLayout);
|
2017-05-22 11:04:01 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
void BinaryFunction::clearDisasmState() {
|
2018-02-02 16:07:11 -08:00
|
|
|
clearList(Instructions);
|
|
|
|
|
clearList(IgnoredBranches);
|
|
|
|
|
clearList(TakenBranches);
|
|
|
|
clearList(InterproceduralReferences);
|
|
|
|
|
|
|
|
|
|
if (BC.HasRelocations) {
|
2021-04-08 00:19:26 -07:00
|
|
|
for (std::pair<const uint32_t, MCSymbol *> &LI : Labels) {
|
|
|
|
BC.UndefinedSymbols.insert(LI.second);
|
|
|
|
|
}
|
|
|
|
|
if (FunctionEndLabel) {
|
|
|
|
|
BC.UndefinedSymbols.insert(FunctionEndLabel);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::setTrapOnEntry() {
|
|
|
|
|
clearDisasmState();
|
2018-02-02 16:07:11 -08:00
|
|
|
|
2020-04-19 22:29:54 -07:00
|
|
|
auto addTrapAtOffset = [&](uint64_t Offset) {
|
2018-02-02 16:07:11 -08:00
|
|
|
MCInst TrapInstr;
|
2018-03-09 09:45:13 -08:00
|
|
|
BC.MIB->createTrap(TrapInstr);
|
2020-04-19 22:29:54 -07:00
|
|
|
addInstruction(Offset, std::move(TrapInstr));
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
addTrapAtOffset(0);
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const std::pair<const uint32_t, MCSymbol *> &KV : getLabels()) {
|
2020-04-19 22:29:54 -07:00
|
|
|
if (getSecondaryEntryPointSymbol(KV.second)) {
|
|
|
|
|
addTrapAtOffset(KV.first);
|
|
|
|
|
}
|
2018-02-02 16:07:11 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TrapsOnEntry = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
void BinaryFunction::setIgnored() {
|
|
|
|
|
if (opts::processAllFunctions()) {
|
|
|
|
|
// We can accept ignored functions before they've been disassembled.
|
|
|
|
|
// In that case, they would still get disassembled and emited, but not
|
|
|
|
|
// optimized.
|
|
|
|
|
assert(CurrentState == State::Empty &&
|
|
|
|
|
"cannot ignore non-empty functions in current mode");
|
|
|
|
|
IsIgnored = true;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
clearDisasmState();
|
|
|
|
|
|
|
|
|
|
// Clear CFG state too.
|
|
|
|
|
if (hasCFG()) {
|
|
|
|
|
releaseCFG();
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : BasicBlocks) {
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
delete BB;
|
|
|
|
|
}
|
2020-06-18 20:59:01 -07:00
|
|
|
clearList(BasicBlocks);
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinaryBasicBlock *BB : DeletedBasicBlocks) {
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
delete BB;
|
|
|
|
|
}
|
2020-06-18 20:59:01 -07:00
|
|
|
clearList(DeletedBasicBlocks);
|
|
|
|
|
|
|
|
|
|
clearList(BasicBlocksLayout);
|
|
|
|
|
clearList(BasicBlocksPreviousLayout);
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
CurrentState = State::Empty;
|
|
|
|
|
|
|
|
|
|
IsIgnored = true;
|
|
|
|
|
IsSimple = false;
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
}
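
// Redirect references from cold basic blocks to ".cold" copies of constant
// island symbols, creating the cold symbols on first use and updating proxy
// maps for islands owned by other functions.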
void BinaryFunction::duplicateConstantIslands() {
  for (BinaryBasicBlock *BB : layout()) {
    if (!BB->isCold())
      continue;

    for (MCInst &Inst : *BB) {
      int OpNum = 0;
      for (MCOperand &Operand : Inst) {
        if (!Operand.isExpr()) {
          ++OpNum;
          continue;
        }
        const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum);
        // Check if this is an island symbol
        if (!Islands.Symbols.count(Symbol) &&
            !Islands.ProxySymbols.count(Symbol))
          continue;

        // Create cold symbol, if missing
        auto ISym = Islands.ColdSymbols.find(Symbol);
        MCSymbol *ColdSymbol;
        if (ISym != Islands.ColdSymbols.end()) {
          ColdSymbol = ISym->second;
        } else {
          ColdSymbol = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".cold");
          Islands.ColdSymbols[Symbol] = ColdSymbol;
          // Check if this is a proxy island symbol and update owner proxy map
          if (Islands.ProxySymbols.count(Symbol)) {
            BinaryFunction *Owner = Islands.ProxySymbols[Symbol];
            auto IProxiedSym = Owner->Islands.Proxies[this].find(Symbol);
            Owner->Islands.ColdProxies[this][IProxiedSym->second] = ColdSymbol;
          }
        }

        // Update instruction reference
        Operand = MCOperand::createExpr(BC.MIB->getTargetExprFor(
            Inst,
            MCSymbolRefExpr::create(ColdSymbol, MCSymbolRefExpr::VK_None,
                                    *BC.Ctx),
            *BC.Ctx, 0));
        ++OpNum;
      }
    }
  }
}
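
// Local helpers for CFG dumping: constructFilename() builds a .dot file name
// within the MAX_PATH limit, and formatEscapes() escapes strings for use in
// Graphviz attributes.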
namespace {

#ifndef MAX_PATH
#define MAX_PATH 255
#endif

std::string constructFilename(std::string Filename,
                              std::string Annotation,
                              std::string Suffix) {
  std::replace(Filename.begin(), Filename.end(), '/', '-');
  if (!Annotation.empty()) {
    Annotation.insert(0, "-");
  }
  if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
    assert(Suffix.size() + Annotation.size() <= MAX_PATH);
    if (opts::Verbosity >= 1) {
      errs() << "BOLT-WARNING: Filename \"" << Filename << Annotation << Suffix
             << "\" exceeds the " << MAX_PATH << " size limit, truncating.\n";
    }
    Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
  }
  Filename += Annotation;
  Filename += Suffix;
  return Filename;
}

std::string formatEscapes(const std::string& Str) {
  std::string Result;
  for (unsigned I = 0; I < Str.size(); ++I) {
    char C = Str[I];
    switch (C) {
    case '\n':
      Result += "&#13;";
      break;
    case '"':
      break;
    default:
      Result += C;
      break;
    }
  }
  return Result;
}

} // namespace
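
// Print the function's CFG to OS in Graphviz "dot" format, annotating blocks
// with execution counts, offsets, and layout indices, and edges with branch
// opcodes and profile counts.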
void BinaryFunction::dumpGraph(raw_ostream& OS) const {
  OS << "strict digraph \"" << getPrintName() << "\" {\n";
  uint64_t Offset = Address;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto LayoutPos = std::find(BasicBlocksLayout.begin(),
                               BasicBlocksLayout.end(),
                               BB);
    unsigned Layout = LayoutPos - BasicBlocksLayout.begin();
    const char* ColdStr = BB->isCold() ? " (cold)" : "";
    OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u:CFI:%u)\"]\n",
                 BB->getName().data(),
                 BB->getName().data(),
                 ColdStr,
                 (BB->ExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE
                      ? BB->ExecutionCount
                      : 0),
                 BB->getOffset(),
                 getIndex(BB),
                 Layout,
                 BB->getCFIState());
    OS << format("\"%s\" [shape=box]\n", BB->getName().data());
    if (opts::DotToolTipCode) {
      std::string Str;
      raw_string_ostream CS(Str);
      Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this);
      const std::string Code = formatEscapes(CS.str());
      OS << format("\"%s\" [tooltip=\"%s\"]\n",
                   BB->getName().data(),
                   Code.c_str());
    }

    // analyzeBranch is just used to get the names of the branch
    // opcodes.
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    const bool Success = BB->analyzeBranch(TBB,
                                           FBB,
                                           CondBranch,
                                           UncondBranch);

    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(*LastInstr);

    auto BI = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      std::string Branch;
      if (Success) {
        if (Succ == BB->getConditionalSuccessor(true)) {
          Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                    CondBranch->getOpcode()))
                              : "TB";
        } else if (Succ == BB->getConditionalSuccessor(false)) {
          Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName(
                                      UncondBranch->getOpcode()))
                                : "FB";
        } else {
          Branch = "FT";
        }
      }
      if (IsJumpTable) {
        Branch = "JT";
      }
      OS << format("\"%s\" -> \"%s\" [label=\"%s",
                   BB->getName().data(),
                   Succ->getName().data(),
                   Branch.c_str());

      if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
          BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
      } else if (ExecutionCount != COUNT_NO_PROFILE &&
                 BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
        OS << "\\n(IC:" << BI->Count << ")";
      }
      OS << "\"]\n";

      ++BI;
    }
    for (BinaryBasicBlock *LP : BB->landing_pads()) {
      OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
                   BB->getName().data(),
                   LP->getName().data());
    }
  }
  OS << "}\n";
}
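
// CFG dumping entry points: viewGraph() writes the graph to a temporary .dot
// file and displays it with Graphviz, while dumpGraphForPass() and
// dumpGraphToFile() write the graph to a named file.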
void BinaryFunction::viewGraph() const {
  SmallString<MAX_PATH> Filename;
  if (std::error_code EC =
          sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
    errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
           << " bolt-cfg-XXXXX.dot temporary file.\n";
    return;
  }
  dumpGraphToFile(std::string(Filename));
  if (DisplayGraph(Filename)) {
    errs() << "BOLT-ERROR: Can't display " << Filename << " with graphviz.\n";
  }
  if (std::error_code EC = sys::fs::remove(Filename)) {
    errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
           << Filename << "\n";
  }
}

void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
  std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
  outs() << "BOLT-DEBUG: Dumping CFG to " << Filename << "\n";
  dumpGraphToFile(Filename);
}

void BinaryFunction::dumpGraphToFile(std::string Filename) const {
  std::error_code EC;
  raw_fd_ostream of(Filename, EC, sys::fs::OF_None);
  if (EC) {
    if (opts::Verbosity >= 1) {
      errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
             << Filename << " for output.\n";
    }
    return;
  }
  dumpGraph(of);
}
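
// Sanity-check the CFG: verify successor invariants, reject references to
// deleted blocks, and make sure landing pad and thrower lists are
// duplicate-free and mutually consistent.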
bool BinaryFunction::validateCFG() const {
  bool Valid = true;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    Valid &= BB->validateSuccessorInvariants();
  }

  if (!Valid)
    return Valid;

  // Make sure all blocks in CFG are valid.
  auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) {
    if (!BB->isValid()) {
      errs() << "BOLT-ERROR: deleted " << Desc << " " << BB->getName()
             << " detected in:\n";
      this->dump();
      return false;
    }
    return true;
  };
  for (const BinaryBasicBlock *BB : BasicBlocks) {
    if (!validateBlock(BB, "block"))
      return false;
    for (const BinaryBasicBlock *PredBB : BB->predecessors())
      if (!validateBlock(PredBB, "predecessor"))
        return false;
    for (const BinaryBasicBlock *SuccBB : BB->successors())
      if (!validateBlock(SuccBB, "successor"))
        return false;
    for (const BinaryBasicBlock *LP : BB->landing_pads())
      if (!validateBlock(LP, "landing pad"))
        return false;
    for (const BinaryBasicBlock *Thrower : BB->throwers())
      if (!validateBlock(Thrower, "thrower"))
        return false;
  }

  for (const BinaryBasicBlock *BB : BasicBlocks) {
    std::unordered_set<const BinaryBasicBlock *> BBLandingPads;
    for (const BinaryBasicBlock *LP : BB->landing_pads()) {
      if (BBLandingPads.count(LP)) {
        errs() << "BOLT-ERROR: duplicate landing pad detected in "
               << BB->getName() << " in function " << *this << '\n';
        return false;
      }
      BBLandingPads.insert(LP);
    }

    std::unordered_set<const BinaryBasicBlock *> BBThrowers;
    for (const BinaryBasicBlock *Thrower : BB->throwers()) {
      if (BBThrowers.count(Thrower)) {
        errs() << "BOLT-ERROR: duplicate thrower detected in "
               << BB->getName() << " in function " << *this << '\n';
        return false;
      }
      BBThrowers.insert(Thrower);
    }

    for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) {
      if (std::find(LPBlock->throw_begin(), LPBlock->throw_end(), BB)
          == LPBlock->throw_end()) {
        errs() << "BOLT-ERROR: inconsistent landing pad detected in "
               << *this << ": " << BB->getName()
               << " is in LandingPads but not in " << LPBlock->getName()
               << " Throwers\n";
        return false;
      }
    }
    for (const BinaryBasicBlock *Thrower : BB->throwers()) {
      if (std::find(Thrower->lp_begin(), Thrower->lp_end(), BB)
          == Thrower->lp_end()) {
        errs() << "BOLT-ERROR: inconsistent thrower detected in "
               << *this << ": " << BB->getName()
               << " is in Throwers list but not in " << Thrower->getName()
               << " LandingPads\n";
        return false;
      }
    }
  }

  return Valid;
}
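
// Rewrite terminators to match the final basic block layout: remove branches
// to the fall-through block, reverse branch conditions where profitable, and
// insert unconditional branches where the layout requires them.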
void BinaryFunction::fixBranches() {
  auto &MIB = BC.MIB;
  MCContext *Ctx = BC.Ctx.get();

  for (unsigned I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
    BinaryBasicBlock *BB = BasicBlocksLayout[I];
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
      continue;

    // We will create unconditional branch with correct destination if needed.
    if (UncondBranch)
      BB->eraseInstruction(BB->findInstruction(UncondBranch));

    // Basic block that follows the current one in the final layout.
    const BinaryBasicBlock *NextBB = nullptr;
    if (I + 1 != E && BB->isCold() == BasicBlocksLayout[I + 1]->isCold())
      NextBB = BasicBlocksLayout[I + 1];

    if (BB->succ_size() == 1) {
      // __builtin_unreachable() could create a conditional branch that
      // falls through into the next function - hence the block will have only
      // one valid successor. Since the behaviour is undefined, we replace
      // the conditional branch with an unconditional one if required.
      if (CondBranch)
        BB->eraseInstruction(BB->findInstruction(CondBranch));
      if (BB->getSuccessor() == NextBB)
        continue;
      BB->addBranchInstruction(BB->getSuccessor());
    } else if (BB->succ_size() == 2) {
      assert(CondBranch && "conditional branch expected");
      const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(true);
      const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(false);
      // Check whether we support reversing this branch direction
      const bool IsSupported =
          !MIB->isUnsupportedBranch(CondBranch->getOpcode());
      if (NextBB && NextBB == TSuccessor && IsSupported) {
        std::swap(TSuccessor, FSuccessor);
        {
          auto L = BC.scopeLock();
          MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
        }
        BB->swapConditionalSuccessors();
      } else {
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
      }
      if (TSuccessor == FSuccessor) {
        BB->removeDuplicateConditionalSuccessor(CondBranch);
      }
      if (!NextBB ||
          ((NextBB != TSuccessor || !IsSupported) && NextBB != FSuccessor)) {
        // If one of the branches is guaranteed to be "long" while the other
        // could be "short", then prioritize short for "taken". This will
        // generate a sequence 1 byte shorter on x86.
        if (IsSupported && BC.isX86() &&
            TSuccessor->isCold() != FSuccessor->isCold() &&
            BB->isCold() != TSuccessor->isCold()) {
          std::swap(TSuccessor, FSuccessor);
          {
            auto L = BC.scopeLock();
            MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(),
                                        Ctx);
          }
          BB->swapConditionalSuccessors();
        }
        BB->addBranchInstruction(FSuccessor);
      }
    }
    // Cases where the number of successors is 0 (block ends with a
    // terminator) or more than 2 (switch table) don't require branch
    // instruction adjustments.
  }
  assert((!isSimple() || validateCFG())
         && "Invalid CFG detected after fixing branches");
}
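
// Propagate DW_CFA_GNU_args_size values from CFI pseudo-instructions onto the
// invoke instructions they cover; the CFIs themselves are removed and
// regenerated at emission time.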
void BinaryFunction::propagateGnuArgsSizeInfo(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  assert(CurrentState == State::Disassembled && "unexpected function state");

  if (!hasEHRanges() || !usesGnuArgsSize())
    return;

  // The current value of DW_CFA_GNU_args_size affects all following
  // invoke instructions until the next CFI overrides it.
  // It is important to iterate basic blocks in the original order when
  // assigning the value.
  uint64_t CurrentGnuArgsSize = 0;
  for (BinaryBasicBlock *BB : BasicBlocks) {
    for (auto II = BB->begin(); II != BB->end(); ) {
      MCInst &Instr = *II;
      if (BC.MIB->isCFI(Instr)) {
        const MCCFIInstruction *CFI = getCFIFor(Instr);
        if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
          CurrentGnuArgsSize = CFI->getOffset();
          // Delete DW_CFA_GNU_args_size instructions and only regenerate
          // during the final code emission. The information is embedded
          // inside call instructions.
          II = BB->erasePseudoInstruction(II);
          continue;
        }
      } else if (BC.MIB->isInvoke(Instr)) {
        // Add the value of GNU_args_size as an extra operand to invokes.
        BC.MIB->addGnuArgsSize(Instr, CurrentGnuArgsSize, AllocId);
      }
      ++II;
    }
  }
}
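
// Post-process terminators after the CFG is finalized: drop conditional
// branches left dangling by __builtin_unreachable() and, where a block with
// no successors lacks a terminator, append a return unless it ends in a call.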
void BinaryFunction::postProcessBranches() {
  if (!isSimple())
    return;
  for (BinaryBasicBlock *BB : BasicBlocksLayout) {
    auto LastInstrRI = BB->getLastNonPseudo();
    if (BB->succ_size() == 1) {
      if (LastInstrRI != BB->rend() &&
          BC.MIB->isConditionalBranch(*LastInstrRI)) {
        // __builtin_unreachable() could create a conditional branch that
        // falls through into the next function - hence the block will have
        // only one valid successor. Such behaviour is undefined and thus we
        // remove the conditional branch while leaving a valid successor.
        BB->eraseInstruction(std::prev(LastInstrRI.base()));
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
                          << BB->getName() << " in function " << *this << '\n');
      }
    } else if (BB->succ_size() == 0) {
      // Ignore unreachable basic blocks.
      if (BB->pred_size() == 0 || BB->isLandingPad())
        continue;

      // If it's a basic block that does not end with a terminator, we insert
      // a return instruction unless it's a call instruction.
      if (LastInstrRI == BB->rend()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
                   << BB->getName() << " in function " << *this << '\n');
        continue;
      }
      if (!BC.MIB->isTerminator(*LastInstrRI) &&
          !BC.MIB->isCall(*LastInstrRI)) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
                          << BB->getName() << " in function " << *this << '\n');
        MCInst ReturnInstr;
        BC.MIB->createReturn(ReturnInstr);
        BB->addInstruction(ReturnInstr);
      }
    }
  }
  assert(validateCFG() && "invalid CFG");
}
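
// Register a secondary entry point at the given offset from the start of the
// function and return the global symbol associated with it.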
MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
  assert(Offset && "cannot add primary entry point");
  assert(CurrentState == State::Empty || CurrentState == State::Disassembled);

  const uint64_t EntryPointAddress = getAddress() + Offset;
  MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);

  MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
  if (EntrySymbol)
    return EntrySymbol;

  if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
    EntrySymbol = EntryBD->getSymbol();
  } else {
    EntrySymbol =
        BC.getOrCreateGlobalSymbol(EntryPointAddress,
                                   Twine("__ENTRY_") + getOneName() + "@");
  }
  SecondaryEntryPoints[LocalSymbol] = EntrySymbol;

  BC.setSymbolToFunctionMap(EntrySymbol, this);

  return EntrySymbol;
}

MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) {
  assert(CurrentState == State::CFG &&
         "basic block can be added as an entry only in a function with CFG");

  if (&BB == BasicBlocks.front())
    return getSymbol();

  MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB);
  if (EntrySymbol)
    return EntrySymbol;

  EntrySymbol =
      BC.Ctx->getOrCreateSymbol("__ENTRY_" + BB.getLabel()->getName());

  SecondaryEntryPoints[BB.getLabel()] = EntrySymbol;

  BC.setSymbolToFunctionMap(EntrySymbol, this);

  return EntrySymbol;
}

MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
  if (EntryID == 0)
    return getSymbol();

  if (!isMultiEntry())
    return nullptr;

  uint64_t NumEntries = 0;
  if (hasCFG()) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
      if (!EntrySymbol)
        continue;
      if (NumEntries == EntryID)
        return EntrySymbol;
      ++NumEntries;
    }
  } else {
    for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
      MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
      if (!EntrySymbol)
        continue;
      if (NumEntries == EntryID)
        return EntrySymbol;
      ++NumEntries;
    }
  }

  return nullptr;
}

uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
  if (!isMultiEntry())
    return 0;

  for (const MCSymbol *FunctionSymbol : getSymbols())
    if (FunctionSymbol == Symbol)
      return 0;

  // Check all secondary entries available as either basic blocks or labels.
  uint64_t NumEntries = 0;
  for (const BinaryBasicBlock *BB : BasicBlocks) {
    MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
    if (!EntrySymbol)
      continue;
    if (EntrySymbol == Symbol)
      return NumEntries;
    ++NumEntries;
  }
  NumEntries = 0;
  for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
    MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
    if (!EntrySymbol)
      continue;
    if (EntrySymbol == Symbol)
      return NumEntries;
    ++NumEntries;
  }

  llvm_unreachable("symbol not found");
}
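
// Invoke Callback on the primary entry point and every secondary entry point
// of the function; iteration stops as soon as the callback reports failure.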
bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {
  bool Status = Callback(0, getSymbol());
  if (!isMultiEntry())
    return Status;

  for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
    if (!Status)
      break;

    MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
    if (!EntrySymbol)
      continue;

    Status = Callback(KV.first, EntrySymbol);
  }

  return Status;
}
|
|
|
|
|
|
BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
  BasicBlockOrderType DFS;
  unsigned Index = 0;
  std::stack<BinaryBasicBlock *> Stack;

  // Push entry points to the stack in reverse order.
  //
  // NB: we rely on the original order of entries to match.
  for (auto BBI = layout_rbegin(); BBI != layout_rend(); ++BBI) {
    BinaryBasicBlock *BB = *BBI;
    if (isEntryPoint(*BB))
      Stack.push(BB);
    BB->setLayoutIndex(BinaryBasicBlock::InvalidIndex);
  }

  while (!Stack.empty()) {
    BinaryBasicBlock *BB = Stack.top();
    Stack.pop();

    if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
      continue;

    BB->setLayoutIndex(Index++);
    DFS.push_back(BB);

    for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
      Stack.push(SuccBB);
    }

    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) &&
        CondBranch && BB->succ_size() == 2) {
      if (BC.MIB->getCanonicalBranchCondCode(BC.MIB->getCondCode(
              *CondBranch)) == BC.MIB->getCondCode(*CondBranch)) {
        Stack.push(BB->getConditionalSuccessor(true));
        Stack.push(BB->getConditionalSuccessor(false));
      } else {
        Stack.push(BB->getConditionalSuccessor(false));
        Stack.push(BB->getConditionalSuccessor(true));
      }
    } else {
      for (BinaryBasicBlock *SuccBB : BB->successors()) {
        Stack.push(SuccBB);
      }
    }
  }

  return DFS;
}
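// Illustration of the traversal order above (hypothetical CFG): for a block A
// ending in a conditional branch with taken successor T and fall-through F,
// the successor pushed last is popped, and therefore visited, first. When the
// branch condition is already in canonical form, F is pushed last, so the DFS
// proceeds A, F, ..., T, ...; otherwise T and F swap places. Roughly speaking,
// landing pads of A are visited after both regular successors since they are
// pushed before them.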
size_t BinaryFunction::computeHash(bool UseDFS,
                                   OperandHashFuncTy OperandHashFunc) const {
  if (size() == 0)
    return 0;

  assert(hasCFG() && "function is expected to have CFG");

  const std::vector<BinaryBasicBlock *> &Order =
      UseDFS ? dfs() : BasicBlocksLayout;

  // The hash is computed by creating a string of all instruction opcodes and
  // possibly their operands and then hashing that string with std::hash.
  std::string HashString;
  for (const BinaryBasicBlock *BB : Order) {
    for (const MCInst &Inst : *BB) {
      unsigned Opcode = Inst.getOpcode();

      if (BC.MII->get(Opcode).isPseudo())
        continue;

      // Ignore unconditional jumps since we check CFG consistency by processing
      // basic blocks in order and do not rely on branches to be in-sync with
      // CFG. Note that we still use condition code of conditional jumps.
      if (BC.MIB->isUnconditionalBranch(Inst))
        continue;

      if (Opcode == 0)
        HashString.push_back(0);

      while (Opcode) {
        uint8_t LSB = Opcode & 0xff;
        HashString.push_back(LSB);
        Opcode = Opcode >> 8;
      }

      for (unsigned I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {
        HashString.append(OperandHashFunc(Inst.getOperand(I)));
      }
    }
  }

  return Hash = std::hash<std::string>{}(HashString);
}
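// Illustration of the hash-string encoding above (values are hypothetical):
// an instruction with opcode 0x01A4 contributes the bytes {0xA4, 0x01},
// least-significant byte first, followed by whatever OperandHashFunc returns
// for each prime operand. Two functions hash equally only if their opcode
// sequences (and hashed operands) match in the chosen order; passing
// UseDFS=true makes the order, and hence the hash, independent of the current
// basic block layout.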
void BinaryFunction::insertBasicBlocks(
    BinaryBasicBlock *Start,
    std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
    const bool UpdateLayout,
    const bool UpdateCFIState,
    const bool RecomputeLandingPads) {
  const auto StartIndex = Start ? getIndex(Start) : -1;
  const size_t NumNewBlocks = NewBBs.size();

  BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1),
                     NumNewBlocks,
                     nullptr);

  auto I = StartIndex + 1;
  for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
    assert(!BasicBlocks[I]);
    BasicBlocks[I++] = BB.release();
  }

  if (RecomputeLandingPads) {
    recomputeLandingPads();
  } else {
    updateBBIndices(0);
  }

  if (UpdateLayout) {
    updateLayout(Start, NumNewBlocks);
  }

  if (UpdateCFIState) {
    updateCFIState(Start, NumNewBlocks);
  }
}
BinaryFunction::iterator BinaryFunction::insertBasicBlocks(
    BinaryFunction::iterator StartBB,
    std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
    const bool UpdateLayout,
    const bool UpdateCFIState,
    const bool RecomputeLandingPads) {
  const unsigned StartIndex = getIndex(&*StartBB);
  const size_t NumNewBlocks = NewBBs.size();

  BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1, NumNewBlocks,
                     nullptr);
  auto RetIter = BasicBlocks.begin() + StartIndex + 1;

  unsigned I = StartIndex + 1;
  for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
    assert(!BasicBlocks[I]);
    BasicBlocks[I++] = BB.release();
  }

  if (RecomputeLandingPads) {
    recomputeLandingPads();
  } else {
    updateBBIndices(0);
  }

  if (UpdateLayout) {
    updateLayout(*std::prev(RetIter), NumNewBlocks);
  }

  if (UpdateCFIState) {
    updateCFIState(*std::prev(RetIter), NumNewBlocks);
  }

  return RetIter;
}
void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
  for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I) {
    BasicBlocks[I]->Index = I;
  }
}
void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
                                    const unsigned NumNewBlocks) {
  const int32_t CFIState = Start->getCFIStateAtExit();
  const unsigned StartIndex = getIndex(Start) + 1;
  for (unsigned I = 0; I < NumNewBlocks; ++I) {
    BasicBlocks[StartIndex + I]->setCFIState(CFIState);
  }
}
void BinaryFunction::updateLayout(BinaryBasicBlock *Start,
                                  const unsigned NumNewBlocks) {
  // If Start is not provided, insert the new blocks at the beginning.
  if (!Start) {
    BasicBlocksLayout.insert(layout_begin(), BasicBlocks.begin(),
                             BasicBlocks.begin() + NumNewBlocks);
    updateLayoutIndices();
    return;
  }

  // Insert new blocks in the layout immediately after Start.
  auto Pos = std::find(layout_begin(), layout_end(), Start);
  assert(Pos != layout_end());
  BinaryBasicBlock **Begin = &BasicBlocks[getIndex(Start) + 1];
  BinaryBasicBlock **End = &BasicBlocks[getIndex(Start) + NumNewBlocks + 1];
  BasicBlocksLayout.insert(Pos + 1, Begin, End);
  updateLayoutIndices();
}
bool BinaryFunction::checkForAmbiguousJumpTables() {
  SmallSet<uint64_t, 4> JumpTables;
  for (BinaryBasicBlock *&BB : BasicBlocks) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isIndirectBranch(Inst))
        continue;
      uint64_t JTAddress = BC.MIB->getJumpTable(Inst);
      if (!JTAddress)
        continue;
      // This address can be inside another jump table, but we only consider
      // it ambiguous when the same start address is used, not the same JT
      // object.
      if (!JumpTables.count(JTAddress)) {
        JumpTables.insert(JTAddress);
        continue;
      }
      return true;
    }
  }
  return false;
}
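// Example of the ambiguity detected above (addresses are hypothetical): if
// two indirect branches in the same function are both annotated with a jump
// table starting at 0x402450, the function is reported as having ambiguous
// jump tables, and disambiguateJumpTables() can later give one of the
// branches its own duplicated copy of the table.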
void BinaryFunction::disambiguateJumpTables(
    MCPlusBuilder::AllocatorIdTy AllocId) {
  assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
  SmallPtrSet<JumpTable *, 4> JumpTables;
  for (BinaryBasicBlock *&BB : BasicBlocks) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isIndirectBranch(Inst))
        continue;
      JumpTable *JT = getJumpTable(Inst);
      if (!JT)
        continue;
      auto Iter = JumpTables.find(JT);
      if (Iter == JumpTables.end()) {
        JumpTables.insert(JT);
        continue;
      }
      // This instruction is an indirect jump using a jump table, but it is
      // using the same jump table as another jump. Try all our tricks to
      // extract the jump table symbol and make it point to a new, duplicated
      // jump table.
      MCPhysReg BaseReg1;
      uint64_t Scale;
      const MCSymbol *Target;
      // If the first matcher succeeds, the first instruction is the one to
      // patch.
      MCInst *JTLoadInst = &Inst;
      // Try a standard indirect jump matcher, scale 8.
      std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher =
          BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
      if (!IndJmpMatcher->match(
              *BC.MRI, *BC.MIB,
              MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
          BaseReg1 != BC.MIB->getNoRegister() ||
          Scale != 8) {
        MCPhysReg BaseReg2;
        uint64_t Offset;
        // Standard JT matching failed. Trying now:
        //    movq  "jt.2397/1"(,%rax,8), %rax
        //    jmpq  *%rax
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner =
            BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg1),
                              BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                              /*Offset=*/BC.MIB->matchSymbol(Target));
        MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get();
        std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 =
            BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
        if (!IndJmpMatcher2->match(
                *BC.MRI, *BC.MIB,
                MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
            BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
          // JT matching failed. Trying now:
          // PIC-style matcher, scale 4:
          //    addq    %rdx, %rsi
          //    addq    %rdx, %rdi
          //    leaq    DATAat0x402450(%rip), %r11
          //    movslq  (%r11,%rdx,4), %rcx
          //    addq    %r11, %rcx
          //    jmpq    *%rcx # JUMPTABLE @0x402450
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(
                  BC.MIB->matchReg(BaseReg1),
                  BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
                                    BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
                                    BC.MIB->matchImm(Offset))));
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner =
              BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
          MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get();
          std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher =
              BC.MIB->matchIndJmp(BC.MIB->matchAdd(std::move(LEAMatcherOwner),
                                                   BC.MIB->matchAnyOperand()));
          if (!PICIndJmpMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
              Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
              !PICBaseAddrMatcher->match(
                  *BC.MRI, *BC.MIB,
                  MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
            llvm_unreachable("Failed to extract jump table base");
            continue;
          }
          // Matched PIC, identify the instruction with the reference to the JT.
          JTLoadInst = LEAMatcher->CurInst;
        } else {
          // Matched non-PIC.
          JTLoadInst = LoadMatcher->CurInst;
        }
      }

      uint64_t NewJumpTableID{0};
      const MCSymbol *NewJTLabel;
      std::tie(NewJumpTableID, NewJTLabel) =
          BC.duplicateJumpTable(*this, JT, Target);
      {
        auto L = BC.scopeLock();
        BC.MIB->replaceMemOperandDisp(*JTLoadInst, NewJTLabel, BC.Ctx.get());
      }
      // We use a unique ID with the high bit set as address for this "injected"
      // jump table (not originally in the input binary).
      BC.MIB->setJumpTable(Inst, NewJumpTableID, 0, AllocId);
    }
  }
}
bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
                                             BinaryBasicBlock *OldDest,
                                             BinaryBasicBlock *NewDest) {
  MCInst *Instr = BB->getLastNonPseudoInstr();
  if (!Instr || !BC.MIB->isIndirectBranch(*Instr))
    return false;
  uint64_t JTAddress = BC.MIB->getJumpTable(*Instr);
  assert(JTAddress && "Invalid jump table address");
  JumpTable *JT = getJumpTableContainingAddress(JTAddress);
  assert(JT && "No jump table structure for this indirect branch");
  bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
                                        NewDest->getLabel());
  assert(Patched && "Invalid entry to be replaced in jump table");
  return true;
}
BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
                                            BinaryBasicBlock *To) {
  // Create intermediate BB.
  MCSymbol *Tmp;
  {
    auto L = BC.scopeLock();
    Tmp = BC.Ctx->createNamedTempSymbol("SplitEdge");
  }
  // Link new BBs to the original input offset of the From BB, so we can map
  // samples recorded in new BBs back to the original BB seen in the input
  // binary (if using BAT).
  std::unique_ptr<BinaryBasicBlock> NewBB =
      createBasicBlock(From->getInputOffset(), Tmp);
  BinaryBasicBlock *NewBBPtr = NewBB.get();

  // Update "From" BB.
  auto I = From->succ_begin();
  auto BI = From->branch_info_begin();
  for (; I != From->succ_end(); ++I) {
    if (*I == To)
      break;
    ++BI;
  }
  assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
  uint64_t OrigCount{BI->Count};
  uint64_t OrigMispreds{BI->MispredictedCount};
  replaceJumpTableEntryIn(From, To, NewBBPtr);
  From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);

  NewBB->addSuccessor(To, OrigCount, OrigMispreds);
  NewBB->setExecutionCount(OrigCount);
  NewBB->setIsCold(From->isCold());

  // Update CFI and BB layout with new intermediate BB.
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
  NewBBs.emplace_back(std::move(NewBB));
  insertBasicBlocks(From, std::move(NewBBs), true, true,
                    /*RecomputeLandingPads=*/false);
  return NewBBPtr;
}
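// Example (hypothetical CFG): splitEdge(A, B) turns the edge A->B into
// A->NewBB->B. The original edge's execution and misprediction counts are
// carried over to both new edges, NewBB inherits A's hot/cold placement, and
// any jump table entry in A that pointed at B is redirected to NewBB.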
void BinaryFunction::deleteConservativeEdges() {
  // Our goal is to aggressively remove edges from the CFG that we believe are
  // wrong. This is used for instrumentation, where it is safe to remove
  // fallthrough edges because we won't reorder blocks.
  for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
    BinaryBasicBlock *BB = *I;
    if (BB->succ_size() != 1 || BB->size() == 0)
      continue;

    auto NextBB = std::next(I);
    MCInst *Last = BB->getLastNonPseudoInstr();
    // Fallthrough is a landing pad? Delete this edge (as long as we don't
    // have a direct jump to it).
    if ((*BB->succ_begin())->isLandingPad() && NextBB != E &&
        *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(*Last)) {
      BB->removeAllSuccessors();
      continue;
    }

    // Look for suspicious calls at the end of BB where gcc may optimize it and
    // remove the jump to the epilogue when it knows the call won't return.
    if (!Last || !BC.MIB->isCall(*Last))
      continue;

    const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(*Last);
    if (!CalleeSymbol)
      continue;

    StringRef CalleeName = CalleeSymbol->getName();
    if (CalleeName != "__cxa_throw@PLT" &&
        CalleeName != "_Unwind_Resume@PLT" &&
        CalleeName != "__cxa_rethrow@PLT" &&
        CalleeName != "exit@PLT" &&
        CalleeName != "abort@PLT")
      continue;

    BB->removeAllSuccessors();
  }
}
bool BinaryFunction::isDataMarker(const SymbolRef &Symbol,
                                  uint64_t SymbolSize) const {
  // For aarch64, the ABI defines mapping symbols so we identify data in the
  // code section (see IHI0056B). $d identifies a symbol starting data contents.
  if (BC.isAArch64() && Symbol.getType() &&
      cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
      Symbol.getName() && cantFail(Symbol.getName()) == "$d")
    return true;
  return false;
}
bool BinaryFunction::isCodeMarker(const SymbolRef &Symbol,
                                  uint64_t SymbolSize) const {
  // For aarch64, the ABI defines mapping symbols so we identify data in the
  // code section (see IHI0056B). $x identifies a symbol starting code or the
  // end of a data chunk inside code.
  if (BC.isAArch64() && Symbol.getType() &&
      cantFail(Symbol.getType()) == SymbolRef::ST_Unknown && SymbolSize == 0 &&
      Symbol.getName() && cantFail(Symbol.getName()) == "$x")
    return true;
  return false;
}
bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
                                          uint64_t SymbolSize) const {
  // If this symbol is in a different section from the one where the
  // function symbol is, don't consider it as valid.
  if (!getOriginSection()->containsAddress(
          cantFail(Symbol.getAddress(), "cannot get symbol address")))
    return false;

  // Some symbols are tolerated inside function bodies, others are not.
  // The real function boundaries may not be known at this point.
  if (isDataMarker(Symbol, SymbolSize) || isCodeMarker(Symbol, SymbolSize))
    return true;

  // It's okay to have a zero-sized symbol in the middle of a non-zero-sized
  // function.
  if (SymbolSize == 0 && containsAddress(cantFail(Symbol.getAddress())))
    return true;

  if (cantFail(Symbol.getType()) != SymbolRef::ST_Unknown)
    return false;

  if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
    return false;

  return true;
}
void BinaryFunction::adjustExecutionCount(uint64_t Count) {
  if (getKnownExecutionCount() == 0 || Count == 0)
    return;

  if (ExecutionCount < Count)
    Count = ExecutionCount;

  double AdjustmentRatio = ((double)ExecutionCount - Count) / ExecutionCount;
  if (AdjustmentRatio < 0.0)
    AdjustmentRatio = 0.0;

  for (BinaryBasicBlock *&BB : layout())
    BB->adjustExecutionCount(AdjustmentRatio);

  ExecutionCount -= Count;
}
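// Worked example of the adjustment above: with ExecutionCount = 1000 and
// Count = 250, AdjustmentRatio is (1000 - 250) / 1000 = 0.75, so every basic
// block count is scaled to 75% of its value and the function-level count
// drops to 750. Passing Count >= ExecutionCount clamps the ratio to zero and
// zeroes the profile instead of producing a negative value.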
BinaryFunction::~BinaryFunction() {
  for (BinaryBasicBlock *BB : BasicBlocks) {
    delete BB;
  }
  for (BinaryBasicBlock *BB : DeletedBasicBlocks) {
    delete BB;
  }
}
void BinaryFunction::calculateLoopInfo() {
  // Discover loops.
  BinaryDominatorTree DomTree;
  DomTree.recalculate(*this);
  BLI.reset(new BinaryLoopInfo());
  BLI->analyze(DomTree);

  // Traverse discovered loops and add depth and profile information.
  std::stack<BinaryLoop *> St;
  for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
    St.push(*I);
    ++BLI->OuterLoops;
  }

  while (!St.empty()) {
    BinaryLoop *L = St.top();
    St.pop();
    ++BLI->TotalLoops;
    BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);

    // Add nested loops to the stack.
    for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
      St.push(*I);
    }

    // Skip if no valid profile is found.
    if (!hasValidProfile()) {
      L->EntryCount = COUNT_NO_PROFILE;
      L->ExitCount = COUNT_NO_PROFILE;
      L->TotalBackEdgeCount = COUNT_NO_PROFILE;
      continue;
    }

    // Compute back edge count.
    SmallVector<BinaryBasicBlock *, 1> Latches;
    L->getLoopLatches(Latches);

    for (BinaryBasicBlock *Latch : Latches) {
      auto BI = Latch->branch_info_begin();
      for (BinaryBasicBlock *Succ : Latch->successors()) {
        if (Succ == L->getHeader()) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->TotalBackEdgeCount += BI->Count;
        }
        ++BI;
      }
    }

    // Compute entry count.
    L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;

    // Compute exit count.
    SmallVector<BinaryLoop::Edge, 1> ExitEdges;
    L->getExitEdges(ExitEdges);
    for (BinaryLoop::Edge &Exit : ExitEdges) {
      const BinaryBasicBlock *Exiting = Exit.first;
      const BinaryBasicBlock *ExitTarget = Exit.second;
      auto BI = Exiting->branch_info_begin();
      for (BinaryBasicBlock *Succ : Exiting->successors()) {
        if (Succ == ExitTarget) {
          assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
                 "profile data not found");
          L->ExitCount += BI->Count;
        }
        ++BI;
      }
    }
  }
}
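// Worked example of the loop counts above (numbers are hypothetical): if a
// loop header executes 120 times and its single latch takes the back edge
// 100 times, TotalBackEdgeCount is 100 and EntryCount is 120 - 100 = 20,
// i.e. the loop is entered 20 times. ExitCount sums the profiled counts of
// all exiting edges.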
void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
  if (!isEmitted()) {
    assert(!isInjected() && "injected function should be emitted");
    setOutputAddress(getAddress());
    setOutputSize(getSize());
    return;
  }

  const uint64_t BaseAddress = getCodeSection()->getOutputAddress();
  ErrorOr<BinarySection &> ColdSection = getColdCodeSection();
  const uint64_t ColdBaseAddress =
      isSplit() ? ColdSection->getOutputAddress() : 0;
  if (BC.HasRelocations || isInjected()) {
    const uint64_t StartOffset = Layout.getSymbolOffset(*getSymbol());
    const uint64_t EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel());
    setOutputAddress(BaseAddress + StartOffset);
    setOutputSize(EndOffset - StartOffset);
    if (hasConstantIsland()) {
      const uint64_t DataOffset =
          Layout.getSymbolOffset(*getFunctionConstantIslandLabel());
      setOutputDataAddress(BaseAddress + DataOffset);
    }
    if (isSplit()) {
      const MCSymbol *ColdStartSymbol = getColdSymbol();
      assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
             "split function should have defined cold symbol");
      const MCSymbol *ColdEndSymbol = getFunctionColdEndLabel();
      assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
             "split function should have defined cold end symbol");
      const uint64_t ColdStartOffset = Layout.getSymbolOffset(*ColdStartSymbol);
      const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
      cold().setAddress(ColdBaseAddress + ColdStartOffset);
      cold().setImageSize(ColdEndOffset - ColdStartOffset);
      if (hasConstantIsland()) {
        const uint64_t DataOffset =
            Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel());
        setOutputColdDataAddress(ColdBaseAddress + DataOffset);
      }
    }
  } else {
    setOutputAddress(getAddress());
    setOutputSize(Layout.getSymbolOffset(*getFunctionEndLabel()));
  }

  // Update basic block output ranges for the debug info, if we have
  // secondary entry points in the symbol table to update or if writing BAT.
  if (!opts::UpdateDebugSections && !isMultiEntry() &&
      !requiresAddressTranslation())
    return;

  // Output ranges should match the input if the body hasn't changed.
  if (!isSimple() && !BC.HasRelocations)
    return;

  // AArch64 may have functions that only contain a constant island (no code).
  if (layout_begin() == layout_end())
    return;

  BinaryBasicBlock *PrevBB = nullptr;
  for (auto BBI = layout_begin(), BBE = layout_end(); BBI != BBE; ++BBI) {
    BinaryBasicBlock *BB = *BBI;
    assert(BB->getLabel()->isDefined() && "symbol should be defined");
    const uint64_t BBBaseAddress = BB->isCold() ? ColdBaseAddress : BaseAddress;
    if (!BC.HasRelocations) {
      if (BB->isCold()) {
        assert(BBBaseAddress == cold().getAddress());
      } else {
        assert(BBBaseAddress == getOutputAddress());
      }
    }
    const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
    const uint64_t BBAddress = BBBaseAddress + BBOffset;
    BB->setOutputStartAddress(BBAddress);

    if (PrevBB) {
      uint64_t PrevBBEndAddress = BBAddress;
      if (BB->isCold() != PrevBB->isCold()) {
        PrevBBEndAddress = getOutputAddress() + getOutputSize();
      }
      PrevBB->setOutputEndAddress(PrevBBEndAddress);
    }
    PrevBB = BB;

    BB->updateOutputValues(Layout);
  }
  PrevBB->setOutputEndAddress(PrevBB->isCold() ?
      cold().getAddress() + cold().getImageSize() :
      getOutputAddress() + getOutputSize());
}
DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
  DebugAddressRangesVector OutputRanges;

  if (isFolded())
    return OutputRanges;

  if (IsFragment)
    return OutputRanges;

  OutputRanges.emplace_back(getOutputAddress(),
                            getOutputAddress() + getOutputSize());
  if (isSplit()) {
    assert(isEmitted() && "split function should be emitted");
    OutputRanges.emplace_back(cold().getAddress(),
                              cold().getAddress() + cold().getImageSize());
  }

  if (isSimple())
    return OutputRanges;

  for (BinaryFunction *Frag : Fragments) {
    assert(!Frag->isSimple() &&
           "fragment of non-simple function should also be non-simple");
    OutputRanges.emplace_back(Frag->getOutputAddress(),
                              Frag->getOutputAddress() + Frag->getOutputSize());
  }

  return OutputRanges;
}
uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
  if (isFolded())
    return 0;

  // If the function hasn't changed, return the same address.
  if (!isEmitted())
    return Address;

  if (Address < getAddress())
    return 0;

  // Check if the address is associated with an instruction that is tracked
  // by address translation.
  auto KV = InputOffsetToAddressMap.find(Address - getAddress());
  if (KV != InputOffsetToAddressMap.end()) {
    return KV->second;
  }

  // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
  // intact. Instead we can use pseudo instructions and/or annotations.
  const uint64_t Offset = Address - getAddress();
  const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
  if (!BB) {
    // Special case for address immediately past the end of the function.
    if (Offset == getSize())
      return getOutputAddress() + getOutputSize();

    return 0;
  }

  return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
                  BB->getOutputAddressRange().second);
}
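// Example of the translation above (addresses are hypothetical): for a
// function at input address 0x1000 whose block at input offset 0x40 was
// emitted at output address 0x5000, input address 0x1048 maps to
// 0x5000 + (0x48 - 0x40) = 0x5008, clamped to the block's output end so a
// shrunken block never yields an address past its new boundary.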
2019-03-29 14:22:54 -07:00
|
|
|
DebugAddressRangesVector BinaryFunction::translateInputToOutputRanges(
|
2017-05-24 15:20:27 -07:00
|
|
|
const DWARFAddressRangesVector &InputRanges) const {
|
2019-03-29 14:22:54 -07:00
|
|
|
DebugAddressRangesVector OutputRanges;
|
|
|
|
|
|
2020-10-16 00:11:24 -07:00
|
|
|
if (isFolded())
|
|
|
|
|
return OutputRanges;
|
|
|
|
|
|
2017-05-24 15:20:27 -07:00
|
|
|
// If the function hasn't changed return the same ranges.
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (!isEmitted()) {
|
2019-03-29 14:22:54 -07:00
|
|
|
OutputRanges.resize(InputRanges.size());
|
2020-07-16 17:35:55 -07:00
|
|
|
std::transform(InputRanges.begin(), InputRanges.end(),
|
|
|
|
|
OutputRanges.begin(),
|
2019-03-29 14:22:54 -07:00
|
|
|
[](const DWARFAddressRange &Range) {
|
|
|
|
|
return DebugAddressRange(Range.LowPC, Range.HighPC);
|
|
|
|
|
});
|
|
|
|
|
return OutputRanges;
|
|
|
|
|
}
|
2017-05-16 09:27:34 -07:00
|
|
|
|
2017-05-24 15:20:27 -07:00
|
|
|
// Even though we will merge ranges in a post-processing pass, we attempt to
|
|
|
|
|
// merge them in a main processing loop as it improves the processing time.
|
2017-05-16 09:27:34 -07:00
|
|
|
uint64_t PrevEndAddress = 0;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const DWARFAddressRange &Range : InputRanges) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
    if (!containsAddress(Range.LowPC)) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                 << *this << " : [0x" << Twine::utohexstr(Range.LowPC) << ", 0x"
                 << Twine::utohexstr(Range.HighPC) << "]\n");
      PrevEndAddress = 0;
      continue;
    }
    uint64_t InputOffset = Range.LowPC - getAddress();
    const uint64_t InputEndOffset =
        std::min(Range.HighPC - getAddress(), getSize());

    auto BBI = std::upper_bound(BasicBlockOffsets.begin(),
                                BasicBlockOffsets.end(),
                                BasicBlockOffset(InputOffset, nullptr),
                                CompareBasicBlockOffsets());
    --BBI;
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(
            dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                   << *this << " : [0x" << Twine::utohexstr(Range.LowPC)
                   << ", 0x" << Twine::utohexstr(Range.HighPC) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted.
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        if (StartAddress == PrevEndAddress) {
          OutputRanges.back().HighPC = std::max(OutputRanges.back().HighPC,
                                                EndAddress);
        } else {
          OutputRanges.emplace_back(StartAddress,
                                    std::max(StartAddress, EndAddress));
        }
        PrevEndAddress = OutputRanges.back().HighPC;
      }

      InputOffset = BB->getEndOffset();
      ++BBI;
    } while (InputOffset < InputEndOffset);
  }

  // Post-processing pass to sort and merge ranges.
  std::sort(OutputRanges.begin(), OutputRanges.end());
  DebugAddressRangesVector MergedRanges;
  PrevEndAddress = 0;
  for (const DebugAddressRange &Range : OutputRanges) {
    if (Range.LowPC <= PrevEndAddress) {
      MergedRanges.back().HighPC = std::max(MergedRanges.back().HighPC,
                                            Range.HighPC);
    } else {
      MergedRanges.emplace_back(Range.LowPC, Range.HighPC);
    }
    PrevEndAddress = MergedRanges.back().HighPC;
  }

  return MergedRanges;
}
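
// Illustrative usage sketch (hypothetical caller and variable names): debug
// info rewriting code can remap a function's input address ranges after code
// emission and use the sorted, merged result directly, e.g.
//
//   DebugAddressRangesVector OutRanges =
//       BF.translateInputToOutputRanges(InRanges);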
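
// Return the instruction starting at the given input offset, or nullptr if
// there is none. In the Disassembled state this is a direct lookup in the
// Instructions map; in the CFG state the containing basic block is found first
// and its instructions are matched against their "Offset" annotation.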
MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
  if (CurrentState == State::Disassembled) {
    auto II = Instructions.find(Offset);
    return (II == Instructions.end()) ? nullptr : &II->second;
  } else if (CurrentState == State::CFG) {
    BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
    if (!BB)
      return nullptr;

    for (MCInst &Inst : *BB) {
      constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max();
      if (Offset == BC.MIB->getAnnotationWithDefault<uint32_t>(Inst, "Offset",
                                                               InvalidOffset))
        return &Inst;
    }

    return nullptr;
  } else {
    llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
  }
}
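
// Translate a DWARF location list into the output address space. The logic
// mirrors the address-range translation above, with one extra constraint:
// adjacent entries are only merged when their DWARF expressions are identical.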
DebugLocationsVector BinaryFunction::translateInputToOutputLocationList(
    const DebugLocationsVector &InputLL) const {
  DebugLocationsVector OutputLL;

  if (isFolded()) {
    return OutputLL;
  }

  // If the function hasn't changed - there's nothing to update.
  if (!isEmitted()) {
    return InputLL;
  }

  uint64_t PrevEndAddress = 0;
  SmallVectorImpl<uint8_t> *PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : InputLL) {
    const uint64_t Start = Entry.LowPC;
    const uint64_t End = Entry.HighPC;
    if (!containsAddress(Start)) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                           "for "
                        << *this << " : [0x" << Twine::utohexstr(Start)
                        << ", 0x" << Twine::utohexstr(End) << "]\n");
      continue;
    }
    uint64_t InputOffset = Start - getAddress();
    const uint64_t InputEndOffset = std::min(End - getAddress(), getSize());
    auto BBI = std::upper_bound(BasicBlockOffsets.begin(),
                                BasicBlockOffsets.end(),
                                BasicBlockOffset(InputOffset, nullptr),
                                CompareBasicBlockOffsets());
    --BBI;
    do {
      const BinaryBasicBlock *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected "
                             "for "
                          << *this << " : [0x" << Twine::utohexstr(Start)
                          << ", 0x" << Twine::utohexstr(End) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted.
      if (const uint64_t OutputStart = BB->getOutputAddressRange().first) {
        const uint64_t StartAddress =
            OutputStart + InputOffset - BB->getOffset();
        uint64_t EndAddress = BB->getOutputAddressRange().second;
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        if (StartAddress == PrevEndAddress && Entry.Expr == *PrevExpr) {
          OutputLL.back().HighPC = std::max(OutputLL.back().HighPC, EndAddress);
        } else {
          OutputLL.emplace_back(
              DebugLocationEntry{StartAddress,
                                 std::max(StartAddress, EndAddress),
                                 Entry.Expr});
        }
        PrevEndAddress = OutputLL.back().HighPC;
        PrevExpr = &OutputLL.back().Expr;
      }

      ++BBI;
      InputOffset = BB->getEndOffset();
    } while (InputOffset < InputEndOffset);
  }

  // Sort and merge adjacent entries with identical location.
  std::stable_sort(OutputLL.begin(), OutputLL.end(),
      [] (const DebugLocationEntry &A, const DebugLocationEntry &B) {
        return A.LowPC < B.LowPC;
      });
  DebugLocationsVector MergedLL;
  PrevEndAddress = 0;
  PrevExpr = nullptr;
  for (const DebugLocationEntry &Entry : OutputLL) {
    if (Entry.LowPC <= PrevEndAddress && *PrevExpr == Entry.Expr) {
      MergedLL.back().HighPC = std::max(Entry.HighPC, MergedLL.back().HighPC);
    } else {
      const uint64_t Begin = std::max(Entry.LowPC, PrevEndAddress);
      const uint64_t End = std::max(Begin, Entry.HighPC);
      MergedLL.emplace_back(DebugLocationEntry{Begin, End, Entry.Expr});
    }
    PrevEndAddress = MergedLL.back().HighPC;
    PrevExpr = &MergedLL.back().Expr;
  }

  return MergedLL;
}
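
// Print the loop structure discovered by BLI. For a function with profile
// data the output has roughly this shape (illustrative names and counts):
//
//   Loop Info for Function "foo" (count: 1234)
//   Outer loop header: .LBB0
//   Loop basic blocks: .LBB0, .LBB1
//   Total back edge count: 100
//   Loop entry count: 10
//   Loop exit count: 10
//   Average iters per entry: 10.0000
//   ----
//   Total number of loops: 1
//   Number of outer loops: 1
//   Maximum nested loop depth: 1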
void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
  OS << "Loop Info for Function \"" << *this << "\"";
  if (hasValidProfile()) {
    OS << " (count: " << getExecutionCount() << ")";
  }
  OS << "\n";

  std::stack<BinaryLoop *> St;
  for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
    St.push(*I);
  }
  while (!St.empty()) {
    BinaryLoop *L = St.top();
    St.pop();

    for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
      St.push(*I);
    }

    if (!hasValidProfile())
      continue;

    OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer") << " loop header: "
       << L->getHeader()->getName();
    OS << "\n";
    OS << "Loop basic blocks: ";
    const char *Sep = "";
    for (auto BI = L->block_begin(), BE = L->block_end(); BI != BE; ++BI) {
      OS << Sep << (*BI)->getName();
      Sep = ", ";
    }
    OS << "\n";
    if (hasValidProfile()) {
      OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
      OS << "Loop entry count: " << L->EntryCount << "\n";
      OS << "Loop exit count: " << L->ExitCount << "\n";
      if (L->EntryCount > 0) {
        OS << "Average iters per entry: "
           << format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
           << "\n";
      }
    }
    OS << "----\n";
  }

  OS << "Total number of loops: " << BLI->TotalLoops << "\n";
  OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
  OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
}
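
// An AArch64 linker veneer is recognized purely structurally: a single basic
// block of exactly three instructions, each carrying the "AArch64Veneer"
// annotation.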
bool BinaryFunction::isAArch64Veneer() const {
  if (BasicBlocks.size() != 1)
    return false;

  BinaryBasicBlock &BB = **BasicBlocks.begin();
  if (BB.size() != 3)
    return false;

  for (MCInst &Inst : BB) {
    if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer"))
      return false;
  }

  return true;
}

} // namespace bolt
} // namespace llvm