2015-11-23 17:54:18 -08:00
|
|
|
//===--- RewriteInstance.cpp - Interface for machine-level function -------===//
|
|
|
|
|
//
|
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
|
//
|
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "BinaryBasicBlock.h"
|
|
|
|
|
#include "BinaryContext.h"
|
|
|
|
|
#include "BinaryFunction.h"
|
2016-04-15 15:59:52 -07:00
|
|
|
#include "BinaryPassManager.h"
|
2017-10-16 16:53:50 -07:00
|
|
|
#include "CacheMetrics.h"
|
2017-09-01 18:13:51 -07:00
|
|
|
#include "DataAggregator.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "DataReader.h"
|
|
|
|
|
#include "Exceptions.h"
|
2018-03-09 09:45:13 -08:00
|
|
|
#include "MCPlusBuilder.h"
|
2017-12-13 23:12:01 -08:00
|
|
|
#include "ProfileReader.h"
|
|
|
|
|
#include "ProfileWriter.h"
|
2015-12-18 17:00:46 -08:00
|
|
|
#include "RewriteInstance.h"
|
2017-07-17 11:22:22 -07:00
|
|
|
#include "llvm/ADT/Optional.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
#include "llvm/BinaryFormat/Dwarf.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
2016-03-02 18:40:10 -08:00
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
|
|
|
|
|
#include "llvm/MC/MCAsmBackend.h"
|
2016-03-28 17:45:22 -07:00
|
|
|
#include "llvm/MC/MCAsmLayout.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
|
|
|
|
#include "llvm/MC/MCContext.h"
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
|
2016-03-02 18:40:10 -08:00
|
|
|
#include "llvm/MC/MCDwarf.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/MC/MCInstPrinter.h"
|
|
|
|
|
#include "llvm/MC/MCInstrAnalysis.h"
|
|
|
|
|
#include "llvm/MC/MCInstrInfo.h"
|
|
|
|
|
#include "llvm/MC/MCObjectFileInfo.h"
|
|
|
|
|
#include "llvm/MC/MCObjectStreamer.h"
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
#include "llvm/MC/MCObjectWriter.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
|
|
|
#include "llvm/MC/MCSection.h"
|
|
|
|
|
#include "llvm/MC/MCSectionELF.h"
|
|
|
|
|
#include "llvm/MC/MCStreamer.h"
|
|
|
|
|
#include "llvm/MC/MCSubtargetInfo.h"
|
|
|
|
|
#include "llvm/MC/MCSymbol.h"
|
|
|
|
|
#include "llvm/Object/ObjectFile.h"
|
2016-09-29 11:19:06 -07:00
|
|
|
#include "llvm/Object/SymbolicFile.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
|
#include "llvm/Support/CommandLine.h"
|
2016-09-27 19:09:38 -07:00
|
|
|
#include "llvm/Support/DataExtractor.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Support/Errc.h"
|
|
|
|
|
#include "llvm/Support/ManagedStatic.h"
|
|
|
|
|
#include "llvm/Support/TargetSelect.h"
|
|
|
|
|
#include "llvm/Support/TargetRegistry.h"
|
2017-11-27 18:00:24 -08:00
|
|
|
#include "llvm/Support/Timer.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Support/ToolOutputFile.h"
|
2017-05-24 14:14:16 -07:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Target/TargetMachine.h"
|
|
|
|
|
#include <algorithm>
|
2016-01-26 16:03:58 -08:00
|
|
|
#include <fstream>
|
2015-11-23 17:54:18 -08:00
|
|
|
#include <stack>
|
|
|
|
|
#include <system_error>
|
|
|
|
|
|
|
|
|
|
#undef DEBUG_TYPE
|
2016-02-05 14:42:04 -08:00
|
|
|
#define DEBUG_TYPE "bolt"
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
using namespace object;
|
2016-02-05 14:42:04 -08:00
|
|
|
using namespace bolt;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
namespace opts {
|
|
|
|
|
|
2017-03-28 14:40:20 -07:00
|
|
|
// Option categories referenced by the flags in this namespace; they are only
// declared here.
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltDiffCategory;
extern cl::OptionCategory BoltOptCategory;
extern cl::OptionCategory BoltOutputCategory;
extern cl::OptionCategory AggregatorCategory;

// Options that are only declared (not defined) in this part of the file.
extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::list<std::string> ReorderData;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2018-01-24 05:42:11 -08:00
|
|
|
// -force-data-relocations: hidden flag, initialized to false.
static cl::opt<bool> ForceToDataRelocations(
    "force-data-relocations",
    cl::desc("force relocations to data sections to always be processed"),
    cl::init(false),
    cl::Hidden,
    cl::ZeroOrMore,
    cl::cat(BoltCategory));

// -allow-section-relocations: hidden experimental flag, initialized to false.
// Note: enabling this is liable to make things break.
static cl::opt<bool> AllowSectionRelocations(
    "allow-section-relocations",
    cl::desc("allow reordering of data referenced by section relocations "
             "(experimental)"),
    cl::init(false),
    cl::Hidden,
    cl::ZeroOrMore,
    cl::cat(BoltOptCategory));

// -print-cache-metrics: initialized to false.
static cl::opt<bool> PrintCacheMetrics(
    "print-cache-metrics",
    cl::desc("calculate and print various metrics for instruction cache"),
    cl::init(false),
    cl::ZeroOrMore,
    cl::cat(BoltOptCategory));
|
|
|
|
|
|
2017-09-01 18:13:51 -07:00
|
|
|
// -o: name of the output file (optional on the command line).
cl::opt<std::string> OutputFilename(
    "o",
    cl::desc("<output file>"),
    cl::Optional,
    cl::cat(BoltOutputCategory));

// -allow-stripped: hidden flag.
cl::opt<bool> AllowStripped(
    "allow-stripped",
    cl::desc("allow processing of stripped binaries"),
    cl::Hidden,
    cl::cat(BoltCategory));

// -b: BOLT profile file name.
static cl::opt<std::string> BoltProfile(
    "b",
    cl::desc("<bolt profile>"),
    cl::cat(BoltCategory));

// -break-funcs: hidden, comma-separated list used for debugging.
static cl::list<std::string> BreakFunctionNames(
    "break-funcs",
    cl::CommaSeparated,
    cl::desc("list of functions to core dump on (debugging)"),
    cl::value_desc("func1,func2,func3,..."),
    cl::Hidden,
    cl::cat(BoltCategory));
|
2016-08-29 21:11:22 -07:00
|
|
|
|
2016-09-09 12:37:37 -07:00
|
|
|
// -dump-dot-all: hidden flag.
cl::opt<bool> DumpDotAll(
    "dump-dot-all",
    cl::desc("dump function CFGs to graphviz format after each stage"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -dump-eh-frame: hidden debugging flag.
static cl::opt<bool> DumpEHFrame(
    "dump-eh-frame",
    cl::desc("dump parsed .eh_frame (debugging)"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -fix-debuginfo-large-functions: really-hidden flag, initialized to true.
static cl::opt<bool> FixDebugInfoLargeFunctions(
    "fix-debuginfo-large-functions",
    cl::init(true),
    cl::desc("do another pass if we encounter large functions, to correct their "
             "debug info."),
    cl::ZeroOrMore,
    cl::ReallyHidden,
    cl::cat(BoltCategory));
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// -funcs: hidden, comma-separated list of function names; consulted by
// shouldProcess().
static cl::list<std::string> FunctionNames(
    "funcs",
    cl::CommaSeparated,
    cl::desc("list of functions to optimize"),
    cl::value_desc("func1,func2,func3,..."),
    cl::Hidden,
    cl::cat(BoltCategory));

// -funcs-file: hidden; shouldProcess() reads this file line by line into
// FunctionNames.
static cl::opt<std::string> FunctionNamesFile(
    "funcs-file",
    cl::desc("file with list of functions to optimize"),
    cl::Hidden,
    cl::cat(BoltCategory));

// -pad-funcs: hidden, comma-separated "name:padding" pairs; parsed by
// padFunction().
static cl::list<std::string> FunctionPadSpec(
    "pad-funcs",
    cl::CommaSeparated,
    cl::desc("list of functions to pad with amount of bytes"),
    cl::value_desc("func1:pad1,func2:pad2,func3:pad3,..."),
    cl::Hidden,
    cl::cat(BoltCategory));
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-03-28 14:40:20 -07:00
|
|
|
// -hot-text
cl::opt<bool> HotText(
    "hot-text",
    cl::desc("hot text symbols support (relocation mode)"),
    cl::ZeroOrMore,
    cl::cat(BoltCategory));

// -hot-data
static cl::opt<bool> HotData(
    "hot-data",
    cl::desc("hot data symbols support (relocation mode)"),
    cl::ZeroOrMore,
    cl::cat(BoltCategory));

// -update-end: initialized to true.
static cl::opt<bool> UpdateEnd(
    "update-end",
    cl::desc("update the _end symbol to point to the end of all data sections"),
    cl::init(true),
    cl::ZeroOrMore,
    cl::cat(BoltCategory));

// -keep-tmp: hidden flag.
static cl::opt<bool> KeepTmp(
    "keep-tmp",
    cl::desc("preserve intermediate .o file"),
    cl::Hidden,
    cl::cat(BoltCategory));

// -mark-funcs: really-hidden flag.
static cl::opt<bool> MarkFuncs(
    "mark-funcs",
    cl::desc("mark function boundaries with break instruction to make "
             "sure we accidentally don't cross them"),
    cl::ReallyHidden,
    cl::ZeroOrMore,
    cl::cat(BoltCategory));
|
2016-04-08 19:30:27 -07:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// -max-funcs: hidden; shouldProcess() treats a zero value as "no limit".
static cl::opt<unsigned> MaxFunctions(
    "max-funcs",
    cl::desc("maximum number of functions to overwrite"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -max-data-relocations: hidden.
static cl::opt<unsigned> MaxDataRelocations(
    "max-data-relocations",
    cl::desc("maximum number of data relocations to process"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Hidden printing/debugging flags.

// -print-all
cl::opt<bool> PrintAll(
    "print-all",
    cl::desc("print functions after each stage"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -print-cfg
static cl::opt<bool> PrintCFG(
    "print-cfg",
    cl::desc("print functions after CFG construction"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -print-disasm
static cl::opt<bool> PrintDisasm(
    "print-disasm",
    cl::desc("print function after disassembly"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -print-globals
static cl::opt<bool> PrintGlobals(
    "print-globals",
    cl::desc("print global symbols after disassembly"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -print-sections
static cl::opt<bool> PrintSections(
    "print-sections",
    cl::desc("print all registered sections"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -print-loops
static cl::opt<bool> PrintLoopInfo(
    "print-loops",
    cl::desc("print loop related information"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
// -relocs: tri-state (cl::boolOrDefault) so that "not specified" can be told
// apart from an explicit true/false.
static cl::opt<cl::boolOrDefault> RelocationMode(
    "relocs",
    cl::desc("use relocations in the binary (default=autodetect)"),
    cl::ZeroOrMore,
    cl::cat(BoltCategory));

// -w: file name for saving the recorded profile.
static cl::opt<std::string> SaveProfile(
    "w",
    cl::desc("save recorded profile to a file"),
    cl::cat(BoltOutputCategory));

// -skip-funcs: hidden, comma-separated list of function names; consulted by
// shouldProcess().
static cl::list<std::string> SkipFunctionNames(
    "skip-funcs",
    cl::CommaSeparated,
    cl::desc("list of functions to skip"),
    cl::value_desc("func1,func2,func3,..."),
    cl::Hidden,
    cl::cat(BoltCategory));

// -skip-funcs-file: hidden; shouldProcess() reads this file line by line into
// SkipFunctionNames.
static cl::opt<std::string> SkipFunctionNamesFile(
    "skip-funcs-file",
    cl::desc("file with list of functions to skip"),
    cl::Hidden,
    cl::cat(BoltCategory));
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2017-03-28 14:40:20 -07:00
|
|
|
// -split-functions: selects a BinaryFunction::SplittingType by its numeric
// spelling ("0".."3"); initialized to ST_NONE.
cl::opt<BinaryFunction::SplittingType> SplitFunctions(
    "split-functions",
    cl::desc("split functions into hot and cold regions"),
    cl::init(BinaryFunction::ST_NONE),
    cl::values(
        clEnumValN(BinaryFunction::ST_NONE, "0", "do not split any function"),
        clEnumValN(BinaryFunction::ST_EH, "1", "split all landing pads"),
        clEnumValN(BinaryFunction::ST_LARGE, "2",
                   "also split if function too large to fit"),
        clEnumValN(BinaryFunction::ST_ALL, "3", "split all functions")),
    cl::ZeroOrMore,
    cl::cat(BoltOptCategory));

// -split-eh: hidden flag.
cl::opt<bool> SplitEH(
    "split-eh",
    cl::desc("split C++ exception handling code"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltOptCategory));
|
|
|
|
|
|
2017-03-28 14:40:20 -07:00
|
|
|
// -trap-old-code: hidden flag.
cl::opt<bool> TrapOldCode(
    "trap-old-code",
    cl::desc("insert traps in old function bodies (relocation mode)"),
    cl::Hidden,
    cl::cat(BoltCategory));

// -update-debug-sections
cl::opt<bool> UpdateDebugSections(
    "update-debug-sections",
    cl::desc("update DWARF debug sections of the executable"),
    cl::ZeroOrMore,
    cl::cat(BoltCategory));

// -use-gnu-stack
static cl::opt<bool> UseGnuStack(
    "use-gnu-stack",
    cl::desc("use GNU_STACK program header for new segment (workaround for "
             "issues with strip/objcopy)"),
    cl::ZeroOrMore,
    cl::cat(BoltCategory));

// -use-old-text
cl::opt<bool> UseOldText(
    "use-old-text",
    cl::desc("re-use space in old .text if possible (relocation mode)"),
    cl::cat(BoltCategory));
|
|
|
|
|
|
|
|
|
|
// The default verbosity level (0) is pretty terse, level 1 is fairly
|
|
|
|
|
// verbose and usually prints some informational message for every
|
|
|
|
|
// function processed. Level 2 is for the noisiest of messages and
|
|
|
|
|
// often prints a message per basic block.
|
|
|
|
|
// Error messages should never be suppressed by the verbosity level.
|
|
|
|
|
// Only warnings and info messages should be affected.
|
|
|
|
|
//
|
|
|
|
|
// The rational behind stream usage is as follows:
|
|
|
|
|
// outs() for info and debugging controlled by command line flags.
|
|
|
|
|
// errs() for errors and warnings.
|
|
|
|
|
// dbgs() for output within DEBUG().
|
|
|
|
|
cl::opt<unsigned>
|
|
|
|
|
Verbosity("v",
|
|
|
|
|
cl::desc("set verbosity level for diagnostic output"),
|
|
|
|
|
cl::init(0),
|
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
|
cl::cat(BoltCategory));
|
2016-07-11 18:51:13 -07:00
|
|
|
|
2017-05-24 14:14:16 -07:00
|
|
|
// -add-bolt-info: initialized to true.
static cl::opt<bool> AddBoltInfo(
    "add-bolt-info",
    cl::desc("add BOLT version and command line argument information to "
             "processed binaries"),
    cl::init(true),
    cl::cat(BoltCategory));

// -aggregate-only: hidden flag in the aggregator category.
cl::opt<bool> AggregateOnly(
    "aggregate-only",
    cl::desc("exit after writing aggregated data file"),
    cl::Hidden,
    cl::cat(AggregatorCategory));

// -diff-only: hidden flag in the diff category.
cl::opt<bool> DiffOnly(
    "diff-only",
    cl::desc("stop processing once we have enough to compare two binaries"),
    cl::Hidden,
    cl::cat(BoltDiffCategory));

// -time-rewrite: hidden flag.
static cl::opt<bool> TimeRewrite(
    "time-rewrite",
    cl::desc("print time spent in rewriting passes"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Check against lists of functions from options if we should
|
|
|
|
|
// optimize the function with a given name.
|
2015-12-18 17:00:46 -08:00
|
|
|
bool shouldProcess(const BinaryFunction &Function) {
|
2017-10-20 12:11:34 -07:00
|
|
|
if (opts::MaxFunctions && Function.getFunctionNumber() >= opts::MaxFunctions) {
|
|
|
|
|
if (Function.getFunctionNumber() == opts::MaxFunctions)
|
|
|
|
|
dbgs() << "BOLT-INFO: processing ending on " << Function << "\n";
|
|
|
|
|
else
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
|
2016-04-08 19:30:27 -07:00
|
|
|
auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile,
|
|
|
|
|
cl::list<std::string> &FunctionNames) {
|
|
|
|
|
assert(!FunctionNamesFile.empty() && "unexpected empty file name");
|
2016-01-26 16:03:58 -08:00
|
|
|
std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
|
|
|
|
|
std::string FuncName;
|
|
|
|
|
while (std::getline(FuncsFile, FuncName)) {
|
|
|
|
|
FunctionNames.push_back(FuncName);
|
|
|
|
|
}
|
|
|
|
|
FunctionNamesFile = "";
|
2016-04-08 19:30:27 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (!FunctionNamesFile.empty())
|
|
|
|
|
populateFunctionNames(FunctionNamesFile, FunctionNames);
|
|
|
|
|
|
|
|
|
|
if (!SkipFunctionNamesFile.empty())
|
|
|
|
|
populateFunctionNames(SkipFunctionNamesFile, SkipFunctionNames);
|
2016-01-26 16:03:58 -08:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
bool IsValid = true;
|
|
|
|
|
if (!FunctionNames.empty()) {
|
|
|
|
|
IsValid = false;
|
|
|
|
|
for (auto &Name : FunctionNames) {
|
2016-06-10 17:13:05 -07:00
|
|
|
if (Function.hasName(Name)) {
|
2015-11-23 17:54:18 -08:00
|
|
|
IsValid = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!IsValid)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (!SkipFunctionNames.empty()) {
|
|
|
|
|
for (auto &Name : SkipFunctionNames) {
|
2016-06-10 17:13:05 -07:00
|
|
|
if (Function.hasName(Name)) {
|
2015-11-23 17:54:18 -08:00
|
|
|
IsValid = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return IsValid;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
size_t padFunction(const BinaryFunction &Function) {
|
|
|
|
|
static std::map<std::string, size_t> FunctionPadding;
|
|
|
|
|
|
|
|
|
|
if (FunctionPadding.empty() && !FunctionPadSpec.empty()) {
|
|
|
|
|
for (auto &Spec : FunctionPadSpec) {
|
|
|
|
|
auto N = Spec.find(':');
|
|
|
|
|
if (N == std::string::npos)
|
|
|
|
|
continue;
|
|
|
|
|
auto Name = Spec.substr(0, N);
|
|
|
|
|
auto Padding = std::stoull(Spec.substr(N+1));
|
|
|
|
|
FunctionPadding[Name] = Padding;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto &FPI : FunctionPadding) {
|
|
|
|
|
auto Name = FPI.first;
|
|
|
|
|
auto Padding = FPI.second;
|
|
|
|
|
if (Function.hasName(Name)) {
|
|
|
|
|
return Padding;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
} // namespace opts
|
|
|
|
|
|
2018-03-09 09:45:13 -08:00
|
|
|
// Per-target MCPlusBuilder factories. They are only declared here;
// createMCPlusBuilder() below picks one based on the architecture.
extern MCPlusBuilder *
createX86MCPlusBuilder(const MCInstrAnalysis *,
                       const MCInstrInfo *,
                       const MCRegisterInfo *);

extern MCPlusBuilder *
createAArch64MCPlusBuilder(const MCInstrAnalysis *,
                           const MCInstrInfo *,
                           const MCRegisterInfo *);
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
|
|
|
|
|
const MCInstrAnalysis *Analysis, const MCInstrInfo *Info,
|
|
|
|
|
const MCRegisterInfo *RegInfo) {
|
|
|
|
|
if (Arch == Triple::x86_64) {
|
|
|
|
|
return createX86MCPlusBuilder(Analysis, Info, RegInfo);
|
|
|
|
|
} else if (Arch == Triple::aarch64) {
|
|
|
|
|
return createAArch64MCPlusBuilder(Analysis, Info, RegInfo);
|
|
|
|
|
} else {
|
|
|
|
|
llvm_unreachable("architecture unsupport by MCPlusBuilder");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-16 09:27:34 -07:00
|
|
|
// Out-of-class definitions of RewriteInstance's static data members.

// No initializer here; presumably the values are supplied at the in-class
// declaration, which is not visible in this part of the file.
constexpr const char *RewriteInstance::SectionsToOverwrite[];

// Section name prefixes.
const std::string RewriteInstance::OrgSecPrefix(".bolt.org");
const std::string RewriteInstance::BOLTSecPrefix(".bolt");

// Name and description of the timer group.
const char RewriteInstance::TimerGroupName[] = "rewrite";
const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes";
|
2017-11-27 18:00:24 -08:00
|
|
|
|
2017-05-24 14:14:16 -07:00
|
|
|
namespace llvm {
|
|
|
|
|
namespace bolt {
|
|
|
|
|
extern const char *BoltRevision;
|
|
|
|
|
|
2017-10-20 12:11:34 -07:00
|
|
|
void report_error(StringRef Message, std::error_code EC) {
|
2015-11-23 17:54:18 -08:00
|
|
|
assert(EC);
|
2016-02-05 14:42:04 -08:00
|
|
|
errs() << "BOLT-ERROR: '" << Message << "': " << EC.message() << ".\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
/// Print a "BOLT-ERROR" diagnostic composed of \p Message and the string form
/// of \p E to the error stream, then terminate the process with exit code 1.
/// \p E is expected to hold an error (checked by assertion) and is consumed
/// when it is converted to a string.
void report_error(StringRef Message, Error E) {
  assert(E);
  const std::string Description = toString(std::move(E));
  errs() << "BOLT-ERROR: '" << Message << "': " << Description << ".\n";
  exit(1);
}
|
|
|
|
|
|
2017-10-20 12:11:34 -07:00
|
|
|
/// Do nothing when \p EC signals success; otherwise forward \p Message and
/// \p EC to report_error().
void check_error(std::error_code EC, StringRef Message) {
  if (EC)
    report_error(Message, EC);
}
|
|
|
|
|
|
2017-10-20 12:11:34 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
namespace {
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
/// Return \p NamePrefix followed by the smallest positive decimal number for
/// which \p BC.getBinaryDataByName() does not find an entry.
std::string uniquifyName(BinaryContext &BC, std::string NamePrefix) {
  for (unsigned Suffix = 1;; ++Suffix) {
    std::string Candidate = NamePrefix + std::to_string(Suffix);
    if (!BC.getBinaryDataByName(Candidate))
      return Candidate;
  }
}
|
2018-04-20 20:03:31 -07:00
|
|
|
|
|
|
|
|
/// Return true if \p Section holds a section whose name is listed in
/// opts::ReorderData. An error-holding \p Section never matches.
bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
  // Without a valid section no name can match.
  if (!Section)
    return false;

  return std::any_of(opts::ReorderData.begin(), opts::ReorderData.end(),
                     [&](const std::string &ReorderedName) {
                       return Section->getName() == ReorderedName;
                     });
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
|
|
|
|
|
2015-12-18 17:00:46 -08:00
|
|
|
uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size,
|
|
|
|
|
unsigned Alignment,
|
|
|
|
|
unsigned SectionID,
|
|
|
|
|
StringRef SectionName,
|
|
|
|
|
bool IsCode,
|
|
|
|
|
bool IsReadOnly) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
// Register as note section (non-allocatable) if we recognize it as so
|
|
|
|
|
for (auto &OverwriteName : RewriteInstance::SectionsToOverwrite) {
|
|
|
|
|
if (SectionName == OverwriteName) {
|
|
|
|
|
uint8_t *DataCopy = new uint8_t[Size];
|
2018-02-01 16:33:43 -08:00
|
|
|
auto &Section = BC.registerOrUpdateNoteSection(SectionName,
|
|
|
|
|
DataCopy,
|
|
|
|
|
Size,
|
|
|
|
|
Alignment);
|
|
|
|
|
Section.setSectionID(SectionID);
|
|
|
|
|
assert(!Section.isAllocatable() && "note sections cannot be allocatable");
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
return DataCopy;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
uint8_t *Ret;
|
2015-12-18 17:00:46 -08:00
|
|
|
if (IsCode) {
|
2018-02-01 16:33:43 -08:00
|
|
|
Ret = SectionMemoryManager::allocateCodeSection(Size, Alignment,
|
2015-12-18 17:00:46 -08:00
|
|
|
SectionID, SectionName);
|
|
|
|
|
} else {
|
2018-02-01 16:33:43 -08:00
|
|
|
Ret = SectionMemoryManager::allocateDataSection(Size, Alignment,
|
2015-12-18 17:00:46 -08:00
|
|
|
SectionID, SectionName,
|
|
|
|
|
IsReadOnly);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
const auto Flags = BinarySection::getFlags(IsReadOnly, IsCode, true);
|
|
|
|
|
auto &Section = BC.registerOrUpdateSection(SectionName,
|
|
|
|
|
ELF::SHT_PROGBITS,
|
|
|
|
|
Flags,
|
|
|
|
|
Ret,
|
|
|
|
|
Size,
|
|
|
|
|
Alignment);
|
|
|
|
|
Section.setSectionID(SectionID);
|
|
|
|
|
assert(Section.isAllocatable() &&
|
|
|
|
|
"verify that allocatable is marked as allocatable");
|
|
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT: allocating " << (Section.isLocal() ? "local " : "")
|
2016-09-14 16:45:40 -07:00
|
|
|
<< (IsCode ? "code" : (IsReadOnly ? "read-only data" : "data"))
|
2015-12-18 17:00:46 -08:00
|
|
|
<< " section : " << SectionName
|
|
|
|
|
<< " with size " << Size << ", alignment " << Alignment
|
2018-02-01 16:33:43 -08:00
|
|
|
<< " at 0x" << Ret << ", ID = " << SectionID << "\n");
|
|
|
|
|
|
|
|
|
|
return Ret;
|
2015-12-18 17:00:46 -08:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-05-16 17:02:17 -07:00
|
|
|
/// Notifier for non-allocatable (note) section.
|
2016-03-09 16:06:41 -08:00
|
|
|
uint8_t *ExecutableFileMemoryManager::recordNoteSection(
|
2016-03-03 10:13:11 -08:00
|
|
|
const uint8_t *Data,
|
|
|
|
|
uintptr_t Size,
|
|
|
|
|
unsigned Alignment,
|
|
|
|
|
unsigned SectionID,
|
|
|
|
|
StringRef SectionName) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT: note section "
|
|
|
|
|
<< SectionName
|
|
|
|
|
<< " with size " << Size << ", alignment " << Alignment
|
|
|
|
|
<< " at 0x"
|
|
|
|
|
<< Twine::utohexstr(reinterpret_cast<uint64_t>(Data)) << '\n');
|
2018-02-01 16:33:43 -08:00
|
|
|
auto &Section = BC.registerOrUpdateNoteSection(SectionName,
|
|
|
|
|
copyByteArray(Data, Size),
|
|
|
|
|
Size,
|
|
|
|
|
Alignment);
|
|
|
|
|
Section.setSectionID(SectionID);
|
|
|
|
|
assert(!Section.isAllocatable() && "note sections cannot be allocatable");
|
|
|
|
|
return Section.getOutputData();
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
|
|
|
|
|
2015-12-18 17:00:46 -08:00
|
|
|
bool ExecutableFileMemoryManager::finalizeMemory(std::string *ErrMsg) {
|
2016-02-05 14:42:04 -08:00
|
|
|
DEBUG(dbgs() << "BOLT: finalizeMemory()\n");
|
2015-12-18 17:00:46 -08:00
|
|
|
return SectionMemoryManager::finalizeMemory(ErrMsg);
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
/// Out-of-line destructor; performs no work beyond destroying the members
/// and base class.
ExecutableFileMemoryManager::~ExecutableFileMemoryManager() = default;
|
2016-03-03 10:13:11 -08:00
|
|
|
|
2016-08-11 14:23:54 -07:00
|
|
|
namespace {
|
|
|
|
|
|
2018-01-24 05:42:11 -08:00
|
|
|
/// Return the name of \p Section as reported by SectionRef::getName().
/// The status returned by getName() is not inspected, so the result is an
/// empty StringRef unless getName() stored a name.
StringRef getSectionName(SectionRef Section) {
  StringRef Name;
  (void)Section.getName(Name);
  return Name;
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
/// Create BinaryContext for a given architecture \p ArchName and
|
|
|
|
|
/// triple \p TripleName.
|
2017-07-25 09:11:42 -07:00
|
|
|
std::unique_ptr<BinaryContext>
|
2017-08-02 18:14:01 -07:00
|
|
|
createBinaryContext(ELFObjectFileBase *File, DataReader &DR,
|
2017-07-25 09:11:42 -07:00
|
|
|
std::unique_ptr<DWARFContext> DwCtx) {
|
|
|
|
|
std::string ArchName;
|
|
|
|
|
std::string TripleName;
|
|
|
|
|
llvm::Triple::ArchType Arch = (llvm::Triple::ArchType)File->getArch();
|
|
|
|
|
if (Arch == llvm::Triple::x86_64) {
|
|
|
|
|
ArchName = "x86-64";
|
|
|
|
|
TripleName = "x86_64-unknown-linux";
|
|
|
|
|
} else if (Arch == llvm::Triple::aarch64) {
|
|
|
|
|
ArchName = "aarch64";
|
|
|
|
|
TripleName = "aarch64-unknown-linux";
|
|
|
|
|
} else {
|
|
|
|
|
errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n";
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
std::string Error;
|
|
|
|
|
std::unique_ptr<Triple> TheTriple = llvm::make_unique<Triple>(TripleName);
|
|
|
|
|
const Target *TheTarget = TargetRegistry::lookupTarget(ArchName,
|
|
|
|
|
*TheTriple,
|
|
|
|
|
Error);
|
|
|
|
|
if (!TheTarget) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: " << Error;
|
2015-11-23 17:54:18 -08:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<const MCRegisterInfo> MRI(
|
|
|
|
|
TheTarget->createMCRegInfo(TripleName));
|
|
|
|
|
if (!MRI) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Set up disassembler.
|
|
|
|
|
std::unique_ptr<const MCAsmInfo> AsmInfo(
|
|
|
|
|
TheTarget->createMCAsmInfo(*MRI, TripleName));
|
|
|
|
|
if (!AsmInfo) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<const MCSubtargetInfo> STI(
|
|
|
|
|
TheTarget->createMCSubtargetInfo(TripleName, "", ""));
|
|
|
|
|
if (!STI) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
|
|
|
|
|
if (!MII) {
|
2017-11-28 09:57:21 -08:00
|
|
|
errs() << "BOLT-ERROR: no instruction info for target " << TripleName
|
|
|
|
|
<< "\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<MCObjectFileInfo> MOFI =
|
|
|
|
|
llvm::make_unique<MCObjectFileInfo>();
|
|
|
|
|
std::unique_ptr<MCContext> Ctx =
|
|
|
|
|
llvm::make_unique<MCContext>(AsmInfo.get(), MRI.get(), MOFI.get());
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
MOFI->InitMCObjectFileInfo(*TheTriple, /*PIC=*/false, *Ctx);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
std::unique_ptr<MCDisassembler> DisAsm(
|
|
|
|
|
TheTarget->createMCDisassembler(*STI, *Ctx));
|
|
|
|
|
|
|
|
|
|
if (!DisAsm) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<const MCInstrAnalysis> MIA(
|
2018-03-09 09:45:13 -08:00
|
|
|
TheTarget->createMCInstrAnalysis(MII.get()));
|
2015-11-23 17:54:18 -08:00
|
|
|
if (!MIA) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: failed to create instruction analysis for target"
|
2015-11-23 17:54:18 -08:00
|
|
|
<< TripleName << "\n";
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-09 09:45:13 -08:00
|
|
|
|
[BOLT][Refactoring] Isolate changes to MC layer
Summary:
Changes that we made to MCInst, MCOperand, MCExpr, etc. are now all
moved into tools/llvm-bolt. That required a change to the way we handle
annotations and any extra operands for MCInst.
Any MCPlus information is now attached via an extra operand of type
MCInst with an opcode ANNOTATION_LABEL. Since this operand is MCInst, we
attach extra info as operands to this instruction. For first-level
annotations use functions to access the information, such as
getConditionalTailCall() or getEHInfo(), etc. For the rest, optional or
second-class annotations, use a general named-annotation interface such
as getAnnotationAs<uint64_t>(Inst, "Count").
I did a test on HHVM binary, and a memory consumption went down a little
bit while the runtime remained the same.
(cherry picked from FBD7405412)
2018-03-19 18:32:12 -07:00
|
|
|
std::unique_ptr<MCPlusBuilder> MIB(
|
2018-03-09 09:45:13 -08:00
|
|
|
createMCPlusBuilder(Arch, MIA.get(), MII.get(), MRI.get()));
|
|
|
|
|
if (!MIB) {
|
|
|
|
|
errs() << "BOLT-ERROR: failed to create instruction builder for target"
|
|
|
|
|
<< TripleName << "\n";
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
|
|
|
|
|
std::unique_ptr<MCInstPrinter> InstructionPrinter(
|
|
|
|
|
TheTarget->createMCInstPrinter(Triple(TripleName), AsmPrinterVariant,
|
|
|
|
|
*AsmInfo, *MII, *MRI));
|
|
|
|
|
if (!InstructionPrinter) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: no instruction printer for target " << TripleName
|
2015-11-23 17:54:18 -08:00
|
|
|
<< '\n';
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
InstructionPrinter->setPrintImmHex(true);
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<MCCodeEmitter> MCE(
|
|
|
|
|
TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx));
|
|
|
|
|
|
|
|
|
|
// Make sure we don't miss any output on core dumps.
|
|
|
|
|
outs().SetUnbuffered();
|
|
|
|
|
errs().SetUnbuffered();
|
|
|
|
|
dbgs().SetUnbuffered();
|
|
|
|
|
|
|
|
|
|
auto BC =
|
|
|
|
|
llvm::make_unique<BinaryContext>(std::move(Ctx),
|
2016-02-25 16:57:07 -08:00
|
|
|
std::move(DwCtx),
|
2015-11-23 17:54:18 -08:00
|
|
|
std::move(TheTriple),
|
|
|
|
|
TheTarget,
|
|
|
|
|
TripleName,
|
|
|
|
|
std::move(MCE),
|
|
|
|
|
std::move(MOFI),
|
|
|
|
|
std::move(AsmInfo),
|
|
|
|
|
std::move(MII),
|
|
|
|
|
std::move(STI),
|
|
|
|
|
std::move(InstructionPrinter),
|
|
|
|
|
std::move(MIA),
|
2018-03-09 09:45:13 -08:00
|
|
|
std::move(MIB),
|
2015-11-23 17:54:18 -08:00
|
|
|
std::move(MRI),
|
|
|
|
|
std::move(DisAsm),
|
2016-03-14 18:48:05 -07:00
|
|
|
DR);
|
2016-03-02 18:40:10 -08:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
return BC;
|
|
|
|
|
}
|
|
|
|
|
|
2016-08-11 14:23:54 -07:00
|
|
|
} // namespace
|
|
|
|
|
|
2017-08-02 18:14:01 -07:00
|
|
|
/// Construct a rewrite instance for the input ELF \p File.
///
/// Stores \p File, the command line (\p Argc / \p Argv) and \p DA, and builds
/// the BinaryContext from \p File, \p DR and a DWARF context created for
/// \p File. The section-header string table builder is set up for ELF.
RewriteInstance::RewriteInstance(ELFObjectFileBase *File, DataReader &DR,
                                 DataAggregator &DA, const int Argc,
                                 const char *const *Argv)
    : InputFile(File),
      Argc(Argc),
      Argv(Argv),
      DA(DA),
      BC(createBinaryContext(
          File, DR,
          DWARFContext::create(*File, nullptr,
                               DWARFContext::defaultErrorHandler, "",
                               false))),
      SHStrTab(StringTableBuilder::ELF) {
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
/// Out-of-line destructor; performs no work beyond destroying the members.
RewriteInstance::~RewriteInstance() = default;
|
|
|
|
|
|
2015-11-24 09:29:41 -08:00
|
|
|
void RewriteInstance::reset() {
|
|
|
|
|
BinaryFunctions.clear();
|
|
|
|
|
FileSymRefs.clear();
|
|
|
|
|
auto &DR = BC->DR;
|
2017-07-25 09:11:42 -07:00
|
|
|
BC = createBinaryContext(
|
|
|
|
|
InputFile, DR,
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
DWARFContext::create(*InputFile, nullptr,
|
|
|
|
|
DWARFContext::defaultErrorHandler, "", false));
|
2015-11-24 09:29:41 -08:00
|
|
|
CFIRdWrt.reset(nullptr);
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
OLT.reset(nullptr);
|
|
|
|
|
EFMM.reset();
|
2015-11-24 09:29:41 -08:00
|
|
|
Out.reset(nullptr);
|
|
|
|
|
EHFrame = nullptr;
|
|
|
|
|
FailedAddresses.clear();
|
2016-04-05 19:35:45 -07:00
|
|
|
RangesSectionsWriter.reset();
|
2017-05-16 09:27:34 -07:00
|
|
|
LocationListWriter.reset();
|
2017-09-01 18:13:51 -07:00
|
|
|
}
|
|
|
|
|
|
2016-02-08 10:02:48 -08:00
|
|
|
void RewriteInstance::discoverStorage() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2017-11-14 16:51:24 -08:00
|
|
|
|
2017-09-20 10:43:01 -07:00
|
|
|
// Stubs are harmful because RuntimeDyld may try to increase the size of
|
|
|
|
|
// sections accounting for stubs when we need those sections to match the
|
|
|
|
|
// same size seen in the input binary, in case this section is a copy
|
|
|
|
|
// of the original one seen in the binary.
|
2018-02-01 16:33:43 -08:00
|
|
|
EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false));
|
2017-01-17 15:49:59 -08:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
|
2016-02-08 10:02:48 -08:00
|
|
|
if (!ELF64LEFile) {
|
|
|
|
|
errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
auto Obj = ELF64LEFile->getELFFile();
|
2018-06-29 21:12:55 -07:00
|
|
|
if (Obj->getHeader()->e_type != ELF::ET_EXEC) {
|
|
|
|
|
errs() << "BOLT-ERROR: only non-PIE ELF executables are supported at the "
|
|
|
|
|
"moment.\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
EntryPoint = Obj->getHeader()->e_entry;
|
|
|
|
|
|
2016-02-08 10:02:48 -08:00
|
|
|
// This is where the first segment and ELF header were allocated.
|
|
|
|
|
uint64_t FirstAllocAddress = std::numeric_limits<uint64_t>::max();
|
|
|
|
|
|
|
|
|
|
NextAvailableAddress = 0;
|
2016-02-12 19:01:53 -08:00
|
|
|
uint64_t NextAvailableOffset = 0;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto PHs = cantFail(Obj->program_headers(), "program_headers() failed");
|
|
|
|
|
for (const auto &Phdr : PHs) {
|
2016-02-08 10:02:48 -08:00
|
|
|
if (Phdr.p_type == ELF::PT_LOAD) {
|
|
|
|
|
FirstAllocAddress = std::min(FirstAllocAddress,
|
|
|
|
|
static_cast<uint64_t>(Phdr.p_vaddr));
|
|
|
|
|
NextAvailableAddress = std::max(NextAvailableAddress,
|
|
|
|
|
Phdr.p_vaddr + Phdr.p_memsz);
|
2016-02-12 19:01:53 -08:00
|
|
|
NextAvailableOffset = std::max(NextAvailableOffset,
|
|
|
|
|
Phdr.p_offset + Phdr.p_filesz);
|
2017-01-17 15:49:59 -08:00
|
|
|
|
|
|
|
|
EFMM->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
|
|
|
|
|
Phdr.p_memsz,
|
|
|
|
|
Phdr.p_offset,
|
|
|
|
|
Phdr.p_filesz};
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
for (const auto &Section : InputFile->sections()) {
|
|
|
|
|
StringRef SectionName;
|
|
|
|
|
Section.getName(SectionName);
|
|
|
|
|
StringRef SectionContents;
|
|
|
|
|
Section.getContents(SectionContents);
|
|
|
|
|
if (SectionName == ".text") {
|
2017-09-20 10:43:01 -07:00
|
|
|
BC->OldTextSectionAddress = Section.getAddress();
|
|
|
|
|
BC->OldTextSectionSize = Section.getSize();
|
|
|
|
|
BC->OldTextSectionOffset =
|
2016-09-27 19:09:38 -07:00
|
|
|
SectionContents.data() - InputFile->getData().data();
|
2017-02-07 15:31:14 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (SectionName.startswith(OrgSecPrefix) ||
|
2018-06-14 14:27:20 -07:00
|
|
|
SectionName.startswith(BOLTSecPrefix)) {
|
2017-02-07 15:31:14 -08:00
|
|
|
errs() << "BOLT-ERROR: input file was processed by BOLT. "
|
|
|
|
|
"Cannot re-optimize.\n";
|
|
|
|
|
exit(1);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
assert(NextAvailableAddress && NextAvailableOffset &&
|
|
|
|
|
"no PT_LOAD pheader seen");
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "BOLT-INFO: first alloc address is 0x"
|
2016-02-08 10:02:48 -08:00
|
|
|
<< Twine::utohexstr(FirstAllocAddress) << '\n';
|
|
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
FirstNonAllocatableOffset = NextAvailableOffset;
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NextAvailableAddress = alignTo(NextAvailableAddress, PageAlign);
|
|
|
|
|
NextAvailableOffset = alignTo(NextAvailableOffset, PageAlign);
|
2016-02-12 19:01:53 -08:00
|
|
|
|
|
|
|
|
if (!opts::UseGnuStack) {
|
|
|
|
|
// This is where the black magic happens. Creating PHDR table in a segment
|
|
|
|
|
// other than that containing ELF header is tricky. Some loaders and/or
|
|
|
|
|
// parts of loaders will apply e_phoff from ELF header assuming both are in
|
|
|
|
|
// the same segment, while others will do the proper calculation.
|
|
|
|
|
// We create the new PHDR table in such a way that both of the methods
|
|
|
|
|
// of loading and locating the table work. There's a slight file size
|
|
|
|
|
// overhead because of that.
|
2016-03-03 10:13:11 -08:00
|
|
|
//
|
|
|
|
|
// NB: bfd's strip command cannot do the above and will corrupt the
|
|
|
|
|
// binary during the process of stripping non-allocatable sections.
|
2016-02-12 19:01:53 -08:00
|
|
|
if (NextAvailableOffset <= NextAvailableAddress - FirstAllocAddress) {
|
|
|
|
|
NextAvailableOffset = NextAvailableAddress - FirstAllocAddress;
|
|
|
|
|
} else {
|
|
|
|
|
NextAvailableAddress = NextAvailableOffset + FirstAllocAddress;
|
|
|
|
|
}
|
|
|
|
|
assert(NextAvailableOffset == NextAvailableAddress - FirstAllocAddress &&
|
|
|
|
|
"PHDR table address calculation error");
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "BOLT-INFO: creating new program header table at address 0x"
|
2016-02-12 19:01:53 -08:00
|
|
|
<< Twine::utohexstr(NextAvailableAddress) << ", offset 0x"
|
|
|
|
|
<< Twine::utohexstr(NextAvailableOffset) << '\n';
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
PHDRTableAddress = NextAvailableAddress;
|
|
|
|
|
PHDRTableOffset = NextAvailableOffset;
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
// Reserve space for 3 extra pheaders.
|
|
|
|
|
unsigned Phnum = Obj->getHeader()->e_phnum;
|
|
|
|
|
Phnum += 3;
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
NextAvailableAddress += Phnum * sizeof(ELFFile<ELF64LE>::Elf_Phdr);
|
|
|
|
|
NextAvailableOffset += Phnum * sizeof(ELFFile<ELF64LE>::Elf_Phdr);
|
|
|
|
|
}
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
// Align at cache line.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NextAvailableAddress = alignTo(NextAvailableAddress, 64);
|
|
|
|
|
NextAvailableOffset = alignTo(NextAvailableOffset, 64);
|
2016-02-08 10:02:48 -08:00
|
|
|
|
|
|
|
|
NewTextSegmentAddress = NextAvailableAddress;
|
|
|
|
|
NewTextSegmentOffset = NextAvailableOffset;
|
2017-08-31 11:45:37 -07:00
|
|
|
BC->LayoutStartAddress = NextAvailableAddress;
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
|
|
|
|
|
2017-10-06 14:42:46 -07:00
|
|
|
/// Read the GNU build-id of the input binary.
///
/// Looks for the ".note.gnu.build-id" section of the input file, parses the
/// first note record stored in it and converts the note descriptor into a
/// string of two-digit hexadecimal bytes. On success the build-id is also
/// printed to outs().
///
/// \returns the hex-encoded build-id, or None when the section is absent,
/// cannot be read, is truncated, or does not contain a GNU build-id note.
Optional<std::string>
RewriteInstance::getBuildID() const {
  for (auto &Section : InputFile->sections()) {
    StringRef SectionName;
    // A section whose name cannot be read cannot be matched; skip it.
    if (Section.getName(SectionName))
      continue;

    if (SectionName != ".note.gnu.build-id")
      continue;

    StringRef SectionContents;
    if (Section.getContents(SectionContents))
      return None;

    // Reading notes section (see Portable Formats Specification, Version 1.1,
    // pg 2-5, section "Note Section").
    DataExtractor DE = DataExtractor(SectionContents, true, 8);
    uint32_t Offset = 0;

    // The note header consists of three 4-byte words: namesz, descsz and type.
    // Make sure all of them are present before reading any.
    if (!DE.isValidOffsetForDataOfSize(Offset, 3 * sizeof(uint32_t)))
      return None;
    const uint32_t NameSz = DE.getU32(&Offset);
    const uint32_t DescSz = DE.getU32(&Offset);
    const uint32_t Type = DE.getU32(&Offset);

    DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
                 << "; Type = " << Type << "\n");

    // Type 3 is a GNU build-id note section
    if (Type != 3)
      return None;

    // Compute the name and descriptor bounds in 64 bits so that a corrupted
    // size field cannot wrap around, and reject notes extending past the end
    // of the section instead of returning a silently truncated build-id.
    const uint64_t NameOffset = Offset;
    const uint64_t DescOffset = alignTo(NameOffset + NameSz, 4);
    if (DescOffset + DescSz > SectionContents.size())
      return None;

    StringRef Name = SectionContents.slice(NameOffset, NameOffset + NameSz);
    if (Name.substr(0, 3) != "GNU")
      return None;

    StringRef BinaryBuildID =
        SectionContents.slice(DescOffset, DescOffset + DescSz);

    // Emit every descriptor byte as exactly two hex digits.
    std::string Str;
    raw_string_ostream OS(Str);
    auto CharIter = BinaryBuildID.bytes_begin();
    while (CharIter != BinaryBuildID.bytes_end()) {
      if (*CharIter < 0x10)
        OS << "0";
      OS << Twine::utohexstr(*CharIter);
      ++CharIter;
    }

    outs() << "BOLT-INFO: binary build-id is:     " << OS.str() << "\n";
    return OS.str();
  }
  return None;
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
void RewriteInstance::run() {
|
|
|
|
|
if (!BC) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: failed to create a binary context\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-07-17 11:22:22 -07:00
|
|
|
auto executeRewritePass = [&](const std::set<uint64_t> &NonSimpleFunctions) {
|
|
|
|
|
discoverStorage();
|
|
|
|
|
readSpecialSections();
|
2018-04-13 15:46:19 -07:00
|
|
|
adjustCommandLineOptions();
|
2017-07-17 11:22:22 -07:00
|
|
|
discoverFileObjects();
|
|
|
|
|
readDebugInfo();
|
|
|
|
|
disassembleFunctions();
|
2017-12-13 23:12:01 -08:00
|
|
|
processProfileData();
|
2017-11-28 09:57:21 -08:00
|
|
|
if (opts::AggregateOnly)
|
|
|
|
|
return;
|
|
|
|
|
postProcessFunctions();
|
2017-07-17 11:22:22 -07:00
|
|
|
for (uint64_t Address : NonSimpleFunctions) {
|
|
|
|
|
auto FI = BinaryFunctions.find(Address);
|
|
|
|
|
assert(FI != BinaryFunctions.end() && "bad non-simple function address");
|
|
|
|
|
FI->second.setSimple(false);
|
|
|
|
|
}
|
2017-12-07 15:00:41 -08:00
|
|
|
if (opts::DiffOnly)
|
|
|
|
|
return;
|
2017-07-17 11:22:22 -07:00
|
|
|
runOptimizationPasses();
|
|
|
|
|
emitFunctions();
|
|
|
|
|
};
|
2016-04-11 17:46:18 -07:00
|
|
|
|
2017-07-25 09:11:42 -07:00
|
|
|
outs() << "BOLT-INFO: Target architecture: "
|
|
|
|
|
<< Triple::getArchTypeName(
|
|
|
|
|
(llvm::Triple::ArchType)InputFile->getArch())
|
|
|
|
|
<< "\n";
|
|
|
|
|
|
2018-05-16 13:31:13 -07:00
|
|
|
if (DA.started()) {
|
|
|
|
|
if (auto FileBuildID = getBuildID()) {
|
|
|
|
|
DA.processFileBuildID(*FileBuildID);
|
|
|
|
|
} else {
|
|
|
|
|
errs() << "BOLT-WARNING: build-id will not be checked because we could "
|
|
|
|
|
"not read one from input binary\n";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-07-17 11:22:22 -07:00
|
|
|
unsigned PassNumber = 1;
|
|
|
|
|
executeRewritePass({});
|
2017-12-07 15:00:41 -08:00
|
|
|
if (opts::AggregateOnly || opts::DiffOnly)
|
2017-09-01 18:13:51 -07:00
|
|
|
return;
|
2016-03-31 16:38:49 -07:00
|
|
|
|
|
|
|
|
if (opts::SplitFunctions == BinaryFunction::ST_LARGE &&
|
2016-04-11 17:46:18 -07:00
|
|
|
checkLargeFunctions()) {
|
|
|
|
|
++PassNumber;
|
2016-03-31 16:38:49 -07:00
|
|
|
// Emit again because now some functions have been split
|
2018-06-14 14:27:20 -07:00
|
|
|
outs() << "BOLT: split-functions: starting pass " << PassNumber << "...\n";
|
2016-03-31 16:38:49 -07:00
|
|
|
reset();
|
2017-07-17 11:22:22 -07:00
|
|
|
executeRewritePass({});
|
2016-03-31 16:38:49 -07:00
|
|
|
}
|
|
|
|
|
|
2016-04-11 17:46:18 -07:00
|
|
|
// Emit functions again ignoring functions which still didn't fit in their
|
|
|
|
|
// original space, so that we don't generate incorrect debugging information
|
|
|
|
|
// for them (information that would reflect the optimized version).
|
|
|
|
|
if (opts::UpdateDebugSections && opts::FixDebugInfoLargeFunctions &&
|
|
|
|
|
checkLargeFunctions()) {
|
|
|
|
|
++PassNumber;
|
2018-05-17 18:27:13 -07:00
|
|
|
outs() << format("BOLT: starting pass %zu (ignoring %zu large functions) ",
|
|
|
|
|
PassNumber, LargeFunctions.size())
|
|
|
|
|
<< "...\n";
|
2016-04-11 17:46:18 -07:00
|
|
|
reset();
|
2017-07-17 11:22:22 -07:00
|
|
|
executeRewritePass(LargeFunctions);
|
2016-04-11 17:46:18 -07:00
|
|
|
}
|
|
|
|
|
|
2016-05-31 19:12:26 -07:00
|
|
|
if (opts::UpdateDebugSections)
|
|
|
|
|
updateDebugInfo();
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2017-05-24 14:14:16 -07:00
|
|
|
addBoltInfoSection();
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Copy allocatable part of the input.
|
2015-11-23 17:54:18 -08:00
|
|
|
std::error_code EC;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Out = llvm::make_unique<ToolOutputFile>(opts::OutputFilename, EC,
|
|
|
|
|
sys::fs::F_None, 0777);
|
2015-11-23 17:54:18 -08:00
|
|
|
check_error(EC, "cannot create output executable file");
|
2016-03-03 10:13:11 -08:00
|
|
|
Out->os() << InputFile->getData().substr(0, FirstNonAllocatableOffset);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Rewrite allocatable contents and copy non-allocatable parts with mods.
|
2015-11-23 17:54:18 -08:00
|
|
|
rewriteFile();
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 11:09:34 -08:00
|
|
|
void RewriteInstance::discoverFileObjects() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("discoverFileObjects", "discover file objects",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2017-11-27 18:00:24 -08:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
FileSymRefs.clear();
|
|
|
|
|
BinaryFunctions.clear();
|
2017-11-14 20:05:11 -08:00
|
|
|
BC->clearBinaryData();
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-09-29 11:19:06 -07:00
|
|
|
// For local symbols we want to keep track of associated FILE symbol name for
|
|
|
|
|
// disambiguation by combined name.
|
|
|
|
|
StringRef FileSymbolName;
|
|
|
|
|
bool SeenFileName = false;
|
|
|
|
|
struct SymbolRefHash {
|
|
|
|
|
std::size_t operator()(SymbolRef const &S) const {
|
|
|
|
|
return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName;
|
|
|
|
|
for (const auto &Symbol : InputFile->symbols()) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto NameOrError = Symbol.getName();
|
2017-02-07 15:56:00 -08:00
|
|
|
if (NameOrError && NameOrError->startswith("__asan_init")) {
|
|
|
|
|
errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer "
|
|
|
|
|
"support. Cannot optimize.\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
2017-03-31 07:51:30 -07:00
|
|
|
if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) {
|
|
|
|
|
errs() << "BOLT-ERROR: input file was compiled or linked with coverage "
|
|
|
|
|
"support. Cannot optimize.\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
2017-02-07 15:56:00 -08:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
if (Symbol.getFlags() & SymbolRef::SF_Undefined)
|
|
|
|
|
continue;
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
|
|
|
|
|
auto Name =
|
|
|
|
|
cantFail(std::move(NameOrError), "cannot get symbol name for file");
|
2017-09-25 18:05:37 -07:00
|
|
|
// Ignore Clang LTO artificial FILE symbol as it is not always generated,
|
|
|
|
|
// and this uncertainty is causing havoc in function name matching.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (Name == "ld-temp.o")
|
2017-09-25 18:05:37 -07:00
|
|
|
continue;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
FileSymbolName = Name;
|
2016-07-11 18:51:13 -07:00
|
|
|
SeenFileName = true;
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
2016-09-29 11:19:06 -07:00
|
|
|
if (!FileSymbolName.empty() &&
|
|
|
|
|
!(Symbol.getFlags() & SymbolRef::SF_Global)) {
|
|
|
|
|
SymbolToFileName[Symbol] = FileSymbolName;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Sort symbols in the file by value.
|
|
|
|
|
std::vector<SymbolRef> SortedFileSymbols(InputFile->symbol_begin(),
|
|
|
|
|
InputFile->symbol_end());
|
|
|
|
|
std::stable_sort(SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
|
|
|
|
[](const SymbolRef &A, const SymbolRef &B) {
|
2016-09-27 19:09:38 -07:00
|
|
|
// FUNC symbols have higher precedence.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto AddressA = cantFail(A.getAddress());
|
|
|
|
|
auto AddressB = cantFail(B.getAddress());
|
|
|
|
|
if (AddressA == AddressB) {
|
|
|
|
|
return cantFail(A.getType()) == SymbolRef::ST_Function &&
|
|
|
|
|
cantFail(B.getType()) != SymbolRef::ST_Function;
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
return AddressA < AddressB;
|
2016-09-29 11:19:06 -07:00
|
|
|
});
|
|
|
|
|
|
2017-11-22 16:17:36 -08:00
|
|
|
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
|
|
|
|
// code section (see IHI0056B). $d identifies data contents.
|
|
|
|
|
auto MarkersBegin = SortedFileSymbols.end();
|
2018-03-20 14:34:58 -07:00
|
|
|
if (BC->isAArch64()) {
|
2017-11-22 16:17:36 -08:00
|
|
|
MarkersBegin = std::stable_partition(
|
|
|
|
|
SortedFileSymbols.begin(), SortedFileSymbols.end(),
|
|
|
|
|
[](const SymbolRef &Symbol) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
StringRef Name = cantFail(Symbol.getName());
|
|
|
|
|
return !(cantFail(Symbol.getType()) == SymbolRef::ST_Unknown &&
|
|
|
|
|
(Name == "$d" || Name == "$x"));
|
2017-11-22 16:17:36 -08:00
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
auto getNextAddress = [&](std::vector<SymbolRef>::const_iterator Itr) {
|
|
|
|
|
auto Section = cantFail(Itr->getSection());
|
|
|
|
|
const auto SymbolEndAddress =
|
|
|
|
|
(cantFail(Itr->getAddress()) + ELFSymbolRef(*Itr).getSize());
|
|
|
|
|
|
|
|
|
|
// absolute sym
|
|
|
|
|
if (Section == InputFile->section_end())
|
|
|
|
|
return SymbolEndAddress;
|
|
|
|
|
|
|
|
|
|
while (Itr != MarkersBegin - 1 &&
|
|
|
|
|
cantFail(std::next(Itr)->getSection()) == Section &&
|
|
|
|
|
cantFail(std::next(Itr)->getAddress()) ==
|
|
|
|
|
cantFail(Itr->getAddress())) {
|
|
|
|
|
++Itr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Itr != MarkersBegin - 1 &&
|
|
|
|
|
cantFail(std::next(Itr)->getSection()) == Section)
|
|
|
|
|
return cantFail(std::next(Itr)->getAddress());
|
|
|
|
|
|
|
|
|
|
const auto SectionEndAddress = Section->getAddress() + Section->getSize();
|
|
|
|
|
if ((ELFSectionRef(*Section).getFlags() & ELF::SHF_TLS) ||
|
|
|
|
|
SymbolEndAddress > SectionEndAddress)
|
|
|
|
|
return SymbolEndAddress;
|
|
|
|
|
|
|
|
|
|
return SectionEndAddress;
|
|
|
|
|
};
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
BinaryFunction *PreviousFunction = nullptr;
|
2017-11-14 20:05:11 -08:00
|
|
|
unsigned AnonymousId = 0;
|
|
|
|
|
|
2017-11-22 16:17:36 -08:00
|
|
|
for (auto ISym = SortedFileSymbols.begin(); ISym != MarkersBegin; ++ISym) {
|
|
|
|
|
const auto &Symbol = *ISym;
|
2016-09-29 11:19:06 -07:00
|
|
|
// Keep undefined symbols for pretty printing?
|
|
|
|
|
if (Symbol.getFlags() & SymbolRef::SF_Undefined)
|
|
|
|
|
continue;
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (cantFail(Symbol.getType()) == SymbolRef::ST_File)
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name");
|
|
|
|
|
uint64_t Address =
|
|
|
|
|
cantFail(Symbol.getAddress(), "cannot get symbol address");
|
2015-11-23 17:54:18 -08:00
|
|
|
if (Address == 0) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (opts::Verbosity >= 1 &&
|
|
|
|
|
cantFail(Symbol.getType()) == SymbolRef::ST_Function)
|
2016-02-05 14:42:04 -08:00
|
|
|
errs() << "BOLT-WARNING: function with 0 address seen\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-22 16:17:36 -08:00
|
|
|
FileSymRefs[Address] = Symbol;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-07-11 18:51:13 -07:00
|
|
|
/// It is possible we are seeing a globalized local. LLVM might treat it as
|
|
|
|
|
/// a local if it has a "private global" prefix, e.g. ".L". Thus we have to
|
|
|
|
|
/// change the prefix to enforce global scope of the symbol.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix())
|
|
|
|
|
? "PG" + std::string(SymName)
|
|
|
|
|
: std::string(SymName);
|
2016-07-11 18:51:13 -07:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Disambiguate all local symbols before adding to symbol table.
|
2016-07-11 18:51:13 -07:00
|
|
|
// Since we don't know if we will see a global with the same name,
|
2015-11-23 17:54:18 -08:00
|
|
|
// always modify the local name.
|
2016-07-11 18:51:13 -07:00
|
|
|
//
|
|
|
|
|
// NOTE: the naming convention for local symbols should match
|
|
|
|
|
// the one we use for profile data.
|
2015-11-23 17:54:18 -08:00
|
|
|
std::string UniqueName;
|
2016-07-11 18:51:13 -07:00
|
|
|
std::string AlternativeName;
|
2017-11-14 20:05:11 -08:00
|
|
|
if (Name.empty()) {
|
|
|
|
|
if (PLTSection && PLTSection->getAddress() == Address) {
|
2018-06-14 14:27:20 -07:00
|
|
|
// Don't register BOLT_PLT_PSEUDO twice.
|
2017-11-14 20:05:11 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++);
|
|
|
|
|
} else if (Symbol.getFlags() & SymbolRef::SF_Global) {
|
|
|
|
|
assert(!BC->getBinaryDataByName(Name) && "global name not unique");
|
2016-07-11 18:51:13 -07:00
|
|
|
UniqueName = Name;
|
2015-11-23 17:54:18 -08:00
|
|
|
} else {
|
2016-07-11 18:51:13 -07:00
|
|
|
// If we have a local file name, we should create 2 variants for the
|
|
|
|
|
// function name. The reason is that perf profile might have been
|
|
|
|
|
// collected on a binary that did not have the local file name (e.g. as
|
|
|
|
|
// a side effect of stripping debug info from the binary):
|
|
|
|
|
//
|
|
|
|
|
// primary: <function>/<id>
|
|
|
|
|
// alternative: <function>/<file>/<id2>
|
|
|
|
|
//
|
|
|
|
|
// The <id> field is used for disambiguation of local symbols since there
|
|
|
|
|
// could be identical function names coming from identical file names
|
|
|
|
|
// (e.g. from different directories).
|
|
|
|
|
std::string Prefix = Name + "/";
|
|
|
|
|
std::string AltPrefix;
|
2016-09-29 11:19:06 -07:00
|
|
|
auto SFI = SymbolToFileName.find(Symbol);
|
|
|
|
|
if (SFI != SymbolToFileName.end()) {
|
|
|
|
|
AltPrefix = Prefix + std::string(SFI->second) + "/";
|
|
|
|
|
}
|
2016-07-11 18:51:13 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
UniqueName = uniquifyName(*BC, Prefix);
|
2016-07-11 18:51:13 -07:00
|
|
|
if (!AltPrefix.empty())
|
2017-11-14 20:05:11 -08:00
|
|
|
AlternativeName = uniquifyName(*BC, AltPrefix);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
|
|
|
|
|
uint64_t NextAddress = getNextAddress(ISym);
|
|
|
|
|
uint64_t TentativeSize = !SymbolSize ? NextAddress - Address : SymbolSize;
|
|
|
|
|
uint64_t SymbolAlignment = Symbol.getAlignment();
|
2018-04-20 20:03:31 -07:00
|
|
|
unsigned SymbolFlags = Symbol.getFlags();
|
2017-11-14 20:05:11 -08:00
|
|
|
|
|
|
|
|
auto registerName = [&](uint64_t FinalSize) {
|
|
|
|
|
// Register names even if it's not a function, e.g. for an entry point.
|
2018-04-20 20:03:31 -07:00
|
|
|
BC->registerNameAtAddress(UniqueName, Address, FinalSize,
|
|
|
|
|
SymbolAlignment, SymbolFlags);
|
2017-11-14 20:05:11 -08:00
|
|
|
if (!AlternativeName.empty())
|
|
|
|
|
BC->registerNameAtAddress(AlternativeName, Address, FinalSize,
|
2018-04-20 20:03:31 -07:00
|
|
|
SymbolAlignment, SymbolFlags);
|
2017-11-14 20:05:11 -08:00
|
|
|
};
|
2015-11-23 17:54:18 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
section_iterator Section =
|
|
|
|
|
cantFail(Symbol.getSection(), "cannot get symbol section");
|
2016-03-03 10:13:11 -08:00
|
|
|
if (Section == InputFile->section_end()) {
|
2015-11-23 17:54:18 -08:00
|
|
|
// Could be an absolute symbol. Could record for pretty printing.
|
2017-11-14 20:05:11 -08:00
|
|
|
DEBUG(if (opts::Verbosity > 1) {
|
|
|
|
|
dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n";
|
|
|
|
|
});
|
|
|
|
|
registerName(TentativeSize);
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-29 11:19:06 -07:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName
|
|
|
|
|
<< " for function\n");
|
|
|
|
|
|
|
|
|
|
if (!Section->isText()) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
assert(cantFail(Symbol.getType()) != SymbolRef::ST_Function &&
|
2016-09-29 11:19:06 -07:00
|
|
|
"unexpected function inside non-code section");
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n");
|
2017-11-14 20:05:11 -08:00
|
|
|
registerName(TentativeSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Assembly functions could be ST_NONE with 0 size. Check that the
|
|
|
|
|
// corresponding section is a code section and they are not inside any
|
|
|
|
|
// other known function to consider them.
|
|
|
|
|
//
|
|
|
|
|
// Sometimes assembly functions are not marked as functions and neither are
|
|
|
|
|
// their local labels. The only way to tell them apart is to look at
|
|
|
|
|
// symbol scope - global vs local.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (cantFail(Symbol.getType()) != SymbolRef::ST_Function) {
|
2016-09-29 11:19:06 -07:00
|
|
|
if (PreviousFunction) {
|
|
|
|
|
if (PreviousFunction->getSize() == 0) {
|
|
|
|
|
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
|
2017-11-14 20:05:11 -08:00
|
|
|
registerName(SymbolSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
} else if (PreviousFunction->containsAddress(Address)) {
|
|
|
|
|
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
|
2017-11-14 20:05:11 -08:00
|
|
|
registerName(SymbolSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
} else {
|
|
|
|
|
if (Address == PreviousFunction->getAddress() && SymbolSize == 0) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n");
|
2017-11-14 20:05:11 -08:00
|
|
|
registerName(SymbolSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (opts::Verbosity > 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: symbol " << UniqueName
|
|
|
|
|
<< " seen in the middle of function "
|
|
|
|
|
<< *PreviousFunction << ". Could be a new entry.\n";
|
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
registerName(SymbolSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (PreviousFunction &&
|
|
|
|
|
PreviousFunction->containsAddress(Address) &&
|
2016-09-27 19:09:38 -07:00
|
|
|
PreviousFunction->getAddress() != Address) {
|
|
|
|
|
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
outs() << "BOLT-DEBUG: possibly another entry for function "
|
|
|
|
|
<< *PreviousFunction << " : " << UniqueName << '\n';
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
outs() << "BOLT-INFO: using " << UniqueName << " as another entry to "
|
|
|
|
|
<< "function " << *PreviousFunction << '\n';
|
|
|
|
|
|
|
|
|
|
PreviousFunction->
|
|
|
|
|
addEntryPointAtOffset(Address - PreviousFunction->getAddress());
|
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations)
|
2016-09-27 19:09:38 -07:00
|
|
|
PreviousFunction->setSimple(false);
|
|
|
|
|
|
|
|
|
|
// Remove the symbol from FileSymRefs so that we can skip it
|
|
|
|
|
// in the future.
|
|
|
|
|
auto SI = FileSymRefs.find(Address);
|
|
|
|
|
assert(SI != FileSymRefs.end() && "symbol expected to be present");
|
|
|
|
|
assert(SI->second == Symbol && "wrong symbol found");
|
|
|
|
|
FileSymRefs.erase(SI);
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
registerName(SymbolSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 11:09:34 -08:00
|
|
|
// Check for conflicts with function data from FDEs.
|
|
|
|
|
bool IsSimple = true;
|
|
|
|
|
auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address);
|
|
|
|
|
if (FDEI != CFIRdWrt->getFDEs().end()) {
|
2018-02-14 12:06:17 -08:00
|
|
|
const auto &FDE = *FDEI->second;
|
2016-03-11 11:09:34 -08:00
|
|
|
if (FDEI->first != Address) {
|
|
|
|
|
// There's no matching starting address in FDE. Make sure the previous
|
|
|
|
|
// FDE does not contain this address.
|
|
|
|
|
if (FDEI != CFIRdWrt->getFDEs().begin()) {
|
|
|
|
|
--FDEI;
|
|
|
|
|
auto &PrevFDE = *FDEI->second;
|
|
|
|
|
auto PrevStart = PrevFDE.getInitialLocation();
|
|
|
|
|
auto PrevLength = PrevFDE.getAddressRange();
|
2016-09-15 15:47:10 -07:00
|
|
|
if (Address > PrevStart && Address < PrevStart + PrevLength) {
|
2016-09-27 19:09:38 -07:00
|
|
|
errs() << "BOLT-ERROR: function " << UniqueName
|
|
|
|
|
<< " is in conflict with FDE ["
|
|
|
|
|
<< Twine::utohexstr(PrevStart) << ", "
|
|
|
|
|
<< Twine::utohexstr(PrevStart + PrevLength)
|
|
|
|
|
<< "). Skipping.\n";
|
2016-03-11 11:09:34 -08:00
|
|
|
IsSimple = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if (FDE.getAddressRange() != SymbolSize) {
|
2016-09-15 15:47:10 -07:00
|
|
|
if (SymbolSize) {
|
|
|
|
|
// Function addresses match but sizes differ.
|
2017-06-02 18:41:31 -07:00
|
|
|
errs() << "BOLT-WARNING: sizes differ for function " << UniqueName
|
2016-09-27 19:09:38 -07:00
|
|
|
<< ". FDE : " << FDE.getAddressRange()
|
2017-06-02 18:41:31 -07:00
|
|
|
<< "; symbol table : " << SymbolSize << ". Using max size.\n";
|
2016-09-02 14:15:29 -07:00
|
|
|
}
|
2016-03-11 11:09:34 -08:00
|
|
|
SymbolSize = std::max(SymbolSize, FDE.getAddressRange());
|
2017-11-14 20:05:11 -08:00
|
|
|
if (BC->getBinaryDataAtAddress(Address)) {
|
|
|
|
|
BC->setBinaryDataSize(Address, SymbolSize);
|
|
|
|
|
} else {
|
2018-02-14 12:06:17 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << "\n");
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
2016-03-11 11:09:34 -08:00
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
TentativeSize = SymbolSize;
|
2016-03-11 11:09:34 -08:00
|
|
|
}
|
|
|
|
|
|
2016-08-11 14:23:54 -07:00
|
|
|
BinaryFunction *BF{nullptr};
|
2016-06-10 17:13:05 -07:00
|
|
|
auto BFI = BinaryFunctions.find(Address);
|
|
|
|
|
if (BFI != BinaryFunctions.end()) {
|
2016-08-11 14:23:54 -07:00
|
|
|
BF = &BFI->second;
|
2016-06-10 17:13:05 -07:00
|
|
|
// Duplicate function name. Make sure everything matches before we add
|
|
|
|
|
// an alternative name.
|
2016-09-15 15:47:10 -07:00
|
|
|
if (SymbolSize != BF->getSize()) {
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
if (SymbolSize && BF->getSize()) {
|
|
|
|
|
errs() << "BOLT-WARNING: size mismatch for duplicate entries "
|
|
|
|
|
<< *BF << " and " << UniqueName << '\n';
|
|
|
|
|
}
|
|
|
|
|
outs() << "BOLT-INFO: adjusting size of function " << *BF
|
|
|
|
|
<< " old " << BF->getSize() << " new " << SymbolSize << "\n";
|
|
|
|
|
}
|
|
|
|
|
BF->setSize(std::max(SymbolSize, BF->getSize()));
|
2017-11-14 20:05:11 -08:00
|
|
|
BC->setBinaryDataSize(Address, BF->getSize());
|
2016-06-10 17:13:05 -07:00
|
|
|
}
|
2016-08-11 14:23:54 -07:00
|
|
|
BF->addAlternativeName(UniqueName);
|
2016-06-10 17:13:05 -07:00
|
|
|
} else {
|
2018-02-01 16:33:43 -08:00
|
|
|
auto Section = BC->getSectionForAddress(Address);
|
|
|
|
|
assert(Section && "section for functions must be registered.");
|
|
|
|
|
BF = createBinaryFunction(UniqueName, *Section, Address,
|
|
|
|
|
SymbolSize, IsSimple);
|
2016-06-10 17:13:05 -07:00
|
|
|
}
|
2016-07-11 18:51:13 -07:00
|
|
|
if (!AlternativeName.empty())
|
2016-08-11 14:23:54 -07:00
|
|
|
BF->addAlternativeName(AlternativeName);
|
2016-09-29 11:19:06 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
registerName(SymbolSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
PreviousFunction = BF;
|
2016-07-11 18:51:13 -07:00
|
|
|
}
|
|
|
|
|
|
2017-08-04 11:21:05 -07:00
|
|
|
// Process PLT section.
|
2017-08-24 14:37:35 -07:00
|
|
|
if (BC->TheTriple->getArch() == Triple::x86_64)
|
|
|
|
|
disassemblePLT();
|
2017-08-04 11:21:05 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// See if we missed any functions marked by FDE.
|
|
|
|
|
for (const auto &FDEI : CFIRdWrt->getFDEs()) {
|
|
|
|
|
const auto Address = FDEI.first;
|
|
|
|
|
const auto *FDE = FDEI.second;
|
2017-08-04 11:21:05 -07:00
|
|
|
const auto *BF = getBinaryFunctionAtAddress(Address);
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!BF) {
|
2017-08-04 11:21:05 -07:00
|
|
|
if (const auto *PartialBF = getBinaryFunctionContainingAddress(Address)) {
|
2017-07-17 11:22:22 -07:00
|
|
|
errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
|
|
|
|
|
<< Twine::utohexstr(Address + FDE->getAddressRange())
|
2017-08-04 11:21:05 -07:00
|
|
|
<< ") conflicts with function " << *PartialBF << '\n';
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
2017-08-04 11:21:05 -07:00
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address)
|
|
|
|
|
<< ", 0x" << Twine::utohexstr(Address + FDE->getAddressRange())
|
|
|
|
|
<< ") has no corresponding symbol table entry\n";
|
|
|
|
|
}
|
|
|
|
|
auto Section = BC->getSectionForAddress(Address);
|
|
|
|
|
assert(Section && "cannot get section for address from FDE");
|
2016-09-27 19:09:38 -07:00
|
|
|
std::string FunctionName =
|
|
|
|
|
"__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str();
|
|
|
|
|
createBinaryFunction(FunctionName, *Section, Address,
|
|
|
|
|
FDE->getAddressRange(), true);
|
|
|
|
|
}
|
2017-06-02 18:41:31 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-11 18:51:13 -07:00
|
|
|
if (!SeenFileName && BC->DR.hasLocalsWithFileName() && !opts::AllowStripped) {
|
|
|
|
|
errs() << "BOLT-ERROR: input binary does not have local file symbols "
|
|
|
|
|
"but profile data includes function names with embedded file "
|
|
|
|
|
"names. It appears that the input binary was stripped while a "
|
|
|
|
|
"profiled binary was not. If you know what you are doing and "
|
|
|
|
|
"wish to proceed, use -allow-stripped option.\n";
|
|
|
|
|
exit(1);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
2016-09-29 11:19:06 -07:00
|
|
|
|
|
|
|
|
// Now that all the functions were created - adjust their boundaries.
|
|
|
|
|
adjustFunctionBoundaries();
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-11-22 16:17:36 -08:00
|
|
|
// Annotate functions with code/data markers in AArch64
|
|
|
|
|
for (auto ISym = MarkersBegin; ISym != SortedFileSymbols.end(); ++ISym) {
|
|
|
|
|
const auto &Symbol = *ISym;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
uint64_t Address =
|
|
|
|
|
cantFail(Symbol.getAddress(), "cannot get symbol address");
|
2017-11-22 16:17:36 -08:00
|
|
|
auto SymbolSize = ELFSymbolRef(Symbol).getSize();
|
|
|
|
|
auto *BF = getBinaryFunctionContainingAddress(Address, true, true);
|
|
|
|
|
if (!BF) {
|
|
|
|
|
// Stray marker
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
const auto EntryOffset = Address - BF->getAddress();
|
|
|
|
|
if (BF->isCodeMarker(Symbol, SymbolSize)) {
|
|
|
|
|
BF->markCodeAtOffset(EntryOffset);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (BF->isDataMarker(Symbol, SymbolSize)) {
|
|
|
|
|
BF->markDataAtOffset(EntryOffset);
|
|
|
|
|
BC->AddressToConstantIslandMap[Address] = BF;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
llvm_unreachable("Unknown marker");
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations)
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// Read all relocations now that we have binary functions mapped.
|
|
|
|
|
for (const auto &Section : InputFile->sections()) {
|
2018-07-06 21:30:23 -07:00
|
|
|
if (Section.getRelocatedSection() != InputFile->section_end())
|
2016-09-27 19:09:38 -07:00
|
|
|
readRelocations(Section);
|
|
|
|
|
}
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
|
|
|
|
|
2017-08-04 11:21:05 -07:00
|
|
|
void RewriteInstance::disassemblePLT() {
|
2018-01-23 15:10:24 -08:00
|
|
|
if (!PLTSection)
|
2017-08-04 11:21:05 -07:00
|
|
|
return;
|
|
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
const auto PLTAddress = PLTSection->getAddress();
|
|
|
|
|
StringRef PLTContents = PLTSection->getContents();
|
2017-08-04 11:21:05 -07:00
|
|
|
ArrayRef<uint8_t> PLTData(
|
|
|
|
|
reinterpret_cast<const uint8_t *>(PLTContents.data()),
|
2018-01-23 15:10:24 -08:00
|
|
|
PLTSection->getSize());
|
2017-08-04 11:21:05 -07:00
|
|
|
|
|
|
|
|
// Pseudo function for the start of PLT. The table could have a matching
|
|
|
|
|
// FDE that we want to match to pseudo function.
|
2017-11-14 20:05:11 -08:00
|
|
|
createBinaryFunction("__BOLT_PLT_PSEUDO", *PLTSection, PLTAddress, 0, false,
|
|
|
|
|
PLTSize, PLTAlignment);
|
|
|
|
|
for (uint64_t Offset = 0; Offset < PLTSection->getSize(); Offset += PLTSize) {
|
2017-08-04 11:21:05 -07:00
|
|
|
uint64_t InstrSize;
|
|
|
|
|
MCInst Instruction;
|
|
|
|
|
const uint64_t InstrAddr = PLTAddress + Offset;
|
|
|
|
|
if (!BC->DisAsm->getInstruction(Instruction,
|
|
|
|
|
InstrSize,
|
|
|
|
|
PLTData.slice(Offset),
|
|
|
|
|
InstrAddr,
|
|
|
|
|
nulls(),
|
|
|
|
|
nulls())) {
|
|
|
|
|
errs() << "BOLT-ERROR: unable to disassemble instruction in .plt "
|
|
|
|
|
<< "at offset 0x" << Twine::utohexstr(Offset) << '\n';
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-09 09:45:13 -08:00
|
|
|
if (!BC->MIB->isIndirectBranch(Instruction))
|
2017-08-04 11:21:05 -07:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
uint64_t TargetAddress;
|
2018-03-09 09:45:13 -08:00
|
|
|
if (!BC->MIB->evaluateMemOperandTarget(Instruction,
|
2017-08-04 11:21:05 -07:00
|
|
|
TargetAddress,
|
|
|
|
|
InstrAddr,
|
|
|
|
|
InstrSize)) {
|
|
|
|
|
errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(InstrAddr) << '\n';
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// To get the name we have to read a relocation against the address.
|
2018-01-23 15:10:24 -08:00
|
|
|
for (const auto &Rel : RelaPLTSection->getSectionRef().relocations()) {
|
2017-08-04 11:21:05 -07:00
|
|
|
if (Rel.getType() != ELF::R_X86_64_JUMP_SLOT)
|
|
|
|
|
continue;
|
|
|
|
|
if (Rel.getOffset() == TargetAddress) {
|
|
|
|
|
const auto SymbolIter = Rel.getSymbol();
|
|
|
|
|
assert(SymbolIter != InputFile->symbol_end() &&
|
|
|
|
|
"non-null symbol expected");
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const auto SymbolName = cantFail((*SymbolIter).getName());
|
2017-08-04 11:21:05 -07:00
|
|
|
std::string Name = SymbolName.str() + "@PLT";
|
2017-11-14 20:05:11 -08:00
|
|
|
const auto PtrSize = BC->AsmInfo->getCodePointerSize();
|
2017-08-04 11:21:05 -07:00
|
|
|
auto *BF = createBinaryFunction(Name,
|
2018-01-23 15:10:24 -08:00
|
|
|
*PLTSection,
|
2017-08-04 11:21:05 -07:00
|
|
|
InstrAddr,
|
|
|
|
|
0,
|
2017-11-14 20:05:11 -08:00
|
|
|
/*IsSimple=*/false,
|
|
|
|
|
PLTSize,
|
|
|
|
|
PLTAlignment);
|
2017-08-04 11:21:05 -07:00
|
|
|
auto TargetSymbol = BC->registerNameAtAddress(SymbolName.str() + "@GOT",
|
2017-11-14 20:05:11 -08:00
|
|
|
TargetAddress,
|
|
|
|
|
PtrSize,
|
|
|
|
|
PLTAlignment);
|
2017-08-04 11:21:05 -07:00
|
|
|
BF->setPLTSymbol(TargetSymbol);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
if (PLTGOTSection) {
|
2017-08-04 11:21:05 -07:00
|
|
|
// Check if we need to create a function for .plt.got. Some linkers
|
|
|
|
|
// (depending on the version) would mark it with FDE while others wouldn't.
|
2018-01-23 15:10:24 -08:00
|
|
|
if (!getBinaryFunctionAtAddress(PLTGOTSection->getAddress())) {
|
|
|
|
|
createBinaryFunction("__BOLT_PLT_GOT_PSEUDO",
|
|
|
|
|
*PLTGOTSection,
|
|
|
|
|
PLTGOTSection->getAddress(),
|
|
|
|
|
0,
|
2017-11-14 20:05:11 -08:00
|
|
|
false,
|
|
|
|
|
PLTAlignment);
|
2017-08-04 11:21:05 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-29 11:19:06 -07:00
|
|
|
void RewriteInstance::adjustFunctionBoundaries() {
|
2017-10-10 14:54:09 -07:00
|
|
|
for (auto BFI = BinaryFunctions.begin(), BFE = BinaryFunctions.end();
|
|
|
|
|
BFI != BFE; ++BFI) {
|
|
|
|
|
auto &Function = BFI->second;
|
|
|
|
|
|
|
|
|
|
// Check if there's a symbol or a function with a larger address in the
|
|
|
|
|
// same section. If there is - it determines the maximum size for the
|
|
|
|
|
// current function. Otherwise, it is the size of a containing section
|
|
|
|
|
// the defines it.
|
2016-09-29 11:19:06 -07:00
|
|
|
//
|
|
|
|
|
// NOTE: ignore some symbols that could be tolerated inside the body
|
|
|
|
|
// of a function.
|
|
|
|
|
auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress());
|
|
|
|
|
while (NextSymRefI != FileSymRefs.end()) {
|
|
|
|
|
auto &Symbol = NextSymRefI->second;
|
|
|
|
|
auto SymbolSize = ELFSymbolRef(Symbol).getSize();
|
|
|
|
|
|
|
|
|
|
if (!Function.isSymbolValidInScope(Symbol, SymbolSize))
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
// This is potentially another entry point into the function.
|
|
|
|
|
auto EntryOffset = NextSymRefI->first - Function.getAddress();
|
2017-11-22 16:17:36 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " << Function
|
|
|
|
|
<< " at offset 0x" << Twine::utohexstr(EntryOffset) << '\n');
|
|
|
|
|
Function.addEntryPointAtOffset(EntryOffset);
|
|
|
|
|
// In non-relocation mode there's potentially an external undetectable
|
|
|
|
|
// reference to the entry point and hence we cannot move this entry
|
|
|
|
|
// point. Optimizing without moving could be difficult.
|
|
|
|
|
if (!BC->HasRelocations)
|
|
|
|
|
Function.setSimple(false);
|
2016-09-29 11:19:06 -07:00
|
|
|
|
|
|
|
|
++NextSymRefI;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-10 14:54:09 -07:00
|
|
|
// Function runs at most till the end of the containing section.
|
2018-01-23 15:10:24 -08:00
|
|
|
uint64_t NextObjectAddress = Function.getSection().getEndAddress();
|
2017-10-10 14:54:09 -07:00
|
|
|
// Or till the next object marked by a symbol.
|
|
|
|
|
if (NextSymRefI != FileSymRefs.end()) {
|
|
|
|
|
NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress);
|
|
|
|
|
}
|
|
|
|
|
// Or till the next function not marked by a symbol.
|
|
|
|
|
if (std::next(BFI) != BFE) {
|
|
|
|
|
const auto &NextFunction = std::next(BFI)->second;
|
|
|
|
|
NextObjectAddress = std::min(NextFunction.getAddress(),
|
|
|
|
|
NextObjectAddress);
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
|
|
|
|
|
2017-10-10 14:54:09 -07:00
|
|
|
const auto MaxSize = NextObjectAddress - Function.getAddress();
|
2016-09-29 11:19:06 -07:00
|
|
|
if (MaxSize < Function.getSize()) {
|
2016-09-27 19:09:38 -07:00
|
|
|
errs() << "BOLT-ERROR: symbol seen in the middle of the function "
|
|
|
|
|
<< Function << ". Skipping.\n";
|
2016-09-29 11:19:06 -07:00
|
|
|
Function.setSimple(false);
|
2016-09-27 19:09:38 -07:00
|
|
|
Function.setMaxSize(Function.getSize());
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
Function.setMaxSize(MaxSize);
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!Function.getSize() && Function.isSimple()) {
|
2016-09-29 11:19:06 -07:00
|
|
|
// Some assembly functions have their size set to 0, use the max
|
|
|
|
|
// size as their real size.
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
outs() << "BOLT-INFO: setting size of function " << Function
|
|
|
|
|
<< " to " << Function.getMaxSize() << " (was 0)\n";
|
|
|
|
|
}
|
|
|
|
|
Function.setSize(Function.getMaxSize());
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
void RewriteInstance::relocateEHFrameSection() {
|
2018-01-23 15:10:24 -08:00
|
|
|
assert(EHFrameSection && "non-empty .eh_frame section expected");
|
2016-11-11 14:33:34 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
DWARFDebugFrame EHFrame(true, EHFrameSection->getAddress());
|
2018-02-01 16:33:43 -08:00
|
|
|
DWARFDataExtractor DE(EHFrameSection->getContents(),
|
|
|
|
|
BC->AsmInfo->isLittleEndian(),
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
BC->AsmInfo->getCodePointerSize());
|
2016-11-11 14:33:34 -08:00
|
|
|
auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) {
|
|
|
|
|
if (DwarfType == dwarf::DW_EH_PE_omit)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (!(DwarfType & dwarf::DW_EH_PE_pcrel) &&
|
|
|
|
|
!(DwarfType & dwarf::DW_EH_PE_textrel) &&
|
|
|
|
|
!(DwarfType & dwarf::DW_EH_PE_funcrel) &&
|
|
|
|
|
!(DwarfType & dwarf::DW_EH_PE_datarel)) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!(DwarfType & dwarf::DW_EH_PE_sdata4))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
uint64_t RelType;
|
|
|
|
|
switch (DwarfType & 0x0f) {
|
|
|
|
|
default:
|
|
|
|
|
llvm_unreachable("unsupported DWARF encoding type");
|
|
|
|
|
case dwarf::DW_EH_PE_sdata4:
|
|
|
|
|
case dwarf::DW_EH_PE_udata4:
|
|
|
|
|
RelType = ELF::R_X86_64_PC32;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Offset -= 4;
|
2016-11-11 14:33:34 -08:00
|
|
|
break;
|
|
|
|
|
case dwarf::DW_EH_PE_sdata8:
|
|
|
|
|
case dwarf::DW_EH_PE_udata8:
|
|
|
|
|
RelType = ELF::R_X86_64_PC64;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Offset -= 8;
|
2016-11-11 14:33:34 -08:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
auto *BD = BC->getBinaryDataContainingAddress(Value);
|
|
|
|
|
auto *Symbol = BD ? BD->getSymbol() : nullptr;
|
|
|
|
|
auto Addend = BD ? Value - BD->getAddress() : 0;
|
2016-11-11 14:33:34 -08:00
|
|
|
if (!Symbol) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: creating symbol for DWARF reference at 0x"
|
|
|
|
|
<< Twine::utohexstr(Value) << '\n');
|
2017-11-14 20:05:11 -08:00
|
|
|
Symbol = BC->getOrCreateGlobalSymbol(Value, 0, 0, "FUNCat");
|
2016-11-11 14:33:34 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: adding DWARF reference against symbol "
|
|
|
|
|
<< Symbol->getName() << '\n');
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
EHFrameSection->addRelocation(Offset, Symbol, RelType, Addend);
|
2016-11-11 14:33:34 -08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
EHFrame.parse(DE, createReloc);
|
|
|
|
|
}
|
|
|
|
|
|
2016-08-11 14:23:54 -07:00
|
|
|
/// Create a BinaryFunction at \p Address, store it in BinaryFunctions, and
/// register its name and symbol with the BinaryContext.
///
/// \p SymbolSize, when non-zero, is the size used for the name registration;
/// otherwise \p Size is used. Asserts that no function was previously stored
/// at \p Address.
///
/// \returns a pointer to the function stored in BinaryFunctions.
BinaryFunction *RewriteInstance::createBinaryFunction(
    const std::string &Name, BinarySection &Section, uint64_t Address,
    uint64_t Size, bool IsSimple, uint64_t SymbolSize, uint16_t Alignment) {
  auto Insertion = BinaryFunctions.emplace(
      Address, BinaryFunction(Name, Section, Address, Size, *BC, IsSimple));
  assert(Insertion.second && "unexpected duplicate function");

  auto *NewFunction = &Insertion.first->second;

  const uint64_t RegisteredSize = SymbolSize ? SymbolSize : Size;
  BC->registerNameAtAddress(Name, Address, RegisteredSize, Alignment);
  BC->setSymbolToFunctionMap(NewFunction->getSymbol(), NewFunction);

  return NewFunction;
}
|
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
/// Return a view over the data of LSDASection, sized by the length of the
/// section contents.
ArrayRef<uint8_t> RewriteInstance::getLSDAData() {
  const auto *Data = LSDASection->getData();
  const auto Length = LSDASection->getContents().size();
  return ArrayRef<uint8_t>(Data, Length);
}
|
|
|
|
|
|
|
|
|
|
uint64_t RewriteInstance::getLSDAAddress() {
|
|
|
|
|
return LSDASection->getAddress();
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
void RewriteInstance::readSpecialSections() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("readSpecialSections", "read special sections",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2017-11-27 18:00:24 -08:00
|
|
|
|
2017-03-22 22:05:50 -07:00
|
|
|
bool HasTextRelocations = false;
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Process special sections.
|
2016-03-03 10:13:11 -08:00
|
|
|
for (const auto &Section : InputFile->sections()) {
|
2015-11-23 17:54:18 -08:00
|
|
|
StringRef SectionName;
|
|
|
|
|
check_error(Section.getName(SectionName), "cannot get section name");
|
2016-07-21 12:45:35 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
// Only register sections with names.
|
2018-04-20 20:03:31 -07:00
|
|
|
if (!SectionName.empty()) {
|
2018-02-01 16:33:43 -08:00
|
|
|
BC->registerSection(Section);
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: registering section " << SectionName
|
|
|
|
|
<< " @ 0x" << Twine::utohexstr(Section.getAddress()) << ":0x"
|
|
|
|
|
<< Twine::utohexstr(Section.getAddress() + Section.getSize())
|
|
|
|
|
<< "\n");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text");
|
|
|
|
|
LSDASection = BC->getUniqueSectionByName(".gcc_except_table");
|
|
|
|
|
EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
|
|
|
|
|
GdbIndexSection = BC->getUniqueSectionByName(".gdb_index");
|
|
|
|
|
PLTSection = BC->getUniqueSectionByName(".plt");
|
|
|
|
|
GOTPLTSection = BC->getUniqueSectionByName(".got.plt");
|
|
|
|
|
PLTGOTSection = BC->getUniqueSectionByName(".plt.got");
|
|
|
|
|
RelaPLTSection = BC->getUniqueSectionByName(".rela.plt");
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
if (opts::PrintSections) {
|
|
|
|
|
outs() << "BOLT-INFO: Sections from original binary:\n";
|
|
|
|
|
BC->printSections(outs());
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
if (opts::PrintSections) {
|
|
|
|
|
outs() << "BOLT-INFO: Sections:\n";
|
|
|
|
|
BC->printSections(outs());
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) {
|
2017-03-22 22:05:50 -07:00
|
|
|
errs() << "BOLT-ERROR: relocations against code are missing from the input "
|
|
|
|
|
"file. Cannot proceed in relocations mode (-relocs).\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
BC->HasRelocations = HasTextRelocations &&
|
|
|
|
|
(opts::RelocationMode != cl::BOU_FALSE);
|
2018-04-09 13:47:43 -07:00
|
|
|
if (BC->HasRelocations) {
|
|
|
|
|
outs() << "BOLT-INFO: enabling relocation mode\n";
|
|
|
|
|
}
|
2017-12-09 21:40:39 -08:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Process debug sections.
|
2016-02-25 16:57:07 -08:00
|
|
|
EHFrame = BC->DwCtx->getEHFrame();
|
2015-11-23 17:54:18 -08:00
|
|
|
if (opts::DumpEHFrame) {
|
2016-11-15 10:40:00 -08:00
|
|
|
outs() << "BOLT-INFO: Dumping original binary .eh_frame\n";
|
2018-03-30 15:49:34 -07:00
|
|
|
EHFrame->dump(outs(), &*BC->MRI, NoneType());
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
2016-11-14 16:39:55 -08:00
|
|
|
CFIRdWrt.reset(new CFIReaderWriter(*EHFrame));
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2018-04-13 15:46:19 -07:00
|
|
|
void RewriteInstance::adjustCommandLineOptions() {
|
2018-04-13 11:18:46 -07:00
|
|
|
if (BC->isAArch64() && opts::RelocationMode != cl::BOU_TRUE &&
|
|
|
|
|
!opts::AggregateOnly) {
|
2018-04-13 15:46:19 -07:00
|
|
|
errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
|
|
|
|
|
"supported\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
|
|
|
|
|
outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
|
|
|
|
|
opts::AlignMacroOpFusion = MFT_NONE;
|
|
|
|
|
}
|
|
|
|
|
if (opts::AlignMacroOpFusion != MFT_NONE &&
|
|
|
|
|
!BC->HasRelocations) {
|
|
|
|
|
outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
|
|
|
|
|
"mode\n";
|
|
|
|
|
opts::AlignMacroOpFusion = MFT_NONE;
|
|
|
|
|
}
|
2018-06-25 14:55:48 -07:00
|
|
|
if (opts::SplitEH && !BC->HasRelocations) {
|
|
|
|
|
outs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n";
|
|
|
|
|
opts::SplitEH = false;
|
|
|
|
|
}
|
2018-04-13 15:46:19 -07:00
|
|
|
if (BC->isX86() && BC->HasRelocations &&
|
|
|
|
|
opts::AlignMacroOpFusion == MFT_HOT &&
|
|
|
|
|
!DA.started() && BC->DR.getAllFuncsData().empty() &&
|
|
|
|
|
opts::BoltProfile.empty()) {
|
|
|
|
|
outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
|
|
|
|
|
"was specified\n";
|
|
|
|
|
opts::AlignMacroOpFusion = MFT_ALL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
namespace {
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
|
|
|
|
|
const RelocationRef &RelRef) {
|
|
|
|
|
int64_t Addend = 0;
|
|
|
|
|
const ELFFile<ELFT> &EF = *Obj->getELFFile();
|
|
|
|
|
DataRefImpl Rel = RelRef.getRawDataRefImpl();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const auto *RelocationSection = cantFail(EF.getSection(Rel.d.a));
|
2016-09-27 19:09:38 -07:00
|
|
|
switch (RelocationSection->sh_type) {
|
|
|
|
|
default: llvm_unreachable("unexpected relocation section type");
|
|
|
|
|
case ELF::SHT_REL:
|
|
|
|
|
break;
|
|
|
|
|
case ELF::SHT_RELA: {
|
|
|
|
|
const auto *RelA = Obj->getRela(Rel);
|
|
|
|
|
Addend = RelA->r_addend;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Addend;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
|
|
|
|
|
const RelocationRef &Rel) {
|
|
|
|
|
if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
|
|
|
|
|
return getRelocationAddend(ELF32LE, Rel);
|
|
|
|
|
if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
|
|
|
|
|
return getRelocationAddend(ELF64LE, Rel);
|
|
|
|
|
if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
|
|
|
|
|
return getRelocationAddend(ELF32BE, Rel);
|
|
|
|
|
auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
|
|
|
|
|
return getRelocationAddend(ELF64BE, Rel);
|
|
|
|
|
}
|
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
2018-01-24 05:42:11 -08:00
|
|
|
bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
|
|
|
|
|
SectionRef RelocatedSection,
|
|
|
|
|
std::string &SymbolName,
|
|
|
|
|
uint64_t &SymbolAddress,
|
|
|
|
|
int64_t &Addend,
|
|
|
|
|
uint64_t &ExtractedValue) const {
|
|
|
|
|
if (!Relocation::isSupported(Rel.getType()))
|
|
|
|
|
return false;
|
|
|
|
|
|
2018-03-20 14:34:58 -07:00
|
|
|
const bool IsAArch64 = BC->isAArch64();
|
2018-01-24 05:42:11 -08:00
|
|
|
const bool IsFromCode = RelocatedSection.isText();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
|
2018-01-24 05:42:11 -08:00
|
|
|
// For value extraction.
|
|
|
|
|
StringRef RelocatedSectionContents;
|
|
|
|
|
RelocatedSection.getContents(RelocatedSectionContents);
|
|
|
|
|
DataExtractor DE(RelocatedSectionContents,
|
|
|
|
|
BC->AsmInfo->isLittleEndian(),
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
BC->AsmInfo->getCodePointerSize());
|
2018-01-24 05:42:11 -08:00
|
|
|
|
|
|
|
|
const bool IsPCRelative = Relocation::isPCRelative(Rel.getType());
|
|
|
|
|
auto SymbolIter = Rel.getSymbol();
|
|
|
|
|
assert(SymbolIter != InputFile->symbol_end() &&
|
|
|
|
|
"relocation symbol must exist");
|
2018-07-30 10:29:47 -07:00
|
|
|
const auto &Symbol = *SymbolIter;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
SymbolName = cantFail(Symbol.getName());
|
|
|
|
|
SymbolAddress = cantFail(Symbol.getAddress());
|
2018-01-24 05:42:11 -08:00
|
|
|
Addend = getRelocationAddend(InputFile, Rel);
|
|
|
|
|
|
|
|
|
|
uint32_t RelocationOffset =
|
|
|
|
|
Rel.getOffset() - RelocatedSection.getAddress();
|
|
|
|
|
const auto RelSize = Relocation::getSizeForType(Rel.getType());
|
|
|
|
|
ExtractedValue =
|
|
|
|
|
static_cast<uint64_t>(DE.getSigned(&RelocationOffset, RelSize));
|
|
|
|
|
|
|
|
|
|
if (IsAArch64) {
|
|
|
|
|
ExtractedValue = Relocation::extractValue(Rel.getType(),
|
|
|
|
|
ExtractedValue,
|
|
|
|
|
Rel.getOffset());
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-14 11:10:26 -07:00
|
|
|
// Section symbols are marked as ST_Debug.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const bool SymbolIsSection =
|
|
|
|
|
(cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
|
|
|
|
|
const auto PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
|
2018-01-24 05:42:11 -08:00
|
|
|
|
|
|
|
|
// If no symbol has been found or if it is a relocation requiring the
|
|
|
|
|
// creation of a GOT entry, do not link against the symbol but against
|
|
|
|
|
// whatever address was extracted from the instruction itself. We are
|
|
|
|
|
// not creating a GOT entry as this was already processed by the linker.
|
|
|
|
|
if (!SymbolAddress || Relocation::isGOT(Rel.getType())) {
|
|
|
|
|
assert(!SymbolIsSection);
|
|
|
|
|
if (ExtractedValue) {
|
|
|
|
|
SymbolAddress = ExtractedValue - Addend + PCRelOffset;
|
|
|
|
|
} else {
|
|
|
|
|
// This is weird case. The extracted value is zero but the addend is
|
|
|
|
|
// non-zero and the relocation is not pc-rel. Using the previous logic,
|
|
|
|
|
// the SymbolAddress would end up as a huge number. Seen in
|
|
|
|
|
// exceptions_pic.test.
|
2017-11-14 20:05:11 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
|
2018-01-24 05:42:11 -08:00
|
|
|
<< Twine::utohexstr(Rel.getOffset())
|
|
|
|
|
<< " value does not match addend for "
|
2017-11-14 20:05:11 -08:00
|
|
|
<< "relocation to undefined symbol.\n");
|
2018-01-24 05:42:11 -08:00
|
|
|
SymbolAddress += PCRelOffset;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
} else if (SymbolIsSection) {
|
|
|
|
|
auto Section = Symbol.getSection();
|
|
|
|
|
if (Section && *Section != InputFile->section_end()) {
|
|
|
|
|
SymbolName = "section " + std::string(getSectionName(**Section));
|
|
|
|
|
if (!IsAArch64) {
|
|
|
|
|
assert(SymbolAddress == (*Section)->getAddress() &&
|
|
|
|
|
"section symbol address must be the same as section address");
|
|
|
|
|
// Convert section symbol relocations to regular relocations inside
|
|
|
|
|
// non-section symbols.
|
|
|
|
|
if (IsPCRelative) {
|
|
|
|
|
Addend = ExtractedValue - (SymbolAddress - PCRelOffset);
|
|
|
|
|
} else {
|
|
|
|
|
SymbolAddress = ExtractedValue;
|
|
|
|
|
Addend = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-30 10:29:47 -07:00
|
|
|
auto verifyExtractedValue = [&]() {
|
|
|
|
|
if (IsAArch64)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (SymbolName == "__hot_start" || SymbolName == "__hot_end")
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (Relocation::isTLS(Rel.getType()))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (cantFail(Symbol.getType()) == SymbolRef::ST_Other)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return truncateToSize(ExtractedValue, RelSize) ==
|
|
|
|
|
truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize);
|
|
|
|
|
};
|
2018-01-24 05:42:11 -08:00
|
|
|
|
2018-07-30 10:29:47 -07:00
|
|
|
assert(verifyExtractedValue() && "mismatched extracted relocation value");
|
2018-01-24 05:42:11 -08:00
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
void RewriteInstance::readRelocations(const SectionRef &Section) {
|
|
|
|
|
StringRef SectionName;
|
|
|
|
|
Section.getName(SectionName);
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: relocations for section "
|
|
|
|
|
<< SectionName << ":\n");
|
|
|
|
|
if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
auto SecIter = Section.getRelocatedSection();
|
|
|
|
|
assert(SecIter != InputFile->section_end() && "relocated section expected");
|
|
|
|
|
auto RelocatedSection = *SecIter;
|
|
|
|
|
|
|
|
|
|
StringRef RelocatedSectionName;
|
|
|
|
|
RelocatedSection.getName(RelocatedSectionName);
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
|
|
|
|
|
<< RelocatedSectionName << '\n');
|
|
|
|
|
|
|
|
|
|
if (!(ELFSectionRef(RelocatedSection).getFlags() & ELF::SHF_ALLOC)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
|
|
|
|
|
<< "non-allocatable section\n");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName)
|
|
|
|
|
.Cases(".plt", ".rela.plt", ".got.plt", ".eh_frame", true)
|
|
|
|
|
.Default(false);
|
|
|
|
|
if (SkipRelocs) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-20 14:34:58 -07:00
|
|
|
const bool IsAArch64 = BC->isAArch64();
|
2018-01-24 05:42:11 -08:00
|
|
|
const bool IsFromCode = RelocatedSection.isText();
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
auto printRelocationInfo = [&](const RelocationRef &Rel,
|
|
|
|
|
StringRef SymbolName,
|
|
|
|
|
uint64_t SymbolAddress,
|
|
|
|
|
uint64_t Addend,
|
|
|
|
|
uint64_t ExtractedValue) {
|
|
|
|
|
SmallString<16> TypeName;
|
|
|
|
|
Rel.getTypeName(TypeName);
|
|
|
|
|
const auto Address = SymbolAddress + Addend;
|
|
|
|
|
auto Section = BC->getSectionForAddress(SymbolAddress);
|
|
|
|
|
dbgs() << "Relocation: offset = 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset())
|
2018-07-12 10:13:03 -07:00
|
|
|
<< "; type = " << TypeName
|
2017-11-14 20:05:11 -08:00
|
|
|
<< "; value = 0x" << Twine::utohexstr(ExtractedValue)
|
|
|
|
|
<< "; symbol = " << SymbolName
|
|
|
|
|
<< " (" << (Section ? Section->getName() : "") << ")"
|
|
|
|
|
<< "; symbol address = 0x" << Twine::utohexstr(SymbolAddress)
|
|
|
|
|
<< "; addend = 0x" << Twine::utohexstr(Addend)
|
|
|
|
|
<< "; address = 0x" << Twine::utohexstr(Address)
|
|
|
|
|
<< "; in = ";
|
|
|
|
|
if (auto *Func = getBinaryFunctionContainingAddress(Rel.getOffset(),
|
|
|
|
|
false,
|
|
|
|
|
IsAArch64)) {
|
|
|
|
|
dbgs() << Func->getPrintName() << "\n";
|
|
|
|
|
} else {
|
|
|
|
|
dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n";
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
for (const auto &Rel : Section.relocations()) {
|
|
|
|
|
SmallString<16> TypeName;
|
|
|
|
|
Rel.getTypeName(TypeName);
|
2018-01-24 05:42:11 -08:00
|
|
|
|
|
|
|
|
std::string SymbolName;
|
|
|
|
|
uint64_t SymbolAddress;
|
|
|
|
|
int64_t Addend;
|
|
|
|
|
uint64_t ExtractedValue;
|
|
|
|
|
|
|
|
|
|
if (!analyzeRelocation(Rel,
|
|
|
|
|
RelocatedSection,
|
|
|
|
|
SymbolName,
|
|
|
|
|
SymbolAddress,
|
|
|
|
|
Addend,
|
|
|
|
|
ExtractedValue)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x"
|
2016-09-27 19:09:38 -07:00
|
|
|
<< Twine::utohexstr(Rel.getOffset())
|
|
|
|
|
<< "; type name = " << TypeName
|
|
|
|
|
<< '\n');
|
2017-09-13 11:21:47 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-24 05:42:11 -08:00
|
|
|
const auto Address = SymbolAddress + Addend;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
DEBUG(
|
|
|
|
|
dbgs() << "BOLT-DEBUG: ";
|
|
|
|
|
printRelocationInfo(Rel,
|
|
|
|
|
SymbolName,
|
|
|
|
|
SymbolAddress,
|
|
|
|
|
Addend,
|
|
|
|
|
ExtractedValue));
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
BinaryFunction *ContainingBF = nullptr;
|
|
|
|
|
if (IsFromCode) {
|
2018-01-24 05:42:11 -08:00
|
|
|
ContainingBF =
|
|
|
|
|
getBinaryFunctionContainingAddress(Rel.getOffset(),
|
|
|
|
|
/*CheckPastEnd*/ false,
|
|
|
|
|
/*UseMaxSize*/ IsAArch64);
|
2016-09-27 19:09:38 -07:00
|
|
|
assert(ContainingBF && "cannot find function for address in code");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// PC-relative relocations from data to code are tricky since the original
|
|
|
|
|
// information is typically lost after linking even with '--emit-relocs'.
|
|
|
|
|
// They are normally used by PIC-style jump tables and reference both
|
|
|
|
|
// the jump table and jump destination by computing the difference
|
|
|
|
|
// between the two. If we blindly apply the relocation it will appear
|
|
|
|
|
// that it references an arbitrary location in the code, possibly even
|
|
|
|
|
// in a different function from that containing the jump table.
|
2018-01-24 05:42:11 -08:00
|
|
|
if (!IsAArch64 && Relocation::isPCRelative(Rel.getType())) {
|
2016-09-27 19:09:38 -07:00
|
|
|
// Just register the fact that we have PC-relative relocation at a given
|
|
|
|
|
// address. The actual referenced label/address cannot be determined
|
|
|
|
|
// from linker data alone.
|
|
|
|
|
if (IsFromCode) {
|
|
|
|
|
ContainingBF->addPCRelativeRelocationAddress(Rel.getOffset());
|
|
|
|
|
}
|
2018-01-24 05:42:11 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
|
2018-04-20 20:03:31 -07:00
|
|
|
<< Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
|
2018-01-24 05:42:11 -08:00
|
|
|
<< "\n");
|
2016-09-27 19:09:38 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-12 10:13:03 -07:00
|
|
|
auto ForceRelocation = [&](StringRef SymbolName) {
|
|
|
|
|
if (opts::HotText && (SymbolName == "__hot_start" ||
|
|
|
|
|
SymbolName == "__hot_end"))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (opts::HotData && (SymbolName == "__hot_data_start" ||
|
|
|
|
|
SymbolName == "__hot_data_end"))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (SymbolName == "_end")
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}(SymbolName);
|
|
|
|
|
|
|
|
|
|
if (BC->isAArch64() && Rel.getType() == ELF::R_AARCH64_ADR_GOT_PAGE)
|
|
|
|
|
ForceRelocation = true;
|
|
|
|
|
|
2018-01-24 05:42:11 -08:00
|
|
|
// TODO: RefSection should be the same as **Rel.getSymbol().getSection()
|
|
|
|
|
auto RefSection = BC->getSectionForAddress(SymbolAddress);
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!RefSection && !ForceRelocation) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-24 05:42:11 -08:00
|
|
|
const bool IsToCode = RefSection && RefSection->isText();
|
2018-04-20 20:03:31 -07:00
|
|
|
const bool IsSectionRelocation =
|
|
|
|
|
(cantFail(Rel.getSymbol()->getType()) == SymbolRef::ST_Debug);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
// Occasionally we may see a reference past the last byte of the function
|
|
|
|
|
// typically as a result of __builtin_unreachable(). Check it here.
|
2018-04-12 10:07:11 -07:00
|
|
|
auto *ReferencedBF = getBinaryFunctionContainingAddress(
|
|
|
|
|
Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64);
|
2018-05-14 11:10:26 -07:00
|
|
|
|
|
|
|
|
if (!IsSectionRelocation) {
|
|
|
|
|
if (auto *BF = getBinaryFunctionContainingAddress(SymbolAddress)) {
|
|
|
|
|
if (BF != ReferencedBF) {
|
|
|
|
|
// It's possible we are referencing a function without referencing any
|
|
|
|
|
// code, e.g. when taking a bitmask action on a function address.
|
|
|
|
|
errs() << "BOLT-WARNING: non-standard function reference (e.g. "
|
|
|
|
|
"bitmask) detected against function " << *BF;
|
|
|
|
|
if (IsFromCode) {
|
|
|
|
|
errs() << " from function " << *ContainingBF << '\n';
|
|
|
|
|
} else {
|
|
|
|
|
errs() << " from data section at 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset()) << '\n';
|
|
|
|
|
}
|
|
|
|
|
DEBUG(printRelocationInfo(Rel,
|
|
|
|
|
SymbolName,
|
|
|
|
|
SymbolAddress,
|
|
|
|
|
Addend,
|
|
|
|
|
ExtractedValue)
|
|
|
|
|
);
|
|
|
|
|
ReferencedBF = BF;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
uint64_t RefFunctionOffset = 0;
|
|
|
|
|
MCSymbol *ReferencedSymbol = nullptr;
|
|
|
|
|
if (ForceRelocation) {
|
2017-11-14 20:05:11 -08:00
|
|
|
auto Name = Relocation::isGOT(Rel.getType()) ? "Zero" : SymbolName;
|
|
|
|
|
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
|
2018-01-24 05:42:11 -08:00
|
|
|
SymbolAddress = 0;
|
2016-09-27 19:09:38 -07:00
|
|
|
Addend = Address;
|
2018-07-12 10:13:03 -07:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
|
|
|
|
|
<< SymbolName << " with addend " << Addend << '\n');
|
2016-09-27 19:09:38 -07:00
|
|
|
} else if (ReferencedBF) {
|
2018-05-14 11:10:26 -07:00
|
|
|
ReferencedSymbol = ReferencedBF->getSymbol();
|
|
|
|
|
|
|
|
|
|
// Adjust the point of reference to a code location inside a function.
|
|
|
|
|
if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) {
|
|
|
|
|
RefFunctionOffset = Address - ReferencedBF->getAddress();
|
|
|
|
|
if (RefFunctionOffset) {
|
|
|
|
|
ReferencedSymbol =
|
|
|
|
|
ReferencedBF->getOrCreateLocalLabel(Address,
|
|
|
|
|
/*CreatePastEnd =*/ true);
|
|
|
|
|
}
|
|
|
|
|
SymbolAddress = Address;
|
|
|
|
|
Addend = 0;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2018-05-14 11:10:26 -07:00
|
|
|
DEBUG(
|
|
|
|
|
dbgs() << " referenced function " << *ReferencedBF;
|
|
|
|
|
if (Address != ReferencedBF->getAddress())
|
|
|
|
|
dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset);
|
|
|
|
|
dbgs() << '\n'
|
|
|
|
|
);
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
|
|
|
|
if (RefSection && RefSection->isText() && SymbolAddress) {
|
|
|
|
|
// This can happen e.g. with PIC-style jump tables.
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for "
|
|
|
|
|
"relocation against code\n");
|
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2018-03-20 14:34:58 -07:00
|
|
|
// In AArch64 there are zero reasons to keep a reference to the
|
|
|
|
|
// "original" symbol plus addend. The original symbol is probably just a
|
|
|
|
|
// section symbol. If we are here, this means we are probably accessing
|
|
|
|
|
// data, so it is imperative to keep the original address.
|
|
|
|
|
if (IsAArch64) {
|
|
|
|
|
SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str();
|
|
|
|
|
SymbolAddress = Address;
|
|
|
|
|
Addend = 0;
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
// This function makes sure that symbols referenced by ambiguous
|
|
|
|
|
// relocations are marked as unmoveable. For now, if a section
|
|
|
|
|
// relocation points at the boundary between two symbols then
|
|
|
|
|
// those symbols are marked as unmoveable.
|
|
|
|
|
auto markAmbiguousRelocations = [&](BinaryData *BD) {
|
|
|
|
|
if (Address == BD->getAddress()) {
|
|
|
|
|
BD = BD->getAtomicRoot();
|
|
|
|
|
DEBUG(if (BD->isMoveable()) {
|
|
|
|
|
dbgs() << "BOLT-DEBUG: setting " << *BD << " as unmoveable "
|
|
|
|
|
<< "due to ambiguous relocation (0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << ") @ 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset()) << "\n";
|
|
|
|
|
});
|
|
|
|
|
BD->setIsMoveable(false);
|
|
|
|
|
|
|
|
|
|
// set previous symbol as unmoveable
|
|
|
|
|
auto *Prev = BC->getBinaryDataContainingAddress(Address-1);
|
|
|
|
|
if (Prev && Prev->getEndAddress() == BD->getAddress()) {
|
|
|
|
|
Prev = Prev->getAtomicRoot();
|
|
|
|
|
DEBUG(if (Prev->isMoveable()) {
|
|
|
|
|
dbgs() << "BOLT-DEBUG: setting " << *Prev << " as unmoveable "
|
|
|
|
|
<< "due to ambiguous relocation (0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << ") @ 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset()) << "\n";
|
|
|
|
|
});
|
|
|
|
|
Prev->setIsMoveable(false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Address == BD->getEndAddress()) {
|
|
|
|
|
BD = BD->getAtomicRoot();
|
|
|
|
|
DEBUG(if (BD->isMoveable()) {
|
|
|
|
|
dbgs() << "BOLT-DEBUG: setting " << *BD << " as unmoveable "
|
|
|
|
|
<< "due to ambiguous relocation (0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << ") @ 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset()) << "\n";
|
|
|
|
|
});
|
|
|
|
|
BD->setIsMoveable(false);
|
|
|
|
|
|
|
|
|
|
// set next symbol as unmoveable
|
|
|
|
|
auto *Next = BC->getBinaryDataContainingAddress(BD->getEndAddress());
|
|
|
|
|
if (Next && Next->getAddress() == BD->getEndAddress()) {
|
|
|
|
|
Next = Next->getAtomicRoot();
|
|
|
|
|
DEBUG(if (Next->isMoveable()) {
|
|
|
|
|
dbgs() << "BOLT-DEBUG: setting " << *Next << " as unmoveable "
|
|
|
|
|
<< "due to ambiguous relocation (0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << ") @ 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset()) << "\n";
|
|
|
|
|
});
|
|
|
|
|
Next->setIsMoveable(false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// If we are allowing section relocations, we assign relocations
|
|
|
|
|
// that are pointing to the end of a symbol to that symbol rather
|
|
|
|
|
// than the following symbol.
|
|
|
|
|
const auto IncludeEnd =
|
|
|
|
|
opts::AllowSectionRelocations && IsSectionRelocation;
|
|
|
|
|
|
|
|
|
|
if (auto *BD = BC->getBinaryDataContainingAddress(SymbolAddress,
|
|
|
|
|
IncludeEnd)) {
|
|
|
|
|
assert(!IncludeEnd ||
|
|
|
|
|
(BD == BC->getBinaryDataContainingAddress(SymbolAddress) ||
|
|
|
|
|
!BC->getBinaryDataContainingAddress(SymbolAddress) ||
|
|
|
|
|
(IsSectionRelocation && BD->getEndAddress() ==
|
2018-05-14 11:10:26 -07:00
|
|
|
BC->getBinaryDataContainingAddress(SymbolAddress)->
|
|
|
|
|
getAddress())));
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2018-03-20 14:34:58 -07:00
|
|
|
// Note: this assertion is trying to check sanity of BinaryData objects
|
|
|
|
|
// but AArch64 has inferred and incomplete object locations coming from
|
|
|
|
|
// GOT/TLS or any other non-trivial relocation (that requires creation
|
|
|
|
|
// of sections and whose symbol address is not really what should be
|
|
|
|
|
// encoded in the instruction). So we essentially disabled this check
|
|
|
|
|
// for AArch64 and live with bogus names for objects.
|
2018-04-20 20:03:31 -07:00
|
|
|
assert((IsAArch64 ||
|
|
|
|
|
IsSectionRelocation ||
|
|
|
|
|
BD->nameStartsWith(SymbolName) ||
|
|
|
|
|
BD->nameStartsWith("PG" + SymbolName) ||
|
|
|
|
|
(BD->nameStartsWith("ANONYMOUS") &&
|
|
|
|
|
(BD->getSectionName().startswith(".plt") ||
|
|
|
|
|
BD->getSectionName().endswith(".plt")))) &&
|
2018-06-14 14:27:20 -07:00
|
|
|
"BOLT symbol names of all non-section relocations must match "
|
2018-04-20 20:03:31 -07:00
|
|
|
"up with symbol names referenced in the relocation");
|
|
|
|
|
|
|
|
|
|
if (!opts::AllowSectionRelocations && IsSectionRelocation) {
|
|
|
|
|
markAmbiguousRelocations(BD);
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
ReferencedSymbol = BD->getSymbol();
|
|
|
|
|
Addend += (SymbolAddress - BD->getAddress());
|
|
|
|
|
SymbolAddress = BD->getAddress();
|
|
|
|
|
assert(Address == SymbolAddress + Addend);
|
|
|
|
|
} else {
|
|
|
|
|
auto Symbol = *Rel.getSymbol();
|
|
|
|
|
// These are mostly local data symbols but undefined symbols
|
|
|
|
|
// in relocation sections can get through here too, from .plt.
|
2018-04-20 20:03:31 -07:00
|
|
|
assert((IsAArch64 ||
|
|
|
|
|
IsSectionRelocation ||
|
|
|
|
|
BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt"))
|
|
|
|
|
&& "known symbols should not resolve to anonymous locals");
|
|
|
|
|
|
2018-03-20 14:34:58 -07:00
|
|
|
const uint64_t SymbolSize =
|
|
|
|
|
IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
|
|
|
|
|
const uint64_t SymbolAlignment = IsAArch64 ? 1 : Symbol.getAlignment();
|
2018-04-20 20:03:31 -07:00
|
|
|
const unsigned SymbolFlags = Symbol.getFlags();
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2018-05-14 11:10:26 -07:00
|
|
|
if (!IsSectionRelocation) {
|
2017-11-14 20:05:11 -08:00
|
|
|
std::string Name;
|
2018-03-20 14:34:58 -07:00
|
|
|
if (Symbol.getFlags() & SymbolRef::SF_Global) {
|
2017-11-14 20:05:11 -08:00
|
|
|
Name = SymbolName;
|
2018-03-20 14:34:58 -07:00
|
|
|
} else {
|
|
|
|
|
Name = uniquifyName(*BC, StringRef(SymbolName).startswith(
|
|
|
|
|
BC->AsmInfo->getPrivateGlobalPrefix())
|
|
|
|
|
? "PG" + SymbolName + "/"
|
|
|
|
|
: SymbolName + "/");
|
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
ReferencedSymbol = BC->registerNameAtAddress(Name,
|
|
|
|
|
SymbolAddress,
|
|
|
|
|
SymbolSize,
|
2018-04-20 20:03:31 -07:00
|
|
|
SymbolAlignment,
|
|
|
|
|
SymbolFlags);
|
2017-11-14 20:05:11 -08:00
|
|
|
} else {
|
|
|
|
|
ReferencedSymbol = BC->getOrCreateGlobalSymbol(SymbolAddress,
|
|
|
|
|
SymbolSize,
|
|
|
|
|
SymbolAlignment,
|
2018-04-20 20:03:31 -07:00
|
|
|
"SYMBOLat",
|
|
|
|
|
SymbolFlags);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!opts::AllowSectionRelocations && IsSectionRelocation) {
|
|
|
|
|
auto *BD = BC->getBinaryDataByName(ReferencedSymbol->getName());
|
|
|
|
|
markAmbiguousRelocations(BD);
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
auto checkMaxDataRelocations = [&]() {
|
|
|
|
|
++NumDataRelocations;
|
|
|
|
|
if (opts::MaxDataRelocations &&
|
|
|
|
|
NumDataRelocations + 1 == opts::MaxDataRelocations) {
|
|
|
|
|
dbgs() << "BOLT-DEBUG: processing ending on data relocation "
|
|
|
|
|
<< NumDataRelocations << ": ";
|
|
|
|
|
printRelocationInfo(Rel,
|
|
|
|
|
ReferencedSymbol->getName(),
|
|
|
|
|
SymbolAddress,
|
|
|
|
|
Addend,
|
|
|
|
|
ExtractedValue);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return (!opts::MaxDataRelocations ||
|
|
|
|
|
NumDataRelocations < opts::MaxDataRelocations);
|
|
|
|
|
};
|
|
|
|
|
|
2018-07-12 10:13:03 -07:00
|
|
|
if (IsFromCode && IsAArch64)
|
|
|
|
|
ForceRelocation = true;
|
|
|
|
|
|
|
|
|
|
if (refersToReorderedSection(RefSection) ||
|
|
|
|
|
(opts::ForceToDataRelocations && checkMaxDataRelocations()))
|
|
|
|
|
ForceRelocation = true;
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (IsFromCode) {
|
2018-07-12 10:13:03 -07:00
|
|
|
if (ReferencedBF || ForceRelocation) {
|
2018-01-24 05:42:11 -08:00
|
|
|
ContainingBF->addRelocation(Rel.getOffset(),
|
|
|
|
|
ReferencedSymbol,
|
|
|
|
|
Rel.getType(),
|
|
|
|
|
Addend,
|
|
|
|
|
ExtractedValue);
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
2018-01-24 05:42:11 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation from code to data "
|
|
|
|
|
<< ReferencedSymbol->getName() << "\n");
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2018-07-12 10:13:03 -07:00
|
|
|
} else if (IsToCode || ForceRelocation) {
|
2018-01-24 05:42:11 -08:00
|
|
|
BC->addRelocation(Rel.getOffset(),
|
|
|
|
|
ReferencedSymbol,
|
|
|
|
|
Rel.getType(),
|
|
|
|
|
Addend);
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-14 18:48:05 -07:00
|
|
|
void RewriteInstance::readDebugInfo() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2016-03-14 18:48:05 -07:00
|
|
|
if (!opts::UpdateDebugSections)
|
|
|
|
|
return;
|
|
|
|
|
|
2016-05-27 20:19:19 -07:00
|
|
|
BC->preprocessDebugInfo(BinaryFunctions);
|
2016-03-14 18:48:05 -07:00
|
|
|
}
|
|
|
|
|
|
2017-12-13 23:12:01 -08:00
|
|
|
void RewriteInstance::processProfileData() {
|
2017-11-28 09:57:21 -08:00
|
|
|
if (DA.started()) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("aggregate", "aggregate data", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2017-11-28 09:57:21 -08:00
|
|
|
DA.aggregate(*BC.get(), BinaryFunctions);
|
|
|
|
|
|
2017-12-13 23:12:01 -08:00
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
auto &Function = BFI.second;
|
|
|
|
|
Function.convertBranchData();
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
if (opts::AggregateOnly) {
|
|
|
|
|
if (std::error_code EC = DA.writeAggregatedFile()) {
|
|
|
|
|
check_error(EC, "cannot create output data file");
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-12-13 23:12:01 -08:00
|
|
|
} else {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("readprofile", "read profile data", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2017-12-13 23:12:01 -08:00
|
|
|
|
|
|
|
|
if (!opts::BoltProfile.empty()) {
|
|
|
|
|
ProfileReader PR;
|
2018-04-09 19:10:19 -07:00
|
|
|
auto EC = PR.readProfile(opts::BoltProfile, BinaryFunctions);
|
|
|
|
|
check_error(EC, "cannot read profile");
|
2017-12-13 23:12:01 -08:00
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Preliminary match profile data to functions.
|
|
|
|
|
if (!BC->DR.getAllFuncsData().empty()) {
|
|
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
auto &Function = BFI.second;
|
|
|
|
|
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
|
|
|
|
|
Function.MemData = MemData;
|
|
|
|
|
MemData->Used = true;
|
|
|
|
|
}
|
|
|
|
|
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
|
|
|
|
|
Function.BranchData = FuncData;
|
|
|
|
|
Function.ExecutionCount = FuncData->ExecutionCount;
|
|
|
|
|
FuncData->Used = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-11-28 09:57:21 -08:00
|
|
|
|
|
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
auto &Function = BFI.second;
|
2017-12-13 23:12:01 -08:00
|
|
|
Function.readProfile();
|
2017-11-28 09:57:21 -08:00
|
|
|
}
|
|
|
|
|
}
|
2017-07-17 11:22:22 -07:00
|
|
|
|
2017-12-13 23:12:01 -08:00
|
|
|
if (!opts::SaveProfile.empty()) {
|
|
|
|
|
ProfileWriter PW(opts::SaveProfile);
|
2018-04-09 19:10:19 -07:00
|
|
|
PW.writeProfile(*this);
|
2017-07-17 11:22:22 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
void RewriteInstance::disassembleFunctions() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("disassembleFunctions", "disassemble functions",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2015-11-23 17:54:18 -08:00
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// If we have to relocate the code we have to disassemble all functions.
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations && !opts::shouldProcess(Function)) {
|
2016-03-03 10:13:11 -08:00
|
|
|
DEBUG(dbgs() << "BOLT: skipping processing function "
|
2016-08-07 12:35:23 -07:00
|
|
|
<< Function << " per user request.\n");
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-20 12:11:34 -07:00
|
|
|
auto FunctionData = BC->getFunctionData(Function);
|
|
|
|
|
if (!FunctionData) {
|
2015-12-17 12:59:15 -08:00
|
|
|
// When could it happen?
|
2016-09-27 19:09:38 -07:00
|
|
|
errs() << "BOLT-ERROR: corresponding section is non-executable or "
|
|
|
|
|
<< "empty for function " << Function << '\n';
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-15 15:47:10 -07:00
|
|
|
// Treat zero-sized functions as non-simple ones.
|
|
|
|
|
if (Function.getSize() == 0) {
|
|
|
|
|
Function.setSimple(false);
|
|
|
|
|
continue;
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Offset of the function in the file.
|
2017-11-28 09:57:21 -08:00
|
|
|
const auto *FileBegin =
|
2017-10-20 12:11:34 -07:00
|
|
|
reinterpret_cast<const uint8_t*>(InputFile->getData().data());
|
|
|
|
|
Function.setFileOffset(FunctionData->begin() - FileBegin);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2017-10-20 12:11:34 -07:00
|
|
|
Function.disassemble(*FunctionData);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!Function.isSimple() && BC->HasRelocations) {
|
2018-06-20 12:03:24 -07:00
|
|
|
BC->exitWithBugReport("function cannot be properly disassembled. "
|
|
|
|
|
"Unable to continue in relocation mode.",
|
|
|
|
|
Function);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
if (opts::PrintAll || opts::PrintDisasm)
|
2016-09-02 14:15:29 -07:00
|
|
|
Function.print(outs(), "after disassembly", true);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Post-process inter-procedural references ASAP as it may affect
|
|
|
|
|
// functions we are about to disassemble next.
|
2017-02-21 14:18:09 -08:00
|
|
|
for (const auto Addr : BC->InterproceduralReferences) {
|
2016-09-27 19:09:38 -07:00
|
|
|
auto *ContainingFunction = getBinaryFunctionContainingAddress(Addr);
|
|
|
|
|
if (ContainingFunction && ContainingFunction->getAddress() != Addr) {
|
|
|
|
|
ContainingFunction->addEntryPoint(Addr);
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: Function " << *ContainingFunction
|
|
|
|
|
<< " has internal BBs that are target of a reference located"
|
|
|
|
|
<< " in another function. Skipping the function.\n";
|
|
|
|
|
}
|
|
|
|
|
ContainingFunction->setSimple(false);
|
|
|
|
|
}
|
2017-02-21 14:18:09 -08:00
|
|
|
} else if (!ContainingFunction && Addr) {
|
|
|
|
|
// Check if address falls in function padding space - this could be
|
|
|
|
|
// unmarked data in code. In this case adjust the padding space size.
|
|
|
|
|
auto Section = BC->getSectionForAddress(Addr);
|
|
|
|
|
assert(Section && "cannot get section for referenced address");
|
|
|
|
|
|
|
|
|
|
if (!Section->isText())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// PLT requires special handling and could be ignored in this context.
|
2018-01-23 15:10:24 -08:00
|
|
|
StringRef SectionName = Section->getName();
|
2017-06-02 18:41:31 -07:00
|
|
|
if (SectionName == ".plt" || SectionName == ".plt.got")
|
2017-02-21 14:18:09 -08:00
|
|
|
continue;
|
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2017-02-21 14:18:09 -08:00
|
|
|
errs() << "BOLT-ERROR: cannot process binaries with unmarked "
|
|
|
|
|
<< "object in code at address 0x"
|
|
|
|
|
<< Twine::utohexstr(Addr) << " belonging to section "
|
|
|
|
|
<< SectionName << " in relocation mode.\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ContainingFunction =
|
|
|
|
|
getBinaryFunctionContainingAddress(Addr,
|
|
|
|
|
/*CheckPastEnd=*/false,
|
|
|
|
|
/*UseMaxSize=*/true);
|
2017-03-07 14:22:15 -08:00
|
|
|
// We are not going to overwrite non-simple functions, but for simple
|
|
|
|
|
// ones - adjust the padding size.
|
|
|
|
|
if (ContainingFunction && ContainingFunction->isSimple()) {
|
2017-02-21 14:18:09 -08:00
|
|
|
errs() << "BOLT-WARNING: function " << *ContainingFunction
|
|
|
|
|
<< " has an object detected in a padding region at address 0x"
|
|
|
|
|
<< Twine::utohexstr(Addr) << '\n';
|
2017-10-16 11:12:22 -07:00
|
|
|
ContainingFunction->setMaxSize(Addr -
|
|
|
|
|
ContainingFunction->getAddress());
|
2017-02-21 14:18:09 -08:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
BC->InterproceduralReferences.clear();
|
2018-02-14 12:06:17 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
|
|
|
|
|
if (!BC->HasRelocations && !opts::shouldProcess(Function)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT: skipping processing function "
|
|
|
|
|
<< Function << " per user request.\n");
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!Function.isSimple()) {
|
|
|
|
|
assert((!BC->HasRelocations || Function.getSize() == 0) &&
|
|
|
|
|
"unexpected non-simple function in relocation mode");
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
// Fill in CFI information for this function
|
2018-02-14 12:06:17 -08:00
|
|
|
if (!Function.trapsOnEntry()) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!CFIRdWrt->fillCFIInfoFor(Function)) {
|
2018-02-14 12:06:17 -08:00
|
|
|
if (BC->HasRelocations) {
|
2018-06-20 12:03:24 -07:00
|
|
|
BC->exitWithBugReport("unable to fill CFI.", Function);
|
2018-02-14 12:06:17 -08:00
|
|
|
} else {
|
|
|
|
|
errs() << "BOLT-WARNING: unable to fill CFI for function "
|
|
|
|
|
<< Function << ". Skipping.\n";
|
|
|
|
|
Function.setSimple(false);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2016-02-22 18:25:43 -08:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Parse LSDA.
|
2018-02-14 12:06:17 -08:00
|
|
|
if (Function.getLSDAAddress() != 0)
|
2018-04-20 20:03:31 -07:00
|
|
|
Function.parseLSDA(getLSDAData(), getLSDAAddress());
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
if (!Function.buildCFG())
|
|
|
|
|
continue;
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
if (opts::PrintAll)
|
|
|
|
|
Function.print(outs(), "while building cfg", true);
|
|
|
|
|
|
|
|
|
|
} // Iterate over all functions
|
2018-06-06 03:17:32 -07:00
|
|
|
|
|
|
|
|
BC->postProcessSymbolTable();
|
2017-11-28 09:57:21 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::postProcessFunctions() {
|
|
|
|
|
BC->TotalScore = 0;
|
|
|
|
|
BC->SumExecutionCount = 0;
|
|
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
|
|
|
|
|
if (Function.empty())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
Function.postProcessCFG();
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
if (opts::PrintAll || opts::PrintCFG)
|
2016-09-02 14:15:29 -07:00
|
|
|
Function.print(outs(), "after building cfg", true);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-07-01 08:40:56 -07:00
|
|
|
if (opts::DumpDotAll)
|
|
|
|
|
Function.dumpGraphForPass("build-cfg");
|
|
|
|
|
|
2016-05-26 10:58:01 -07:00
|
|
|
if (opts::PrintLoopInfo) {
|
|
|
|
|
Function.calculateLoopInfo();
|
2016-09-02 14:15:29 -07:00
|
|
|
Function.printLoopInfo(outs());
|
2016-05-26 10:58:01 -07:00
|
|
|
}
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
BC->TotalScore += Function.getFunctionScore();
|
2017-05-01 16:52:54 -07:00
|
|
|
BC->SumExecutionCount += Function.getKnownExecutionCount();
|
2016-01-16 14:58:22 -08:00
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
|
|
|
|
|
if (opts::PrintGlobals) {
|
|
|
|
|
outs() << "BOLT-INFO: Global symbols:\n";
|
|
|
|
|
BC->printGlobalSymbols(outs());
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::runOptimizationPasses() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("runOptimizationPasses", "run optimization passes",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2017-08-10 13:18:44 -07:00
|
|
|
BinaryFunctionPassManager::runAllPasses(*BC, BinaryFunctions, LargeFunctions);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Helper function to emit the contents of a function via a MCStreamer object.
|
2017-12-09 21:40:39 -08:00
|
|
|
void RewriteInstance::emitFunction(MCStreamer &Streamer,
|
|
|
|
|
BinaryFunction &Function,
|
2017-05-24 18:40:29 -07:00
|
|
|
bool EmitColdPart) {
|
2018-07-08 12:14:08 -07:00
|
|
|
if (Function.size() == 0)
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
if (Function.getState() == BinaryFunction::State::Empty)
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
MCSection *Section;
|
2018-07-08 12:14:08 -07:00
|
|
|
if (BC->HasRelocations || Function.isInjected()) {
|
2017-05-24 18:40:29 -07:00
|
|
|
Section = BC->MOFI->getTextSection();
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
|
|
|
|
// Each fuction is emmitted into its own section.
|
2017-01-17 15:49:59 -08:00
|
|
|
Section =
|
2017-05-24 18:40:29 -07:00
|
|
|
BC->Ctx->getELFSection(EmitColdPart ? Function.getColdCodeSectionName()
|
|
|
|
|
: Function.getCodeSectionName(),
|
2018-07-08 12:14:08 -07:00
|
|
|
ELF::SHT_PROGBITS,
|
|
|
|
|
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2016-03-02 18:40:10 -08:00
|
|
|
|
|
|
|
|
Section->setHasInstructions(true);
|
2017-05-16 09:27:34 -07:00
|
|
|
|
2017-05-24 18:40:29 -07:00
|
|
|
BC->Ctx->addGenDwarfSection(Section);
|
2016-03-02 18:40:10 -08:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
Streamer.SwitchSection(Section);
|
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2017-05-24 21:59:01 -07:00
|
|
|
Streamer.EmitCodeAlignment(BinaryFunction::MinAlign);
|
2017-10-27 15:05:31 -07:00
|
|
|
auto MaxAlignBytes = EmitColdPart
|
|
|
|
|
? Function.getMaxColdAlignmentBytes()
|
|
|
|
|
: Function.getMaxAlignmentBytes();
|
|
|
|
|
if (MaxAlignBytes > 0)
|
|
|
|
|
Streamer.EmitCodeAlignment(Function.getAlignment(), MaxAlignBytes);
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
|
|
|
|
Streamer.EmitCodeAlignment(Function.getAlignment());
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-17 14:56:42 -08:00
|
|
|
MCContext &Context = Streamer.getContext();
|
|
|
|
|
const MCAsmInfo *MAI = Context.getAsmInfo();
|
|
|
|
|
|
2016-08-07 12:35:23 -07:00
|
|
|
// Emit all names the function is known under.
|
2016-12-21 17:13:56 -08:00
|
|
|
for (const auto &Name : Function.getNames()) {
|
|
|
|
|
Twine EmitName = EmitColdPart ? Twine(Name).concat(".cold") : Name;
|
2017-05-24 18:40:29 -07:00
|
|
|
auto *EmitSymbol = BC->Ctx->getOrCreateSymbol(EmitName);
|
2016-12-21 17:13:56 -08:00
|
|
|
Streamer.EmitSymbolAttribute(EmitSymbol, MCSA_ELF_TypeFunction);
|
|
|
|
|
DEBUG(dbgs() << "emitting symbol " << EmitSymbol->getName()
|
|
|
|
|
<< " for function " << Function << '\n');
|
|
|
|
|
Streamer.EmitLabel(EmitSymbol);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Emit CFI start
|
2017-12-09 21:40:39 -08:00
|
|
|
if (Function.hasCFI() && (BC->HasRelocations || Function.isSimple())) {
|
2015-11-23 17:54:18 -08:00
|
|
|
Streamer.EmitCFIStartProc(/*IsSimple=*/false);
|
|
|
|
|
if (Function.getPersonalityFunction() != nullptr) {
|
|
|
|
|
Streamer.EmitCFIPersonality(Function.getPersonalityFunction(),
|
|
|
|
|
Function.getPersonalityEncoding());
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
auto *LSDASymbol = EmitColdPart ? Function.getColdLSDASymbol()
|
|
|
|
|
: Function.getLSDASymbol();
|
|
|
|
|
if (LSDASymbol) {
|
2017-05-24 18:40:29 -07:00
|
|
|
Streamer.EmitCFILsda(LSDASymbol, BC->MOFI->getLSDAEncoding());
|
2015-12-18 17:00:46 -08:00
|
|
|
} else {
|
|
|
|
|
Streamer.EmitCFILsda(0, dwarf::DW_EH_PE_omit);
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
// Emit CFI instructions relative to the CIE
|
2016-11-17 14:56:42 -08:00
|
|
|
for (const auto &CFIInstr : Function.cie()) {
|
|
|
|
|
// Only write CIE CFI insns that LLVM will not already emit
|
|
|
|
|
const std::vector<MCCFIInstruction> &FrameInstrs =
|
|
|
|
|
MAI->getInitialFrameState();
|
|
|
|
|
if (std::find(FrameInstrs.begin(), FrameInstrs.end(), CFIInstr) ==
|
|
|
|
|
FrameInstrs.end())
|
|
|
|
|
Streamer.EmitCFIInstruction(CFIInstr);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
assert((Function.empty() || !(*Function.begin()).isCold()) &&
|
2016-01-26 16:03:58 -08:00
|
|
|
"first basic block should never be cold");
|
|
|
|
|
|
2016-04-21 09:54:33 -07:00
|
|
|
// Emit UD2 at the beginning if requested by user.
|
|
|
|
|
if (!opts::BreakFunctionNames.empty()) {
|
|
|
|
|
for (auto &Name : opts::BreakFunctionNames) {
|
2016-06-10 17:13:05 -07:00
|
|
|
if (Function.hasName(Name)) {
|
2016-04-21 09:54:33 -07:00
|
|
|
Streamer.EmitIntValue(0x0B0F, 2); // UD2: 0F 0B
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Emit code.
|
2016-09-27 19:09:38 -07:00
|
|
|
Function.emitBody(Streamer, EmitColdPart);
|
2016-04-19 22:00:29 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Emit padding if requested.
|
|
|
|
|
if (auto Padding = opts::padFunction(Function)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: padding function " << Function << " with "
|
|
|
|
|
<< Padding << " bytes\n");
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Streamer.emitFill(Padding, MAI->getTextAlignFillValue());
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2016-03-28 17:45:22 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::MarkFuncs) {
|
|
|
|
|
Streamer.EmitIntValue(MAI->getTrapFillValue(), 1);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Emit CFI end
|
2017-12-09 21:40:39 -08:00
|
|
|
if (Function.hasCFI() && (BC->HasRelocations || Function.isSimple()))
|
2015-11-23 17:54:18 -08:00
|
|
|
Streamer.EmitCFIEndProc();
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
Streamer.EmitLabel(EmitColdPart ? Function.getFunctionColdEndLabel()
|
|
|
|
|
: Function.getFunctionEndLabel());
|
2016-01-22 16:45:39 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Exception handling info for the function.
|
|
|
|
|
Function.emitLSDA(&Streamer, EmitColdPart);
|
2015-12-18 17:00:46 -08:00
|
|
|
|
2017-01-17 15:49:59 -08:00
|
|
|
if (!EmitColdPart && opts::JumpTables > JTS_NONE)
|
2016-09-27 19:09:38 -07:00
|
|
|
Function.emitJumpTables(&Streamer);
|
2017-05-08 22:51:36 -07:00
|
|
|
|
|
|
|
|
Function.setEmitted();
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2017-05-24 18:40:29 -07:00
|
|
|
namespace {

/// Return a vector whose only element is \p t.
/// The argument is taken by value and moved into the vector, so move-only
/// element types are supported.
template <typename T>
std::vector<T> singletonSet(T t) {
  std::vector<T> Single;
  Single.emplace_back(std::move(t));
  return Single;
}

} // anonymous namespace
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::emitFunctions() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("emitFunctions", "emit functions", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2015-11-23 17:54:18 -08:00
|
|
|
std::error_code EC;
|
|
|
|
|
|
|
|
|
|
// This is an object file, which we keep for debugging purposes.
|
|
|
|
|
// Once we decide it's useless, we should create it in memory.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
std::unique_ptr<ToolOutputFile> TempOut =
|
|
|
|
|
llvm::make_unique<ToolOutputFile>(opts::OutputFilename + ".bolt.o",
|
|
|
|
|
EC, sys::fs::F_None);
|
2015-11-23 17:54:18 -08:00
|
|
|
check_error(EC, "cannot create output object file");
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<buffer_ostream> BOS =
|
|
|
|
|
make_unique<buffer_ostream>(TempOut->os());
|
|
|
|
|
raw_pwrite_stream *OS = BOS.get();
|
|
|
|
|
|
|
|
|
|
// Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
|
|
|
|
|
// and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
|
|
|
|
|
// two instances.
|
|
|
|
|
auto MCE = BC->TheTarget->createMCCodeEmitter(*BC->MII, *BC->MRI, *BC->Ctx);
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto MAB =
|
|
|
|
|
BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions());
|
|
|
|
|
std::unique_ptr<MCStreamer> Streamer(BC->TheTarget->createMCObjectStreamer(
|
|
|
|
|
*BC->TheTriple, *BC->Ctx, std::unique_ptr<MCAsmBackend>(MAB), *OS,
|
|
|
|
|
std::unique_ptr<MCCodeEmitter>(MCE), *BC->STI,
|
|
|
|
|
/* RelaxAll */ false,
|
|
|
|
|
/* IncrementalLinkerCompatible */ false,
|
|
|
|
|
/* DWARFMustBeAtTheEnd */ false));
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
Streamer->InitSections(false);
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Mark beginning of "hot text".
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations && opts::HotText)
|
2016-09-27 19:09:38 -07:00
|
|
|
Streamer->EmitLabel(BC->Ctx->getOrCreateSymbol("__hot_start"));
|
|
|
|
|
|
|
|
|
|
// Sort functions for the output.
|
2017-08-31 11:45:37 -07:00
|
|
|
std::vector<BinaryFunction *> SortedFunctions =
|
|
|
|
|
BinaryContext::getSortedFunctions(BinaryFunctions);
|
2017-03-03 11:35:41 -08:00
|
|
|
|
|
|
|
|
DEBUG(
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations) {
|
2017-03-03 11:35:41 -08:00
|
|
|
auto SortedIt = SortedFunctions.begin();
|
|
|
|
|
for (auto &It : BinaryFunctions) {
|
|
|
|
|
assert(&It.second == *SortedIt);
|
|
|
|
|
++SortedIt;
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
uint32_t LastHotIndex = -1u;
|
|
|
|
|
uint32_t CurrentIndex = 0;
|
|
|
|
|
for (auto *BF : SortedFunctions) {
|
|
|
|
|
if (!BF->hasValidIndex() && LastHotIndex == -1u) {
|
|
|
|
|
LastHotIndex = CurrentIndex;
|
|
|
|
|
}
|
|
|
|
|
assert(LastHotIndex == -1u || !BF->hasValidIndex());
|
|
|
|
|
assert(!BF->hasValidIndex() || CurrentIndex == BF->getIndex());
|
|
|
|
|
++CurrentIndex;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2017-03-03 11:35:41 -08:00
|
|
|
CurrentIndex = 0;
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: LastHotIndex = " << LastHotIndex << "\n");
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
bool ColdFunctionSeen = false;
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Output functions one by one.
|
2016-09-27 19:09:38 -07:00
|
|
|
for (auto *FunctionPtr : SortedFunctions) {
|
|
|
|
|
auto &Function = *FunctionPtr;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Emit all cold function split parts at the border of hot and
|
|
|
|
|
// cold functions.
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations && !ColdFunctionSeen &&
|
|
|
|
|
CurrentIndex >= LastHotIndex) {
|
2016-09-27 19:09:38 -07:00
|
|
|
// Mark the end of "hot" stuff.
|
|
|
|
|
if (opts::HotText) {
|
|
|
|
|
Streamer->SwitchSection(BC->MOFI->getTextSection());
|
|
|
|
|
Streamer->EmitLabel(BC->Ctx->getOrCreateSymbol("__hot_end"));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ColdFunctionSeen = true;
|
2018-07-08 12:14:08 -07:00
|
|
|
|
|
|
|
|
// Emit injected functions hot part
|
|
|
|
|
for (auto *InjectedFunction : BC->getInjectedBinaryFunctions())
|
|
|
|
|
emitFunction(*Streamer, *InjectedFunction, /*EmitColdPart=*/false);
|
|
|
|
|
|
|
|
|
|
// Emit injected functions cold part
|
|
|
|
|
for (auto *InjectedFunction : BC->getInjectedBinaryFunctions())
|
|
|
|
|
emitFunction(*Streamer, *InjectedFunction, /*EmitColdPart=*/true);
|
|
|
|
|
|
|
|
|
|
//TODO: this code is unreachable if all functions are hot
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::SplitFunctions != BinaryFunction::ST_NONE) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: generating code for split functions\n");
|
|
|
|
|
for (auto *FPtr : SortedFunctions) {
|
|
|
|
|
if (!FPtr->isSplit() || !FPtr->isSimple())
|
|
|
|
|
continue;
|
2017-05-24 18:40:29 -07:00
|
|
|
emitFunction(*Streamer, *FPtr, /*EmitColdPart=*/true);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: first cold function: " << Function << '\n');
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations &&
|
2016-09-27 19:09:38 -07:00
|
|
|
(!Function.isSimple() || !opts::shouldProcess(Function))) {
|
2017-03-03 11:35:41 -08:00
|
|
|
++CurrentIndex;
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-02-08 10:02:48 -08:00
|
|
|
DEBUG(dbgs() << "BOLT: generating code for function \""
|
2016-08-07 12:35:23 -07:00
|
|
|
<< Function << "\" : "
|
2016-02-08 10:02:48 -08:00
|
|
|
<< Function.getFunctionNumber() << '\n');
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2017-05-24 18:40:29 -07:00
|
|
|
emitFunction(*Streamer, Function, /*EmitColdPart=*/false);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations && Function.isSplit())
|
2017-05-24 18:40:29 -07:00
|
|
|
emitFunction(*Streamer, Function, /*EmitColdPart=*/true);
|
2017-03-03 11:35:41 -08:00
|
|
|
|
|
|
|
|
++CurrentIndex;
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2018-07-08 12:14:08 -07:00
|
|
|
// Emit injected functions in non-reloc mode
|
|
|
|
|
if (!BC->HasRelocations) {
|
|
|
|
|
for (auto *InjectedFunction : BC->getInjectedBinaryFunctions()){
|
|
|
|
|
emitFunction(*Streamer, *InjectedFunction, /*EmitColdPart=*/false);
|
|
|
|
|
emitFunction(*Streamer, *InjectedFunction, /*EmitColdPart=*/true);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!ColdFunctionSeen && opts::HotText) {
|
|
|
|
|
Streamer->SwitchSection(BC->MOFI->getTextSection());
|
|
|
|
|
Streamer->EmitLabel(BC->Ctx->getOrCreateSymbol("__hot_end"));
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations && opts::UpdateDebugSections)
|
2016-05-31 19:12:26 -07:00
|
|
|
updateDebugLineInfoForNonSimpleFunctions();
|
2016-04-05 19:35:45 -07:00
|
|
|
|
2017-04-05 09:29:24 -07:00
|
|
|
emitDataSections(Streamer.get());
|
|
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
// Relocate .eh_frame to .eh_frame_old.
|
2018-01-23 15:10:24 -08:00
|
|
|
if (EHFrameSection) {
|
2016-11-11 14:33:34 -08:00
|
|
|
relocateEHFrameSection();
|
2018-01-23 15:10:24 -08:00
|
|
|
emitDataSection(Streamer.get(), *EHFrameSection, ".eh_frame_old");
|
2016-11-11 14:33:34 -08:00
|
|
|
}
|
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
// Update _end if needed.
|
|
|
|
|
if (opts::UpdateEnd) {
|
|
|
|
|
Streamer->EmitLabel(BC->Ctx->getOrCreateSymbol("_end"));
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
Streamer->Finish();
|
|
|
|
|
|
2016-02-08 10:02:48 -08:00
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
2017-05-08 22:51:36 -07:00
|
|
|
// Assign addresses to new sections.
|
2016-02-08 10:02:48 -08:00
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
2016-03-02 18:40:10 -08:00
|
|
|
if (opts::UpdateDebugSections) {
|
|
|
|
|
// Compute offsets of tables in .debug_line for each compile unit.
|
2016-05-27 20:19:19 -07:00
|
|
|
updateLineTableOffsets();
|
2016-03-02 18:40:10 -08:00
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Get output object as ObjectFile.
|
|
|
|
|
std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
|
|
|
|
|
MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
std::unique_ptr<object::ObjectFile> Obj = cantFail(
|
|
|
|
|
object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()),
|
|
|
|
|
"error creating in-memory object");
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2018-03-30 15:49:34 -07:00
|
|
|
auto Resolver = orc::createLegacyLookupResolver(
|
|
|
|
|
[&](const std::string &Name) -> JITSymbol {
|
|
|
|
|
DEBUG(dbgs() << "BOLT: looking for " << Name << "\n");
|
2018-04-20 20:03:31 -07:00
|
|
|
if (auto *I = BC->getBinaryDataByName(Name)) {
|
|
|
|
|
const uint64_t Address = I->isMoved() && !I->isJumpTable()
|
|
|
|
|
? I->getOutputAddress()
|
|
|
|
|
: I->getAddress();
|
|
|
|
|
return JITSymbol(Address, JITSymbolFlags());
|
|
|
|
|
}
|
2018-03-30 15:49:34 -07:00
|
|
|
return JITSymbol(nullptr);
|
|
|
|
|
},
|
|
|
|
|
[](Error Err) { cantFail(std::move(Err), "lookup failed"); });
|
2016-09-27 19:09:38 -07:00
|
|
|
Resolver->setAllowsZeroSymbols(true);
|
|
|
|
|
|
2017-05-08 22:51:36 -07:00
|
|
|
MCAsmLayout FinalLayout(
|
2016-09-27 19:09:38 -07:00
|
|
|
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2018-03-30 15:49:34 -07:00
|
|
|
SSP.reset(new decltype(SSP)::element_type());
|
|
|
|
|
ES.reset(new decltype(ES)::element_type(*SSP));
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
OLT.reset(new decltype(OLT)::element_type(
|
2018-03-30 15:49:34 -07:00
|
|
|
*ES,
|
|
|
|
|
[this, &Resolver](orc::VModuleKey Key) {
|
|
|
|
|
orc::RTDyldObjectLinkingLayer::Resources R;
|
|
|
|
|
R.MemMgr = EFMM;
|
|
|
|
|
R.Resolver = Resolver;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
// Get memory manager
|
2018-03-30 15:49:34 -07:00
|
|
|
return R;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
},
|
2018-03-14 15:07:16 -07:00
|
|
|
// Loaded notifier
|
2018-03-30 15:49:34 -07:00
|
|
|
[&](orc::VModuleKey Key, const object::ObjectFile &Obj,
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const RuntimeDyld::LoadedObjectInfo &) {
|
|
|
|
|
// Assign addresses to all sections.
|
2018-03-30 15:49:34 -07:00
|
|
|
mapFileSections(Key);
|
2018-03-14 15:07:16 -07:00
|
|
|
},
|
|
|
|
|
// Finalized notifier
|
2018-03-30 15:49:34 -07:00
|
|
|
[&](orc::VModuleKey Key) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
// Update output addresses based on the new section map and
|
|
|
|
|
// layout.
|
|
|
|
|
updateOutputValues(FinalLayout);
|
|
|
|
|
}));
|
|
|
|
|
|
|
|
|
|
OLT->setProcessAllSections(true);
|
2018-03-30 15:49:34 -07:00
|
|
|
auto K = ES->allocateVModule();
|
|
|
|
|
cantFail(OLT->addObject(K, std::move(ObjectMemBuffer)));
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
|
2018-03-30 15:49:34 -07:00
|
|
|
cantFail(OLT->emitAndFinalize(K));
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-10-16 16:53:50 -07:00
|
|
|
if (opts::PrintCacheMetrics) {
|
2017-11-14 16:51:24 -08:00
|
|
|
outs() << "BOLT-INFO: cache metrics after emitting functions:\n";
|
2017-10-16 16:53:50 -07:00
|
|
|
CacheMetrics::printAll(SortedFunctions);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::KeepTmp)
|
|
|
|
|
TempOut->keep();
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2018-03-30 15:49:34 -07:00
|
|
|
/// Assign output addresses to every emitted section of the module identified
/// by \p Key: code sections first, then data sections.
///
/// Both steps advance NextAvailableAddress as they place sections, so the
/// call order determines the resulting layout and must be kept.
void RewriteInstance::mapFileSections(orc::VModuleKey Key) {
  // Code goes first; data is laid out after it.
  mapTextSections(Key);
  mapDataSections(Key);
}
|
2018-06-20 12:03:24 -07:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
void RewriteInstance::mapTextSections(orc::VModuleKey Key) {
|
2016-09-27 19:09:38 -07:00
|
|
|
NewTextSectionStartAddress = NextAvailableAddress;
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2018-02-01 16:33:43 -08:00
|
|
|
auto TextSection = BC->getUniqueSectionByName(".text");
|
|
|
|
|
assert(TextSection && ".text not found in output");
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
uint64_t NewTextSectionOffset = 0;
|
2018-02-01 16:33:43 -08:00
|
|
|
if (opts::UseOldText &&
|
|
|
|
|
TextSection->getOutputSize() <= BC->OldTextSectionSize) {
|
2016-09-27 19:09:38 -07:00
|
|
|
outs() << "BOLT-INFO: using original .text for new code\n";
|
|
|
|
|
// Utilize the original .text for storage.
|
2017-09-20 10:43:01 -07:00
|
|
|
NewTextSectionStartAddress = BC->OldTextSectionAddress;
|
|
|
|
|
NewTextSectionOffset = BC->OldTextSectionOffset;
|
2016-09-27 19:09:38 -07:00
|
|
|
auto Padding = OffsetToAlignment(NewTextSectionStartAddress, PageAlign);
|
2018-02-01 16:33:43 -08:00
|
|
|
if (Padding + TextSection->getOutputSize() <= BC->OldTextSectionSize) {
|
2016-09-27 19:09:38 -07:00
|
|
|
outs() << "BOLT-INFO: using 0x200000 alignment\n";
|
|
|
|
|
NewTextSectionStartAddress += Padding;
|
|
|
|
|
NewTextSectionOffset += Padding;
|
|
|
|
|
}
|
2016-11-09 11:19:02 -08:00
|
|
|
} else {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::UseOldText) {
|
|
|
|
|
errs() << "BOLT-ERROR: original .text too small to fit the new code. "
|
2018-02-01 16:33:43 -08:00
|
|
|
<< TextSection->getOutputSize() << " bytes needed, have "
|
|
|
|
|
<< BC->OldTextSectionSize << " bytes available.\n";
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
auto Padding = OffsetToAlignment(NewTextSectionStartAddress, PageAlign);
|
|
|
|
|
NextAvailableAddress += Padding;
|
|
|
|
|
NewTextSectionStartAddress = NextAvailableAddress;
|
2017-01-17 15:49:59 -08:00
|
|
|
NewTextSectionOffset = getFileOffsetForAddress(NextAvailableAddress);
|
2018-02-01 16:33:43 -08:00
|
|
|
NextAvailableAddress += Padding + TextSection->getOutputSize();
|
2016-11-09 11:19:02 -08:00
|
|
|
}
|
2018-02-01 16:33:43 -08:00
|
|
|
TextSection->setFileAddress(NewTextSectionStartAddress);
|
|
|
|
|
TextSection->setFileOffset(NewTextSectionOffset);
|
2016-11-09 11:19:02 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
DEBUG(dbgs() << "BOLT: mapping .text 0x"
|
2018-02-01 16:33:43 -08:00
|
|
|
<< Twine::utohexstr(TextSection->getAllocAddress())
|
2016-09-27 19:09:38 -07:00
|
|
|
<< " to 0x" << Twine::utohexstr(NewTextSectionStartAddress)
|
|
|
|
|
<< '\n');
|
2018-03-30 15:49:34 -07:00
|
|
|
OLT->mapSectionAddress(Key, TextSection->getSectionID(),
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NewTextSectionStartAddress);
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
2018-07-08 12:14:08 -07:00
|
|
|
|
|
|
|
|
// Prepare .text section for injected functions
|
|
|
|
|
auto TextSection = BC->getUniqueSectionByName(".text");
|
|
|
|
|
assert(TextSection && ".text not found in output");
|
|
|
|
|
if (TextSection->hasValidSectionID()) {
|
|
|
|
|
uint64_t NewTextSectionOffset = 0;
|
|
|
|
|
auto Padding = OffsetToAlignment(NewTextSectionStartAddress, PageAlign);
|
|
|
|
|
NextAvailableAddress += Padding;
|
|
|
|
|
NewTextSectionStartAddress = NextAvailableAddress;
|
|
|
|
|
NewTextSectionOffset = getFileOffsetForAddress(NextAvailableAddress);
|
|
|
|
|
NextAvailableAddress += Padding + TextSection->getOutputSize();
|
|
|
|
|
TextSection->setFileAddress(NewTextSectionStartAddress);
|
|
|
|
|
TextSection->setFileOffset(NewTextSectionOffset);
|
|
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT: mapping .text 0x"
|
|
|
|
|
<< Twine::utohexstr(TextSection->getAllocAddress())
|
|
|
|
|
<< " to 0x" << Twine::utohexstr(NewTextSectionStartAddress)
|
|
|
|
|
<< '\n');
|
|
|
|
|
OLT->mapSectionAddress(Key, TextSection->getSectionID(),
|
|
|
|
|
NewTextSectionStartAddress);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
auto &Function = BFI.second;
|
|
|
|
|
if (!Function.isSimple() || !opts::shouldProcess(Function))
|
|
|
|
|
continue;
|
2016-10-07 09:34:16 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
auto TooLarge = false;
|
2018-05-14 11:10:26 -07:00
|
|
|
auto FuncSection =
|
2018-07-08 12:14:08 -07:00
|
|
|
BC->getUniqueSectionByName(Function.getCodeSectionName());
|
2018-02-01 16:33:43 -08:00
|
|
|
assert(FuncSection && "cannot find section for function");
|
2016-09-27 19:09:38 -07:00
|
|
|
DEBUG(dbgs() << "BOLT: mapping 0x"
|
2018-02-01 16:33:43 -08:00
|
|
|
<< Twine::utohexstr(FuncSection->getAllocAddress())
|
2016-09-27 19:09:38 -07:00
|
|
|
<< " to 0x" << Twine::utohexstr(Function.getAddress())
|
|
|
|
|
<< '\n');
|
2018-03-30 15:49:34 -07:00
|
|
|
OLT->mapSectionAddress(Key, FuncSection->getSectionID(),
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Function.getAddress());
|
2018-02-01 16:33:43 -08:00
|
|
|
Function.setImageAddress(FuncSection->getAllocAddress());
|
|
|
|
|
Function.setImageSize(FuncSection->getOutputSize());
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Function.getImageSize() > Function.getMaxSize()) {
|
|
|
|
|
TooLarge = true;
|
|
|
|
|
FailedAddresses.emplace_back(Function.getAddress());
|
|
|
|
|
}
|
2016-03-11 11:30:30 -08:00
|
|
|
|
2017-01-17 15:49:59 -08:00
|
|
|
// Map jump tables if updating in-place.
|
|
|
|
|
if (opts::JumpTables == JTS_BASIC) {
|
|
|
|
|
for (auto &JTI : Function.JumpTables) {
|
2017-11-14 20:05:11 -08:00
|
|
|
auto *JT = JTI.second;
|
2018-04-20 20:03:31 -07:00
|
|
|
auto &Section = JT->getOutputSection();
|
|
|
|
|
Section.setFileAddress(JT->getAddress());
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName()
|
2017-11-14 20:05:11 -08:00
|
|
|
<< " to 0x" << Twine::utohexstr(JT->getAddress())
|
|
|
|
|
<< '\n');
|
2018-04-20 20:03:31 -07:00
|
|
|
OLT->mapSectionAddress(Key, Section.getSectionID(),
|
2017-11-14 20:05:11 -08:00
|
|
|
JT->getAddress());
|
2017-01-17 15:49:59 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!Function.isSplit())
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
auto ColdSection =
|
|
|
|
|
BC->getUniqueSectionByName(Function.getColdCodeSectionName());
|
|
|
|
|
assert(ColdSection && "cannot find section for cold part");
|
2016-09-27 19:09:38 -07:00
|
|
|
// Cold fragments are aligned at 16 bytes.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NextAvailableAddress = alignTo(NextAvailableAddress, 16);
|
2017-01-17 15:49:59 -08:00
|
|
|
auto &ColdPart = Function.cold();
|
2016-09-27 19:09:38 -07:00
|
|
|
if (TooLarge) {
|
|
|
|
|
// The corresponding FDE will refer to address 0.
|
2017-01-17 15:49:59 -08:00
|
|
|
ColdPart.setAddress(0);
|
|
|
|
|
ColdPart.setImageAddress(0);
|
|
|
|
|
ColdPart.setImageSize(0);
|
|
|
|
|
ColdPart.setFileOffset(0);
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
2017-01-17 15:49:59 -08:00
|
|
|
ColdPart.setAddress(NextAvailableAddress);
|
2018-02-01 16:33:43 -08:00
|
|
|
ColdPart.setImageAddress(ColdSection->getAllocAddress());
|
|
|
|
|
ColdPart.setImageSize(ColdSection->getOutputSize());
|
2017-01-17 15:49:59 -08:00
|
|
|
ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT: mapping cold fragment 0x"
|
2017-01-17 15:49:59 -08:00
|
|
|
<< Twine::utohexstr(ColdPart.getImageAddress())
|
2016-09-27 19:09:38 -07:00
|
|
|
<< " to 0x"
|
2017-01-17 15:49:59 -08:00
|
|
|
<< Twine::utohexstr(ColdPart.getAddress())
|
2016-09-27 19:09:38 -07:00
|
|
|
<< " with size "
|
2017-01-17 15:49:59 -08:00
|
|
|
<< Twine::utohexstr(ColdPart.getImageSize()) << '\n');
|
2018-03-30 15:49:34 -07:00
|
|
|
OLT->mapSectionAddress(Key, ColdSection->getSectionID(),
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
ColdPart.getAddress());
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-01-17 15:49:59 -08:00
|
|
|
NextAvailableAddress += ColdPart.getImageSize();
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add the new text section aggregating all existing code sections.
|
2017-01-17 15:49:59 -08:00
|
|
|
// This is pseudo-section that serves a purpose of creating a corresponding
|
|
|
|
|
// entry in section header table.
|
2016-09-27 19:09:38 -07:00
|
|
|
auto NewTextSectionSize = NextAvailableAddress - NewTextSectionStartAddress;
|
|
|
|
|
if (NewTextSectionSize) {
|
2018-02-01 16:33:43 -08:00
|
|
|
const auto Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
|
|
|
|
|
/*IsText=*/true,
|
|
|
|
|
/*IsAllocatable=*/true);
|
2018-06-14 14:27:20 -07:00
|
|
|
auto &Section = BC->registerOrUpdateSection(BOLTSecPrefix + ".text",
|
2018-02-01 16:33:43 -08:00
|
|
|
ELF::SHT_PROGBITS,
|
|
|
|
|
Flags,
|
|
|
|
|
nullptr,
|
|
|
|
|
NewTextSectionSize,
|
|
|
|
|
16,
|
|
|
|
|
true /*IsLocal*/);
|
|
|
|
|
Section.setFileAddress(NewTextSectionStartAddress);
|
|
|
|
|
Section.setFileOffset(
|
|
|
|
|
getFileOffsetForAddress(NewTextSectionStartAddress));
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
/// Assign output addresses and file offsets to data sections.
///
/// The fixed list of sections below is laid out sequentially starting at
/// NextAvailableAddress, honoring each section's alignment. After that, for
/// every section that has relocations and a section reference, the section
/// named OrgSecPrefix + <name> is mapped to the address of that section
/// unless it already has a file address.
///
/// \p Key identifies the module passed on to OLT->mapSectionAddress().
void RewriteInstance::mapDataSections(orc::VModuleKey Key) {
  // Map special sections to their addresses in the output image.
  // These are the sections that we generate via MCStreamer.
  // The order is important.
  const std::vector<std::string> NewSectionNames = {
      ".eh_frame", ".eh_frame_old", ".gcc_except_table", ".rodata",
      ".rodata.cold"};

  for (const auto &Name : NewSectionNames) {
    auto Section = BC->getUniqueSectionByName(Name);
    const bool CanMap =
        Section && Section->isAllocatable() && Section->isFinalized();
    if (!CanMap)
      continue;

    // Bump the allocation cursor to satisfy the section's alignment.
    NextAvailableAddress =
        alignTo(NextAvailableAddress, Section->getAlignment());

    DEBUG(dbgs() << "BOLT: mapping section " << Name << " (0x"
                 << Twine::utohexstr(Section->getAllocAddress())
                 << ") to 0x" << Twine::utohexstr(NextAvailableAddress)
                 << ":0x"
                 << Twine::utohexstr(NextAvailableAddress +
                                     Section->getOutputSize())
                 << '\n');

    OLT->mapSectionAddress(Key, Section->getSectionID(), NextAvailableAddress);
    Section->setFileAddress(NextAvailableAddress);
    Section->setFileOffset(getFileOffsetForAddress(NextAvailableAddress));

    // The next section starts right after this one.
    NextAvailableAddress += Section->getOutputSize();
  }

  // Handling for sections with relocations.
  for (const auto &Sec : BC->sections()) {
    const bool NeedsMapping = Sec.hasRelocations() && Sec.hasSectionRef();
    if (!NeedsMapping)
      continue;

    StringRef Name = Sec.getName();
    auto Original =
        BC->getUniqueSectionByName(OrgSecPrefix + std::string(Name));
    const bool CanMap =
        Original && Original->isAllocatable() && Original->isFinalized();
    if (!CanMap)
      continue;

    // A non-zero file address means the section has been placed already.
    if (Original->getFileAddress()) {
      DEBUG(dbgs() << "BOLT-DEBUG: section " << Name
                   << " is already mapped at 0x"
                   << Twine::utohexstr(Original->getFileAddress()) << '\n');
      continue;
    }

    DEBUG(dbgs() << "BOLT: mapping original section " << Name << " (0x"
                 << Twine::utohexstr(Original->getAllocAddress())
                 << ") to 0x" << Twine::utohexstr(Sec.getAddress())
                 << '\n');

    OLT->mapSectionAddress(Key, Original->getSectionID(), Sec.getAddress());

    Original->setFileAddress(Sec.getAddress());
    // The file offset is the distance of the section contents from the
    // beginning of the input file buffer.
    Original->setFileOffset(Sec.getContents().data() -
                            InputFile->getData().data());
  }
}
|
|
|
|
|
|
2017-05-08 22:51:36 -07:00
|
|
|
void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
|
|
|
|
|
|
2018-07-08 12:14:08 -07:00
|
|
|
auto updateOutputValue = [&](BinaryFunction &Function) {
|
2017-05-16 09:27:34 -07:00
|
|
|
if (!Function.isEmitted()) {
|
2018-07-08 12:14:08 -07:00
|
|
|
assert(!Function.isInjected() && "injected function should be emitted");
|
2017-05-16 09:27:34 -07:00
|
|
|
Function.setOutputAddress(Function.getAddress());
|
|
|
|
|
Function.setOutputSize(Function.getSize());
|
2018-07-08 12:14:08 -07:00
|
|
|
return;
|
2017-05-16 09:27:34 -07:00
|
|
|
}
|
2018-07-08 12:14:08 -07:00
|
|
|
if (BC->HasRelocations || Function.isInjected()) {
|
2017-05-08 22:51:36 -07:00
|
|
|
const auto BaseAddress = NewTextSectionStartAddress;
|
|
|
|
|
const auto StartOffset = Layout.getSymbolOffset(*Function.getSymbol());
|
|
|
|
|
const auto EndOffset =
|
|
|
|
|
Layout.getSymbolOffset(*Function.getFunctionEndLabel());
|
2017-09-20 10:43:01 -07:00
|
|
|
if (Function.hasConstantIsland()) {
|
|
|
|
|
const auto DataOffset =
|
|
|
|
|
Layout.getSymbolOffset(*Function.getFunctionConstantIslandLabel());
|
|
|
|
|
Function.setOutputDataAddress(BaseAddress + DataOffset);
|
|
|
|
|
}
|
2017-05-08 22:51:36 -07:00
|
|
|
Function.setOutputAddress(BaseAddress + StartOffset);
|
|
|
|
|
Function.setOutputSize(EndOffset - StartOffset);
|
|
|
|
|
if (Function.isSplit()) {
|
|
|
|
|
const auto *ColdStartSymbol = Function.getColdSymbol();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
|
2017-05-08 22:51:36 -07:00
|
|
|
"split function should have defined cold symbol");
|
|
|
|
|
const auto *ColdEndSymbol = Function.getFunctionColdEndLabel();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
|
2017-05-08 22:51:36 -07:00
|
|
|
"split function should have defined cold end symbol");
|
|
|
|
|
const auto ColdStartOffset = Layout.getSymbolOffset(*ColdStartSymbol);
|
|
|
|
|
const auto ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
|
|
|
|
|
Function.cold().setAddress(BaseAddress + ColdStartOffset);
|
|
|
|
|
Function.cold().setImageSize(ColdEndOffset - ColdStartOffset);
|
2017-11-09 16:59:18 -08:00
|
|
|
if (Function.hasConstantIsland()) {
|
|
|
|
|
const auto DataOffset = Layout.getSymbolOffset(
|
|
|
|
|
*Function.getFunctionColdConstantIslandLabel());
|
|
|
|
|
Function.setOutputColdDataAddress(BaseAddress + DataOffset);
|
|
|
|
|
}
|
2017-05-08 22:51:36 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
Function.setOutputAddress(Function.getAddress());
|
|
|
|
|
Function.setOutputSize(
|
|
|
|
|
Layout.getSymbolOffset(*Function.getFunctionEndLabel()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Update basic block output ranges only for the debug info.
|
|
|
|
|
if (!opts::UpdateDebugSections)
|
2018-07-08 12:14:08 -07:00
|
|
|
return;
|
2017-05-08 22:51:36 -07:00
|
|
|
|
|
|
|
|
// Output ranges should match the input if the body hasn't changed.
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!Function.isSimple() && !BC->HasRelocations)
|
2018-07-08 12:14:08 -07:00
|
|
|
return;
|
2017-05-08 22:51:36 -07:00
|
|
|
|
2018-04-12 10:07:11 -07:00
|
|
|
// AArch64 may have functions that only contains a constant island (no code)
|
|
|
|
|
if (Function.layout_begin() == Function.layout_end())
|
2018-07-08 12:14:08 -07:00
|
|
|
return;
|
2018-04-12 10:07:11 -07:00
|
|
|
|
2017-05-08 22:51:36 -07:00
|
|
|
BinaryBasicBlock *PrevBB = nullptr;
|
|
|
|
|
for (auto BBI = Function.layout_begin(), BBE = Function.layout_end();
|
|
|
|
|
BBI != BBE; ++BBI) {
|
|
|
|
|
auto *BB = *BBI;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
assert(BB->getLabel()->isDefined() && "symbol should be defined");
|
2017-05-31 09:36:49 -07:00
|
|
|
uint64_t BaseAddress;
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2017-05-31 09:36:49 -07:00
|
|
|
BaseAddress = NewTextSectionStartAddress;
|
|
|
|
|
} else {
|
|
|
|
|
BaseAddress = BB->isCold() ? Function.cold().getAddress()
|
|
|
|
|
: Function.getOutputAddress();
|
|
|
|
|
}
|
2017-05-08 22:51:36 -07:00
|
|
|
uint64_t Address = BaseAddress + Layout.getSymbolOffset(*BB->getLabel());
|
|
|
|
|
BB->setOutputStartAddress(Address);
|
|
|
|
|
|
|
|
|
|
if (PrevBB) {
|
|
|
|
|
auto PrevBBEndAddress = Address;
|
|
|
|
|
if (BB->isCold() != PrevBB->isCold()) {
|
|
|
|
|
PrevBBEndAddress =
|
2018-07-08 12:14:08 -07:00
|
|
|
Function.getOutputAddress() + Function.getOutputSize();
|
2017-05-08 22:51:36 -07:00
|
|
|
}
|
|
|
|
|
PrevBB->setOutputEndAddress(PrevBBEndAddress);
|
|
|
|
|
}
|
|
|
|
|
PrevBB = BB;
|
|
|
|
|
}
|
2017-10-09 14:15:38 -07:00
|
|
|
PrevBB->setOutputEndAddress(PrevBB->isCold() ?
|
2017-05-08 22:51:36 -07:00
|
|
|
Function.cold().getAddress() + Function.cold().getImageSize() :
|
|
|
|
|
Function.getOutputAddress() + Function.getOutputSize());
|
2018-07-08 12:14:08 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
auto &Function = BFI.second;
|
|
|
|
|
updateOutputValue(Function);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto *InjectedFunction : BC->getInjectedBinaryFunctions()) {
|
|
|
|
|
updateOutputValue(*InjectedFunction);
|
2017-05-08 22:51:36 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
void RewriteInstance::emitDataSection(MCStreamer *Streamer,
|
|
|
|
|
const BinarySection &Section,
|
2018-04-20 20:03:31 -07:00
|
|
|
StringRef NewName) {
|
|
|
|
|
StringRef SectionName = !NewName.empty() ? NewName : Section.getName();
|
2018-01-23 15:10:24 -08:00
|
|
|
StringRef SectionContents = Section.getContents();
|
2016-11-11 14:33:34 -08:00
|
|
|
auto *ELFSection = BC->Ctx->getELFSection(SectionName,
|
2018-02-01 16:33:43 -08:00
|
|
|
Section.getELFType(),
|
|
|
|
|
Section.getELFFlags());
|
2017-04-05 09:29:24 -07:00
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
Streamer->SwitchSection(ELFSection);
|
|
|
|
|
Streamer->EmitValueToAlignment(Section.getAlignment());
|
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
if (BC->HasRelocations && opts::HotData && Section.isReordered())
|
|
|
|
|
Streamer->EmitLabel(BC->Ctx->getOrCreateSymbol("__hot_data_start"));
|
2018-06-20 12:03:24 -07:00
|
|
|
|
2017-04-05 09:29:24 -07:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: emitting "
|
2018-02-01 16:33:43 -08:00
|
|
|
<< (Section.isAllocatable() ? "" : "non-")
|
2017-04-05 09:29:24 -07:00
|
|
|
<< "allocatable data section " << SectionName << '\n');
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
if (!Section.hasRelocations()) {
|
2016-11-11 14:33:34 -08:00
|
|
|
Streamer->EmitBytes(SectionContents);
|
2018-04-20 20:03:31 -07:00
|
|
|
} else {
|
|
|
|
|
uint64_t SectionOffset = 0;
|
|
|
|
|
for (auto &Relocation : Section.relocations()) {
|
|
|
|
|
assert(Relocation.Offset < SectionContents.size() && "overflow detected");
|
|
|
|
|
if (SectionOffset < Relocation.Offset) {
|
|
|
|
|
Streamer->EmitBytes(
|
2018-05-14 11:10:26 -07:00
|
|
|
SectionContents.substr(SectionOffset,
|
|
|
|
|
Relocation.Offset - SectionOffset));
|
2018-04-20 20:03:31 -07:00
|
|
|
SectionOffset = Relocation.Offset;
|
|
|
|
|
}
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: emitting relocation for symbol "
|
|
|
|
|
<< Relocation.Symbol->getName() << " at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(Relocation.Offset)
|
|
|
|
|
<< " with size "
|
|
|
|
|
<< Relocation::getSizeForType(Relocation.Type) << '\n');
|
|
|
|
|
auto RelocationSize = Relocation.emit(Streamer);
|
|
|
|
|
SectionOffset += RelocationSize;
|
|
|
|
|
}
|
|
|
|
|
assert(SectionOffset <= SectionContents.size() && "overflow error");
|
|
|
|
|
if (SectionOffset < SectionContents.size()) {
|
|
|
|
|
Streamer->EmitBytes(SectionContents.substr(SectionOffset));
|
2016-11-11 14:33:34 -08:00
|
|
|
}
|
|
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
|
|
|
|
|
if (BC->HasRelocations && opts::HotData && Section.isReordered())
|
|
|
|
|
Streamer->EmitLabel(BC->Ctx->getOrCreateSymbol("__hot_data_end"));
|
2016-11-11 14:33:34 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
/// Emit every section that has both relocations and a section reference.
///
/// A reordered section is emitted under its output name; any other section
/// is emitted under its own name prefixed with OrgSecPrefix.
void RewriteInstance::emitDataSections(MCStreamer *Streamer) {
  for (const auto &Section : BC->sections()) {
    const bool ShouldEmit =
        Section.hasRelocations() && Section.hasSectionRef();
    if (!ShouldEmit)
      continue;

    StringRef Name = Section.getName();
    assert(Name != ".eh_frame" && "should not emit .eh_frame as data");

    std::string OutputName;
    if (Section.isReordered())
      OutputName = std::string(Section.getOutputName());
    else
      OutputName = OrgSecPrefix + std::string(Name);

    emitDataSection(Streamer, Section, OutputName);
  }
}
|
|
|
|
|
|
2016-04-11 17:46:18 -07:00
|
|
|
bool RewriteInstance::checkLargeFunctions() {
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations)
|
2016-09-27 19:09:38 -07:00
|
|
|
return false;
|
|
|
|
|
|
2016-04-11 17:46:18 -07:00
|
|
|
LargeFunctions.clear();
|
2016-03-31 16:38:49 -07:00
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
auto &Function = BFI.second;
|
|
|
|
|
|
|
|
|
|
// Ignore this function if we failed to map it to the output binary
|
|
|
|
|
if (Function.getImageAddress() == 0 || Function.getImageSize() == 0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (Function.getImageSize() <= Function.getMaxSize())
|
|
|
|
|
continue;
|
|
|
|
|
|
2016-04-11 17:46:18 -07:00
|
|
|
LargeFunctions.insert(BFI.first);
|
2016-03-31 16:38:49 -07:00
|
|
|
}
|
2016-04-11 17:46:18 -07:00
|
|
|
return !LargeFunctions.empty();
|
2016-03-31 16:38:49 -07:00
|
|
|
}
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
void RewriteInstance::patchELFPHDRTable() {
|
|
|
|
|
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
|
2016-02-08 10:02:48 -08:00
|
|
|
if (!ELF64LEFile) {
|
|
|
|
|
errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
auto Obj = ELF64LEFile->getELFFile();
|
|
|
|
|
auto &OS = Out->os();
|
|
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
// Write/re-write program headers.
|
2016-03-03 10:13:11 -08:00
|
|
|
Phnum = Obj->getHeader()->e_phnum;
|
2016-02-12 19:01:53 -08:00
|
|
|
if (PHDRTableOffset) {
|
|
|
|
|
// Writing new pheader table.
|
|
|
|
|
Phnum += 1; // only adding one new segment
|
|
|
|
|
// Segment size includes the size of the PHDR area.
|
|
|
|
|
NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
|
|
|
|
|
} else {
|
|
|
|
|
assert(!PHDRTableAddress && "unexpected address for program header table");
|
|
|
|
|
// Update existing table.
|
|
|
|
|
PHDRTableOffset = Obj->getHeader()->e_phoff;
|
|
|
|
|
NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
|
|
|
|
|
}
|
|
|
|
|
OS.seek(PHDRTableOffset);
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
bool ModdedGnuStack = false;
|
2017-05-25 10:29:38 -07:00
|
|
|
(void)ModdedGnuStack;
|
2016-02-12 19:01:53 -08:00
|
|
|
bool AddedSegment = false;
|
2017-05-25 10:29:38 -07:00
|
|
|
(void)AddedSegment;
|
2016-02-08 10:02:48 -08:00
|
|
|
|
|
|
|
|
// Copy existing program headers with modifications.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (auto &Phdr : cantFail(Obj->program_headers())) {
|
2016-02-12 19:01:53 -08:00
|
|
|
auto NewPhdr = Phdr;
|
|
|
|
|
if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) {
|
2016-02-08 10:02:48 -08:00
|
|
|
NewPhdr.p_offset = PHDRTableOffset;
|
|
|
|
|
NewPhdr.p_vaddr = PHDRTableAddress;
|
|
|
|
|
NewPhdr.p_paddr = PHDRTableAddress;
|
|
|
|
|
NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum;
|
|
|
|
|
NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum;
|
|
|
|
|
} else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) {
|
2018-02-01 16:33:43 -08:00
|
|
|
auto EHFrameHdrSec = BC->getUniqueSectionByName(".eh_frame_hdr");
|
|
|
|
|
if (EHFrameHdrSec &&
|
|
|
|
|
EHFrameHdrSec->isAllocatable() &&
|
|
|
|
|
EHFrameHdrSec->isFinalized()) {
|
|
|
|
|
NewPhdr.p_offset = EHFrameHdrSec->getFileOffset();
|
|
|
|
|
NewPhdr.p_vaddr = EHFrameHdrSec->getFileAddress();
|
|
|
|
|
NewPhdr.p_paddr = EHFrameHdrSec->getFileAddress();
|
|
|
|
|
NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize();
|
|
|
|
|
NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize();
|
2016-07-12 16:43:53 -07:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
} else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) {
|
|
|
|
|
NewPhdr.p_type = ELF::PT_LOAD;
|
|
|
|
|
NewPhdr.p_offset = NewTextSegmentOffset;
|
|
|
|
|
NewPhdr.p_vaddr = NewTextSegmentAddress;
|
|
|
|
|
NewPhdr.p_paddr = NewTextSegmentAddress;
|
|
|
|
|
NewPhdr.p_filesz = NewTextSegmentSize;
|
|
|
|
|
NewPhdr.p_memsz = NewTextSegmentSize;
|
|
|
|
|
NewPhdr.p_flags = ELF::PF_X | ELF::PF_R;
|
|
|
|
|
NewPhdr.p_align = PageAlign;
|
|
|
|
|
ModdedGnuStack = true;
|
|
|
|
|
} else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) {
|
|
|
|
|
// Insert new pheader
|
|
|
|
|
ELFFile<ELF64LE>::Elf_Phdr NewTextPhdr;
|
|
|
|
|
NewTextPhdr.p_type = ELF::PT_LOAD;
|
|
|
|
|
NewTextPhdr.p_offset = PHDRTableOffset;
|
|
|
|
|
NewTextPhdr.p_vaddr = PHDRTableAddress;
|
|
|
|
|
NewTextPhdr.p_paddr = PHDRTableAddress;
|
|
|
|
|
NewTextPhdr.p_filesz = NewTextSegmentSize;
|
|
|
|
|
NewTextPhdr.p_memsz = NewTextSegmentSize;
|
|
|
|
|
NewTextPhdr.p_flags = ELF::PF_X | ELF::PF_R;
|
|
|
|
|
NewTextPhdr.p_align = PageAlign;
|
|
|
|
|
OS.write(reinterpret_cast<const char *>(&NewTextPhdr),
|
|
|
|
|
sizeof(NewTextPhdr));
|
|
|
|
|
AddedSegment = true;
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
assert((!opts::UseGnuStack || ModdedGnuStack) &&
|
|
|
|
|
"could not find GNU_STACK program header to modify");
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
assert((opts::UseGnuStack || AddedSegment) &&
|
|
|
|
|
"could not add program header for the new segment");
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-16 15:54:32 -07:00
|
|
|
namespace {
|
2017-04-06 10:49:59 -07:00
|
|
|
|
|
|
|
|
/// Write padding to \p OS such that its current \p Offset becomes aligned
|
|
|
|
|
/// at \p Alignment. Return new (aligned) offset.
|
|
|
|
|
uint64_t appendPadding(raw_pwrite_stream &OS,
|
|
|
|
|
uint64_t Offset,
|
|
|
|
|
uint64_t Alignment) {
|
2017-05-16 17:29:31 -07:00
|
|
|
if (!Alignment)
|
|
|
|
|
return Offset;
|
|
|
|
|
|
2017-04-06 10:49:59 -07:00
|
|
|
const auto PaddingSize = OffsetToAlignment(Offset, Alignment);
|
|
|
|
|
for (unsigned I = 0; I < PaddingSize; ++I)
|
2016-09-16 15:54:32 -07:00
|
|
|
OS.write((unsigned char)0);
|
2017-04-06 10:49:59 -07:00
|
|
|
return Offset + PaddingSize;
|
2016-09-16 15:54:32 -07:00
|
|
|
}
|
2017-04-06 10:49:59 -07:00
|
|
|
|
2016-09-16 15:54:32 -07:00
|
|
|
}
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
void RewriteInstance::rewriteNoteSections() {
|
|
|
|
|
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
|
|
|
|
|
if (!ELF64LEFile) {
|
|
|
|
|
errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
auto Obj = ELF64LEFile->getELFFile();
|
|
|
|
|
auto &OS = Out->os();
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2017-01-17 15:49:59 -08:00
|
|
|
uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
|
2016-02-12 19:01:53 -08:00
|
|
|
assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
|
|
|
|
|
"next available offset calculation failure");
|
2016-03-03 10:13:11 -08:00
|
|
|
OS.seek(NextAvailableOffset);
|
|
|
|
|
|
|
|
|
|
// Copy over non-allocatable section contents and update file offsets.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT work again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (auto &Section : cantFail(Obj->sections())) {
|
2016-03-03 10:13:11 -08:00
|
|
|
if (Section.sh_type == ELF::SHT_NULL)
|
|
|
|
|
continue;
|
|
|
|
|
if (Section.sh_flags & ELF::SHF_ALLOC)
|
|
|
|
|
continue;
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Section.sh_type == ELF::SHT_RELA)
|
|
|
|
|
continue;
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Insert padding as needed.
|
2017-04-06 10:49:59 -07:00
|
|
|
NextAvailableOffset =
|
|
|
|
|
appendPadding(OS, NextAvailableOffset, Section.sh_addralign);
|
2016-03-03 10:13:11 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT work again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
StringRef SectionName =
|
|
|
|
|
cantFail(Obj->getSectionName(&Section), "cannot get section name");
|
2016-03-11 11:30:30 -08:00
|
|
|
|
2016-05-16 17:02:17 -07:00
|
|
|
// New section size.
|
2016-03-11 11:30:30 -08:00
|
|
|
uint64_t Size = 0;
|
|
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
// Copy over section contents unless it's one of the sections we overwrite.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (!willOverwriteSection(SectionName)) {
|
2016-03-11 11:30:30 -08:00
|
|
|
Size = Section.sh_size;
|
Update subroutine address ranges in binary.
Summary:
[WIP] Update DWARF info for function address ranges.
This diff currently does not work for unknown reasons,
but I'm describing here what's the current state.
According to both llvm-dwarf and readelf our output seems correct,
but GDB does not interpret it as expected. All details go below in
hope I missed something.
I couldn't actually track the whole change that introduced support for
what we need in gdb yet, but I think I can get to it
(2007-12-04: Support
lexical blocks and function bodies that occupy non-contiguous address ranges).
I have reasons to believe gdb at least at some [sentence truncated in the original commit message]
The set of introduced changes was basically this:
- After disassembly, iterate over the DIEs in .debug_info and find the
ones that correspond to each BinaryFunction.
- Refactor DebugArangesWriter to also write addresses of functions to
.debug_ranges and track the offsets of function address ranges there
- Add some infrastructure to facilitate patching the binary in
simple ways (BinaryPatcher.h)
- In RewriteInstance, after writing .debug_ranges already with
function address ranges, for each function do:
-- Find the abbreviation corresponding to the function
-- Patch .debug_abbrev to replace DW_AT_low_pc with DW_AT_ranges and
DW_AT_high_pc with DW_AT_producer (I'll explain this hack below).
Also patch the corresponding forms to DW_FORM_sec_offset and
DW_FORM_string (null-terminated in-place string).
-- Patch debug_info with the .debug_ranges offset in place of
the first 4 bytes of DW_AT_low_pc (DW_AT_ranges only occupies 4
bytes whereas low_pc occupies 8), and write an arbitrary string
in-place in the other 12 bytes that were the 4 MSB of low_pc
and the 8 bytes of high_pc before the patch. This depends on
low_pc and high_pc being put consecutively by the compiler, but
it serves to validate the idea. I tried another way of doing it
that does not rely on this but it didn't work either and I believe
the reason for either not working is the same (and still unknown,
but unrelated to them. I might be wrong though, and if I find yet
another way of doing it I may try it). The other way was to
use a form of DW_FORM_data8 for the section offset. This is
disallowed by the specification, but I doubt gdb validates this,
as it's just easier to store it as 64-bit anyway as this is even
necessary to support 64-bit DWARF (which is not what gcc generates
by default apparently).
I still need to make changes to the diff to make it production-ready,
but first I want to figure out why it doesn't work as expected.
By looking at the output of llvm-dwarfdump or readelf, all of
.debug_ranges, .debug_abbrev and .debug_info seem to have been
correctly updated. However, gdb seems to have serious problems with
what we write.
(In fact, readelf --debug-dump=Ranges shows some funny warning messages
of the form ("Warning: There is a hole [0x100 - 0x120] in .debug_ranges"),
but I played around with this and it seems it's just because no
compile unit was using these ranges. Changing .debug_info apparently
changes these warnings, so they seem to be unrelated to the section
itself. Also looking at the hex dump of the section doesn't help,
as everything seems fine. llvm-dwarfdump doesn't say anything.
So I think .debug_ranges is fine.)
The result is that gdb not only doesn't show the function name as we
wanted, but it also stops showing line number information.
Apparently it's not reading/interpreting the address ranges at all,
and so the functions now have no associated address ranges, only the
symbol value which allows one to put a breakpoint in the function,
but not to show source code.
As this left me without more ideas of what to try to feed gdb with,
I believe the most promising next trial is to try to debug gdb itself,
unless someone spots anything I missed.
I found where the interesting part of the code lies for this
case (gdb/dwarf2read.c and some other related files, but mainly that one).
It seems in some parts gdb uses DW_AT_ranges for only getting
its lowest and highest addresses and setting that as low_pc and
high_pc (see dwarf2_get_pc_bounds in gdb's code and where it's called).
I really hope this is not actually the case for
function address ranges. I'll investigate this further. Otherwise
I don't think any changes we make will make it work as initially
intended, as we'll simply need gdb to support it and in that case it
doesn't.
(cherry picked from FBD3073641)
2016-03-16 18:08:29 -07:00
|
|
|
std::string Data = InputFile->getData().substr(Section.sh_offset, Size);
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto SectionPatchersIt = SectionPatchers.find(SectionName);
|
Update subroutine address ranges in binary.
Summary:
[WIP] Update DWARF info for function address ranges.
This diff currently does not work for unknown reasons,
but I'm describing here what's the current state.
According to both llvm-dwarf and readelf our output seems correct,
but GDB does not interpret it as expected. All details go below in
hope I missed something.
I couldn't actually track the whole change that introduced support for
what we need in gdb yet, but I think I can get to it
(2007-12-04: Support
lexical blocks and function bodies that occupy non-contiguous address ranges).
I have reasons to believe gdb at least at some [sentence truncated in the original commit message]
The set of introduced changes was basically this:
- After disassembly, iterate over the DIEs in .debug_info and find the
ones that correspond to each BinaryFunction.
- Refactor DebugArangesWriter to also write addresses of functions to
.debug_ranges and track the offsets of function address ranges there
- Add some infrastructure to facilitate patching the binary in
simple ways (BinaryPatcher.h)
- In RewriteInstance, after writing .debug_ranges already with
function address ranges, for each function do:
-- Find the abbreviation corresponding to the function
-- Patch .debug_abbrev to replace DW_AT_low_pc with DW_AT_ranges and
DW_AT_high_pc with DW_AT_producer (I'll explain this hack below).
Also patch the corresponding forms to DW_FORM_sec_offset and
DW_FORM_string (null-terminated in-place string).
-- Patch debug_info with the .debug_ranges offset in place of
the first 4 bytes of DW_AT_low_pc (DW_AT_ranges only occupies 4
bytes whereas low_pc occupies 8), and write an arbitrary string
in-place in the other 12 bytes that were the 4 MSB of low_pc
and the 8 bytes of high_pc before the patch. This depends on
low_pc and high_pc being put consecutively by the compiler, but
it serves to validate the idea. I tried another way of doing it
that does not rely on this but it didn't work either and I believe
the reason for either not working is the same (and still unknown,
but unrelated to them. I might be wrong though, and if I find yet
another way of doing it I may try it). The other way was to
use a form of DW_FORM_data8 for the section offset. This is
disallowed by the specification, but I doubt gdb validates this,
as it's just easier to store it as 64-bit anyway as this is even
necessary to support 64-bit DWARF (which is not what gcc generates
by default apparently).
I still need to make changes to the diff to make it production-ready,
but first I want to figure out why it doesn't work as expected.
By looking at the output of llvm-dwarfdump or readelf, all of
.debug_ranges, .debug_abbrev and .debug_info seem to have been
correctly updated. However, gdb seems to have serious problems with
what we write.
(In fact, readelf --debug-dump=Ranges shows some funny warning messages
of the form ("Warning: There is a hole [0x100 - 0x120] in .debug_ranges"),
but I played around with this and it seems it's just because no
compile unit was using these ranges. Changing .debug_info apparently
changes these warnings, so they seem to be unrelated to the section
itself. Also looking at the hex dump of the section doesn't help,
as everything seems fine. llvm-dwarfdump doesn't say anything.
So I think .debug_ranges is fine.)
The result is that gdb not only doesn't show the function name as we
wanted, but it also stops showing line number information.
Apparently it's not reading/interpreting the address ranges at all,
and so the functions now have no associated address ranges, only the
symbol value which allows one to put a breakpoint in the function,
but not to show source code.
As this left me without more ideas of what to try to feed gdb with,
I believe the most promising next trial is to try to debug gdb itself,
unless someone spots anything I missed.
I found where the interesting part of the code lies for this
case (gdb/dwarf2read.c and some other related files, but mainly that one).
It seems in some parts gdb uses DW_AT_ranges for only getting
its lowest and highest addresses and setting that as low_pc and
high_pc (see dwarf2_get_pc_bounds in gdb's code and where it's called).
I really hope this is not actually the case for
function address ranges. I'll investigate this further. Otherwise
I don't think any changes we make will make it work as initially
intended, as we'll simply need gdb to support it and in that case it
doesn't.
(cherry picked from FBD3073641)
2016-03-16 18:08:29 -07:00
|
|
|
if (SectionPatchersIt != SectionPatchers.end()) {
|
|
|
|
|
(*SectionPatchersIt->second).patchBinary(Data);
|
|
|
|
|
}
|
|
|
|
|
OS << Data;
|
2017-04-06 10:49:59 -07:00
|
|
|
|
|
|
|
|
// Add padding as the section extension might rely on the alignment.
|
|
|
|
|
Size = appendPadding(OS, Size, Section.sh_addralign);
|
2016-03-11 11:30:30 -08:00
|
|
|
}
|
2016-03-03 10:13:11 -08:00
|
|
|
|
2016-03-09 16:06:41 -08:00
|
|
|
// Perform section post-processing.
|
2018-02-01 16:33:43 -08:00
|
|
|
auto BSec = BC->getUniqueSectionByName(SectionName);
|
|
|
|
|
uint8_t *SectionData = nullptr;
|
|
|
|
|
if (BSec && !BSec->isAllocatable()) {
|
|
|
|
|
assert(BSec->getAlignment() <= Section.sh_addralign &&
|
2016-03-03 10:13:11 -08:00
|
|
|
"alignment exceeds value in file");
|
2016-03-09 16:06:41 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
if (BSec->getAllocAddress()) {
|
|
|
|
|
SectionData = BSec->getOutputData();
|
2017-04-05 09:29:24 -07:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing")
|
2016-05-16 17:02:17 -07:00
|
|
|
<< " contents to section "
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
<< SectionName << '\n');
|
2018-02-01 16:33:43 -08:00
|
|
|
OS.write(reinterpret_cast<char *>(SectionData),
|
|
|
|
|
BSec->getOutputSize());
|
|
|
|
|
Size += BSec->getOutputSize();
|
2016-03-09 16:06:41 -08:00
|
|
|
}
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
if (BSec->hasPendingRelocations()) {
|
2016-03-09 16:06:41 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: processing relocs for section "
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
<< SectionName << '\n');
|
2018-02-01 16:33:43 -08:00
|
|
|
for (auto &Reloc : BSec->pendingRelocations()) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: writing value 0x"
|
|
|
|
|
<< Twine::utohexstr(Reloc.Addend)
|
|
|
|
|
<< " of size " << Relocation::getSizeForType(Reloc.Type)
|
|
|
|
|
<< " at offset 0x"
|
2016-03-09 16:06:41 -08:00
|
|
|
<< Twine::utohexstr(Reloc.Offset) << '\n');
|
2018-02-01 16:33:43 -08:00
|
|
|
assert(Reloc.Type == ELF::R_X86_64_32 &&
|
|
|
|
|
"only R_X86_64_32 relocations are supported at the moment");
|
|
|
|
|
uint32_t Value = Reloc.Addend;
|
|
|
|
|
OS.pwrite(reinterpret_cast<const char*>(&Value),
|
|
|
|
|
Relocation::getSizeForType(Reloc.Type),
|
2016-03-09 16:06:41 -08:00
|
|
|
NextAvailableOffset + Reloc.Offset);
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Set/modify section info.
|
2018-02-01 16:33:43 -08:00
|
|
|
auto &NewSection =
|
|
|
|
|
BC->registerOrUpdateNoteSection(SectionName,
|
|
|
|
|
SectionData,
|
|
|
|
|
Size,
|
|
|
|
|
Section.sh_addralign,
|
|
|
|
|
BSec ? BSec->isReadOnly() : false,
|
|
|
|
|
BSec ? BSec->getELFType()
|
|
|
|
|
: ELF::SHT_PROGBITS,
|
|
|
|
|
BSec ? BSec->isLocal() : false);
|
|
|
|
|
NewSection.setFileAddress(0);
|
|
|
|
|
NewSection.setFileOffset(NextAvailableOffset);
|
2016-03-03 10:13:11 -08:00
|
|
|
|
|
|
|
|
NextAvailableOffset += Size;
|
|
|
|
|
}
|
2017-05-16 17:29:31 -07:00
|
|
|
|
|
|
|
|
// Write new note sections.
|
2017-11-14 20:05:11 -08:00
|
|
|
for (auto &Section : BC->nonAllocatableSections()) {
|
|
|
|
|
if (Section.getFileOffset() || !Section.getAllocAddress())
|
2017-05-16 17:29:31 -07:00
|
|
|
continue;
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
assert(!Section.hasPendingRelocations() && "cannot have pending relocs");
|
2017-05-16 17:29:31 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
NextAvailableOffset = appendPadding(OS, NextAvailableOffset,
|
|
|
|
|
Section.getAlignment());
|
|
|
|
|
Section.setFileOffset(NextAvailableOffset);
|
2017-05-16 17:29:31 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: writing out new section "
|
|
|
|
|
<< Section.getName() << " of size " << Section.getOutputSize()
|
|
|
|
|
<< " at offset 0x" << Twine::utohexstr(Section.getFileOffset())
|
|
|
|
|
<< '\n');
|
2017-05-16 17:29:31 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
OS.write(Section.getOutputContents().data(), Section.getOutputSize());
|
|
|
|
|
NextAvailableOffset += Section.getOutputSize();
|
2017-05-16 17:29:31 -07:00
|
|
|
}
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-07 12:20:46 -08:00
|
|
|
template <typename ELFT>
|
2017-05-16 17:29:31 -07:00
|
|
|
void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) {
|
2017-02-07 12:20:46 -08:00
|
|
|
auto *Obj = File->getELFFile();
|
|
|
|
|
|
|
|
|
|
// Pre-populate section header string table.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (auto &Section : cantFail(Obj->sections())) {
|
|
|
|
|
StringRef SectionName =
|
|
|
|
|
cantFail(Obj->getSectionName(&Section), "cannot get section name");
|
|
|
|
|
SHStrTab.add(SectionName);
|
|
|
|
|
if (willOverwriteSection(SectionName)) {
|
|
|
|
|
AllSHStrTabStrings.emplace_back(
|
|
|
|
|
SHStrTabPool.intern(OrgSecPrefix + SectionName.str()));
|
|
|
|
|
SHStrTab.add(*AllSHStrTabStrings.back());
|
|
|
|
|
}
|
2017-02-07 12:20:46 -08:00
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
for (const auto &Section : BC->sections()) {
|
|
|
|
|
SHStrTab.add(Section.getName());
|
2017-05-16 17:29:31 -07:00
|
|
|
}
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
SHStrTab.finalize();
|
2017-02-07 12:20:46 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const auto SHStrTabSize = SHStrTab.getSize();
|
2017-05-16 17:29:31 -07:00
|
|
|
uint8_t *DataCopy = new uint8_t[SHStrTabSize];
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
memset(DataCopy, 0, SHStrTabSize);
|
|
|
|
|
SHStrTab.write(DataCopy);
|
2018-02-01 16:33:43 -08:00
|
|
|
BC->registerOrUpdateNoteSection(".shstrtab",
|
|
|
|
|
DataCopy,
|
|
|
|
|
SHStrTabSize,
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true,
|
|
|
|
|
ELF::SHT_STRTAB);
|
2017-02-07 12:20:46 -08:00
|
|
|
}
|
|
|
|
|
|
2017-05-24 14:14:16 -07:00
|
|
|
void RewriteInstance::addBoltInfoSection() {
|
|
|
|
|
if (opts::AddBoltInfo) {
|
2017-10-06 17:54:26 -07:00
|
|
|
std::string DescStr;
|
|
|
|
|
raw_string_ostream DescOS(DescStr);
|
2017-05-24 14:14:16 -07:00
|
|
|
|
2018-06-14 14:27:20 -07:00
|
|
|
DescOS << "BOLT revision: " << BoltRevision << ", " << "command line:";
|
2017-05-24 14:14:16 -07:00
|
|
|
for (auto I = 0; I < Argc; ++I) {
|
2017-10-06 17:54:26 -07:00
|
|
|
DescOS << " " << Argv[I];
|
2017-05-24 14:14:16 -07:00
|
|
|
}
|
2017-10-06 17:54:26 -07:00
|
|
|
DescOS.flush();
|
|
|
|
|
|
|
|
|
|
std::string Str;
|
|
|
|
|
raw_string_ostream OS(Str);
|
|
|
|
|
std::string NameStr = "GNU";
|
|
|
|
|
const uint32_t NameSz = NameStr.size() + 1;
|
2017-10-10 13:30:05 -07:00
|
|
|
const uint32_t DescSz = DescStr.size();
|
2017-10-06 17:54:26 -07:00
|
|
|
const uint32_t Type = 4; // NT_GNU_GOLD_VERSION (gold version)
|
|
|
|
|
OS.write(reinterpret_cast<const char*>(&(NameSz)), 4);
|
|
|
|
|
OS.write(reinterpret_cast<const char*>(&(DescSz)), 4);
|
|
|
|
|
OS.write(reinterpret_cast<const char*>(&(Type)), 4);
|
2017-10-10 13:30:05 -07:00
|
|
|
OS << NameStr;
|
|
|
|
|
for (uint64_t I = NameStr.size();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
I < alignTo(NameStr.size(), 4); ++I) {
|
2017-10-10 13:30:05 -07:00
|
|
|
OS << '\0';
|
|
|
|
|
}
|
|
|
|
|
OS << DescStr;
|
|
|
|
|
for (uint64_t I = DescStr.size();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
I < alignTo(DescStr.size(), 4); ++I) {
|
2017-10-06 17:54:26 -07:00
|
|
|
OS << '\0';
|
|
|
|
|
}
|
2017-05-24 14:14:16 -07:00
|
|
|
|
|
|
|
|
const auto BoltInfo = OS.str();
|
2018-02-01 16:33:43 -08:00
|
|
|
BC->registerOrUpdateNoteSection(".note.bolt_info",
|
|
|
|
|
copyByteArray(BoltInfo),
|
|
|
|
|
BoltInfo.size(),
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true,
|
|
|
|
|
ELF::SHT_NOTE);
|
2017-05-24 14:14:16 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
// Provide a mapping of the existing input binary sections to the output binary
|
|
|
|
|
// section header table.
|
|
|
|
|
// Return the map from the section header old index to its new index. Optionally
|
|
|
|
|
// return in OutputSections an ordered list of the output sections. This is
|
|
|
|
|
// optional because for reference updating in the symbol table we only need the
|
|
|
|
|
// map of input to output indices, not the real output section list.
|
|
|
|
|
template <typename ELFT, typename ELFShdrTy>
|
2018-04-20 20:03:31 -07:00
|
|
|
std::vector<uint32_t> RewriteInstance::getOutputSections(
|
|
|
|
|
ELFObjectFile<ELFT> *File,
|
|
|
|
|
std::vector<ELFShdrTy> *OutputSections,
|
|
|
|
|
std::map<std::string, uint32_t> *SectionNameMap
|
|
|
|
|
) {
|
2016-09-27 19:09:38 -07:00
|
|
|
auto *Obj = File->getELFFile();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto Sections = cantFail(Obj->sections());
|
2016-02-12 19:01:53 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
std::vector<uint32_t> NewSectionIndex(
|
|
|
|
|
std::distance(Sections.begin(), Sections.end()), 0);
|
2017-06-27 16:25:59 -07:00
|
|
|
NewTextSectionIndex = 0;
|
|
|
|
|
uint32_t CurIndex{0};
|
2016-09-16 15:54:32 -07:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Copy over entries for original allocatable sections with minor
|
|
|
|
|
// modifications (e.g. name).
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (auto &Section : Sections) {
|
2016-02-12 19:01:53 -08:00
|
|
|
// Always ignore this section.
|
|
|
|
|
if (Section.sh_type == ELF::SHT_NULL) {
|
2017-06-27 16:25:59 -07:00
|
|
|
NewSectionIndex[0] = CurIndex++;
|
|
|
|
|
if (OutputSections)
|
|
|
|
|
OutputSections->emplace_back(Section);
|
2016-02-12 19:01:53 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
// Is this our new text? Then update our pointer indicating the new output
|
|
|
|
|
// text section
|
|
|
|
|
if (opts::UseOldText && Section.sh_flags & ELF::SHF_ALLOC &&
|
|
|
|
|
Section.sh_addr <= NewTextSectionStartAddress &&
|
|
|
|
|
Section.sh_addr + Section.sh_size > NewTextSectionStartAddress) {
|
|
|
|
|
NewTextSectionIndex = CurIndex;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Skip non-allocatable sections.
|
2016-03-03 10:13:11 -08:00
|
|
|
if (!(Section.sh_flags & ELF::SHF_ALLOC))
|
2016-09-27 19:09:38 -07:00
|
|
|
continue;
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
StringRef SectionName =
|
|
|
|
|
cantFail(Obj->getSectionName(&Section), "cannot get section name");
|
|
|
|
|
|
|
|
|
|
if (SectionNameMap && !SectionNameMap->count(SectionName)) {
|
|
|
|
|
(*SectionNameMap)[SectionName] = CurIndex;
|
|
|
|
|
}
|
|
|
|
|
const auto OldIdx = std::distance(Sections.begin(), &Section);
|
|
|
|
|
assert(NewSectionIndex[OldIdx] == 0);
|
|
|
|
|
NewSectionIndex[OldIdx] = CurIndex++;
|
2017-06-27 16:25:59 -07:00
|
|
|
|
|
|
|
|
// If only computing the map, we're done with this iteration
|
|
|
|
|
if (!OutputSections)
|
|
|
|
|
continue;
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
auto NewSection = Section;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (SectionName == ".bss") {
|
2016-03-03 10:13:11 -08:00
|
|
|
// .bss section offset matches that of the next section.
|
|
|
|
|
NewSection.sh_offset = NewTextSegmentOffset;
|
|
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (willOverwriteSection(SectionName)) {
|
2017-02-07 12:20:46 -08:00
|
|
|
NewSection.sh_name = SHStrTab.getOffset(OrgSecPrefix +
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
SectionName.str());
|
2017-02-07 12:20:46 -08:00
|
|
|
} else {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NewSection.sh_name = SHStrTab.getOffset(SectionName);
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
OutputSections->emplace_back(NewSection);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If we are creating our own .text section, it should be the first section
|
2018-02-01 16:33:43 -08:00
|
|
|
// we created in BinaryContext, so this is the correct index.
|
2017-06-27 16:25:59 -07:00
|
|
|
if (!opts::UseOldText) {
|
|
|
|
|
NewTextSectionIndex = CurIndex;
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
// Process entries for all new allocatable sections. Make sure
|
|
|
|
|
// allocatable sections follow the same order as in mapDataSections so
|
|
|
|
|
// that the section indices are consistent.
|
|
|
|
|
std::vector<const BinarySection *> AllocatableSections;
|
|
|
|
|
std::vector<std::string> SectionNames = { ".eh_frame",
|
|
|
|
|
".gcc_except_table",
|
|
|
|
|
".rodata",
|
|
|
|
|
".rodata.cold" };
|
|
|
|
|
for (const auto &SectionName : SectionNames) {
|
|
|
|
|
auto Section = BC->getUniqueSectionByName(SectionName);
|
|
|
|
|
if (Section && Section->isFinalized()) {
|
|
|
|
|
AllocatableSections.push_back(&*Section);
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
for (auto &Section : BC->allocatableSections()) {
|
|
|
|
|
if (!Section.isFinalized())
|
2018-02-01 16:33:43 -08:00
|
|
|
continue;
|
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
if (std::find_if(AllocatableSections.begin(),
|
|
|
|
|
AllocatableSections.end(),
|
|
|
|
|
[&Section](const BinarySection *BSec) {
|
|
|
|
|
return BSec == &Section;
|
|
|
|
|
}) == AllocatableSections.end()) {
|
|
|
|
|
AllocatableSections.push_back(&Section);
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-06-20 12:03:24 -07:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
for (const auto *Section : AllocatableSections) {
|
2016-03-03 10:13:11 -08:00
|
|
|
// Ignore function sections.
|
2018-04-20 20:03:31 -07:00
|
|
|
if (Section->getFileAddress() < NewTextSegmentAddress) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::Verbosity)
|
|
|
|
|
outs() << "BOLT-INFO: not writing section header for existing section "
|
2018-04-20 20:03:31 -07:00
|
|
|
<< Section->getName() << '\n';
|
2016-03-03 10:13:11 -08:00
|
|
|
continue;
|
2016-09-02 14:15:29 -07:00
|
|
|
}
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
if (SectionNameMap) {
|
|
|
|
|
(*SectionNameMap)[Section->getName()] = CurIndex;
|
|
|
|
|
}
|
2017-06-27 16:25:59 -07:00
|
|
|
++CurIndex;
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
// If only computing the map, we're done with this iteration
|
|
|
|
|
if (!OutputSections)
|
|
|
|
|
continue;
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::Verbosity >= 1)
|
2018-02-01 16:33:43 -08:00
|
|
|
outs() << "BOLT-INFO: writing section header for "
|
2018-04-20 20:03:31 -07:00
|
|
|
<< Section->getName() << '\n';
|
2017-06-27 16:25:59 -07:00
|
|
|
ELFShdrTy NewSection;
|
2018-04-20 20:03:31 -07:00
|
|
|
NewSection.sh_name = SHStrTab.getOffset(Section->getName());
|
2016-03-03 10:13:11 -08:00
|
|
|
NewSection.sh_type = ELF::SHT_PROGBITS;
|
2018-04-20 20:03:31 -07:00
|
|
|
NewSection.sh_addr = Section->getFileAddress();
|
|
|
|
|
NewSection.sh_offset = Section->getFileOffset();
|
|
|
|
|
NewSection.sh_size = Section->getOutputSize();
|
2016-03-03 10:13:11 -08:00
|
|
|
NewSection.sh_entsize = 0;
|
2018-04-20 20:03:31 -07:00
|
|
|
NewSection.sh_flags = Section->getELFFlags();
|
2016-03-03 10:13:11 -08:00
|
|
|
NewSection.sh_link = 0;
|
|
|
|
|
NewSection.sh_info = 0;
|
2018-04-20 20:03:31 -07:00
|
|
|
NewSection.sh_addralign = Section->getAlignment();
|
2017-06-27 16:25:59 -07:00
|
|
|
OutputSections->emplace_back(NewSection);
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2017-05-16 17:29:31 -07:00
|
|
|
uint64_t LastFileOffset = 0;
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Copy over entries for non-allocatable sections performing necessary
|
2016-09-27 19:09:38 -07:00
|
|
|
// adjustments.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (auto &Section : Sections) {
|
2016-03-03 10:13:11 -08:00
|
|
|
if (Section.sh_type == ELF::SHT_NULL)
|
|
|
|
|
continue;
|
|
|
|
|
if (Section.sh_flags & ELF::SHF_ALLOC)
|
|
|
|
|
continue;
|
2017-06-07 20:06:29 -07:00
|
|
|
// Strip non-allocatable relocation sections.
|
|
|
|
|
if (Section.sh_type == ELF::SHT_RELA)
|
2016-09-27 19:09:38 -07:00
|
|
|
continue;
|
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
StringRef SectionName =
|
|
|
|
|
cantFail(Obj->getSectionName(&Section), "cannot get section name");
|
|
|
|
|
|
|
|
|
|
if (SectionNameMap && !SectionNameMap->count(SectionName)) {
|
|
|
|
|
(*SectionNameMap)[SectionName] = CurIndex;
|
|
|
|
|
}
|
|
|
|
|
const auto OldIdx = std::distance(Sections.begin(), &Section);
|
|
|
|
|
assert(NewSectionIndex[OldIdx] == 0);
|
|
|
|
|
NewSectionIndex[OldIdx] = CurIndex++;
|
2017-06-27 16:25:59 -07:00
|
|
|
|
|
|
|
|
// If only computing the map, we're done with this iteration
|
|
|
|
|
if (!OutputSections)
|
|
|
|
|
continue;
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
auto BSec = BC->getUniqueSectionByName(SectionName);
|
|
|
|
|
assert(BSec && "missing section info for non-allocatable section");
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
auto NewSection = Section;
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_offset = BSec->getFileOffset();
|
|
|
|
|
NewSection.sh_size = BSec->getOutputSize();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NewSection.sh_name = SHStrTab.getOffset(SectionName);
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
OutputSections->emplace_back(NewSection);
|
2017-05-16 17:29:31 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
LastFileOffset = BSec->getFileOffset();
|
2016-02-12 19:01:53 -08:00
|
|
|
}
|
|
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
// Map input -> output is ready. Early return if that's all we need.
|
|
|
|
|
if (!OutputSections)
|
|
|
|
|
return NewSectionIndex;
|
|
|
|
|
|
2017-05-16 17:29:31 -07:00
|
|
|
// Create entries for new non-allocatable sections.
|
2017-11-14 20:05:11 -08:00
|
|
|
for (auto &Section : BC->nonAllocatableSections()) {
|
|
|
|
|
if (Section.getFileOffset() <= LastFileOffset)
|
2017-05-16 17:29:31 -07:00
|
|
|
continue;
|
2017-02-07 12:20:46 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
outs() << "BOLT-INFO: writing section header for "
|
|
|
|
|
<< Section.getName() << '\n';
|
|
|
|
|
}
|
2017-06-27 16:25:59 -07:00
|
|
|
ELFShdrTy NewSection;
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_name = SHStrTab.getOffset(Section.getName());
|
|
|
|
|
NewSection.sh_type = Section.getELFType();
|
2017-05-16 17:29:31 -07:00
|
|
|
NewSection.sh_addr = 0;
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_offset = Section.getFileOffset();
|
|
|
|
|
NewSection.sh_size = Section.getOutputSize();
|
2017-05-16 17:29:31 -07:00
|
|
|
NewSection.sh_entsize = 0;
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_flags = Section.getELFFlags();
|
2017-05-16 17:29:31 -07:00
|
|
|
NewSection.sh_link = 0;
|
|
|
|
|
NewSection.sh_info = 0;
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_addralign = Section.getAlignment();
|
2017-06-27 16:25:59 -07:00
|
|
|
OutputSections->emplace_back(NewSection);
|
2017-05-16 17:29:31 -07:00
|
|
|
}
|
|
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
return NewSectionIndex;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Rewrite section header table inserting new entries as needed. The sections
|
|
|
|
|
// header table size itself may affect the offsets of other sections,
|
|
|
|
|
// so we are placing it at the end of the binary.
|
|
|
|
|
//
|
|
|
|
|
// As we rewrite entries we need to track how many sections were inserted
|
|
|
|
|
// as it changes the sh_link value. We map old indices to new ones for
|
|
|
|
|
// existing sections.
|
|
|
|
|
//
|
|
|
|
|
// The following are assumptions about file modifications:
|
|
|
|
|
// * There are no modifications done to address and/or size of existing
|
|
|
|
|
// allocatable sections.
|
|
|
|
|
// * All new allocatable sections are written immediately after existing
|
|
|
|
|
// allocatable sections.
|
|
|
|
|
// * There could be modifications done to non-allocatable sections, e.g.
|
|
|
|
|
// size could be increased.
|
|
|
|
|
// * New non-allocatable sections are added to the end of the file.
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
|
|
|
|
|
using Elf_Shdr = typename ELFObjectFile<ELFT>::Elf_Shdr;
|
|
|
|
|
std::vector<Elf_Shdr> OutputSections;
|
|
|
|
|
auto &OS = Out->os();
|
|
|
|
|
auto *Obj = File->getELFFile();
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
auto NewSectionIndex = getOutputSections(File, &OutputSections);
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2017-06-07 20:06:29 -07:00
|
|
|
// Sort sections by their offset prior to writing. Only newly created sections
|
|
|
|
|
// were unsorted, hence this wouldn't ruin indices in NewSectionIndex.
|
2017-06-27 16:25:59 -07:00
|
|
|
std::stable_sort(OutputSections.begin(), OutputSections.end(),
|
2017-05-16 17:29:31 -07:00
|
|
|
[] (Elf_Shdr A, Elf_Shdr B) {
|
|
|
|
|
return A.sh_offset < B.sh_offset;
|
|
|
|
|
});
|
2017-06-07 20:06:29 -07:00
|
|
|
|
|
|
|
|
DEBUG(
|
|
|
|
|
dbgs() << "BOLT-DEBUG: old to new section index mapping:\n";
|
|
|
|
|
for (uint64_t I = 0; I < NewSectionIndex.size(); ++I) {
|
|
|
|
|
dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n';
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Align starting address for section header table.
|
|
|
|
|
auto SHTOffset = OS.tell();
|
|
|
|
|
SHTOffset = appendPadding(OS, SHTOffset, sizeof(Elf_Shdr));
|
|
|
|
|
|
|
|
|
|
// Write all section header entries while patching section references.
|
2017-06-27 16:25:59 -07:00
|
|
|
for (uint64_t Index = 0; Index < OutputSections.size(); ++Index) {
|
|
|
|
|
auto &Section = OutputSections[Index];
|
2017-06-07 20:06:29 -07:00
|
|
|
Section.sh_link = NewSectionIndex[Section.sh_link];
|
|
|
|
|
if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) {
|
|
|
|
|
if (Section.sh_info)
|
|
|
|
|
Section.sh_info = NewSectionIndex[Section.sh_info];
|
|
|
|
|
}
|
|
|
|
|
OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section));
|
2017-05-16 17:29:31 -07:00
|
|
|
}
|
2017-02-22 11:29:52 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
// Fix ELF header.
|
|
|
|
|
auto NewEhdr = *Obj->getHeader();
|
2017-05-08 22:51:36 -07:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2017-05-08 22:51:36 -07:00
|
|
|
NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
|
|
|
|
|
assert(NewEhdr.e_entry && "cannot find new address for entry point");
|
|
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
NewEhdr.e_phoff = PHDRTableOffset;
|
|
|
|
|
NewEhdr.e_phnum = Phnum;
|
2016-03-03 10:13:11 -08:00
|
|
|
NewEhdr.e_shoff = SHTOffset;
|
2017-06-27 16:25:59 -07:00
|
|
|
NewEhdr.e_shnum = OutputSections.size();
|
2017-06-07 20:06:29 -07:00
|
|
|
NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx];
|
2016-02-12 19:01:53 -08:00
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
|
|
|
|
|
auto *Obj = File->getELFFile();
|
2017-06-27 16:25:59 -07:00
|
|
|
// Set pointer at the end of the output file, so we can pwrite old symbol
|
|
|
|
|
// tables if we need to.
|
|
|
|
|
uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
|
|
|
|
|
assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
|
|
|
|
|
"next available offset calculation failure");
|
|
|
|
|
Out->os().seek(NextAvailableOffset);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
using Elf_Shdr = typename ELFObjectFile<ELFT>::Elf_Shdr;
|
|
|
|
|
using Elf_Sym = typename ELFObjectFile<ELFT>::Elf_Sym;
|
|
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
// Compute a preview of how section indices will change after rewriting, so
|
|
|
|
|
// we can properly update the symbol table.
|
2018-04-20 20:03:31 -07:00
|
|
|
std::map<std::string, uint32_t> SectionNameMap;
|
2017-06-27 16:25:59 -07:00
|
|
|
auto NewSectionIndex =
|
2018-04-20 20:03:31 -07:00
|
|
|
getOutputSections(File, (std::vector<Elf_Shdr> *)nullptr, &SectionNameMap);
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: SectionNameMap:\n";
|
|
|
|
|
for (auto &Entry : SectionNameMap) {
|
|
|
|
|
dbgs() << "BOLT-DEBUG: " << Entry.first << " -> "
|
|
|
|
|
<< Entry.second << "\n";
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
auto updateSymbolTable =
|
|
|
|
|
[&](bool PatchExisting,
|
|
|
|
|
const Elf_Shdr *Section,
|
|
|
|
|
std::function<void(size_t, const char *, size_t)>
|
|
|
|
|
Write,
|
|
|
|
|
std::function<size_t(StringRef)> AddToStrTab) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto StringSection = cantFail(Obj->getStringTableForSymtab(*Section));
|
2017-10-10 18:06:45 -07:00
|
|
|
unsigned IsHotTextUpdated = 0;
|
2018-04-20 20:03:31 -07:00
|
|
|
unsigned IsHotDataUpdated = 0;
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
std::map<const BinaryFunction *, uint64_t> IslandSizes;
|
|
|
|
|
auto getConstantIslandSize = [&IslandSizes](const BinaryFunction *BF) {
|
|
|
|
|
auto Itr = IslandSizes.find(BF);
|
|
|
|
|
if (Itr != IslandSizes.end())
|
|
|
|
|
return Itr->second;
|
|
|
|
|
return IslandSizes[BF] = BF->estimateConstantIslandSize();
|
|
|
|
|
};
|
|
|
|
|
|
2018-07-08 12:14:08 -07:00
|
|
|
// Add symbols of injected functions
|
|
|
|
|
for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
|
|
|
|
|
Elf_Sym NewSymbol;
|
|
|
|
|
NewSymbol.st_shndx = NewTextSectionIndex;
|
|
|
|
|
NewSymbol.st_value = Function->getOutputAddress();
|
|
|
|
|
NewSymbol.st_name = AddToStrTab(Function->getPrintName());
|
|
|
|
|
NewSymbol.st_size = Function->getOutputSize();
|
|
|
|
|
NewSymbol.st_other = 0;
|
|
|
|
|
NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
|
|
|
|
|
Write(0, reinterpret_cast<const char *>(&NewSymbol), sizeof(NewSymbol));
|
|
|
|
|
|
|
|
|
|
if (Function->isSplit()) {
|
|
|
|
|
auto NewColdSym = NewSymbol;
|
|
|
|
|
NewColdSym.setType(ELF::STT_NOTYPE);
|
|
|
|
|
SmallVector<char, 256> Buf;
|
|
|
|
|
NewColdSym.st_name = AddToStrTab(
|
|
|
|
|
Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf));
|
|
|
|
|
NewColdSym.st_value = Function->cold().getAddress();
|
|
|
|
|
NewColdSym.st_size = Function->cold().getImageSize();
|
|
|
|
|
Write(0, reinterpret_cast<const char *>(&NewColdSym),
|
|
|
|
|
sizeof(NewColdSym));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (const Elf_Sym &Symbol : cantFail(Obj->symbols(Section))) {
|
2016-09-27 19:09:38 -07:00
|
|
|
auto NewSymbol = Symbol;
|
2017-06-27 16:25:59 -07:00
|
|
|
const auto *Function = getBinaryFunctionAtAddress(Symbol.st_value);
|
|
|
|
|
// Some section symbols may be mistakenly associated with the first
|
|
|
|
|
// function emitted in the section. Dismiss if it is a section symbol.
|
2017-08-04 11:21:05 -07:00
|
|
|
if (Function &&
|
|
|
|
|
!Function->getPLTSymbol() &&
|
|
|
|
|
NewSymbol.getType() != ELF::STT_SECTION) {
|
2017-05-08 22:51:36 -07:00
|
|
|
NewSymbol.st_value = Function->getOutputAddress();
|
|
|
|
|
NewSymbol.st_size = Function->getOutputSize();
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations)
|
2017-09-20 10:43:01 -07:00
|
|
|
NewSymbol.st_shndx = NewTextSectionIndex;
|
|
|
|
|
else
|
|
|
|
|
NewSymbol.st_shndx = NewSectionIndex[NewSymbol.st_shndx];
|
2017-06-27 16:25:59 -07:00
|
|
|
if (!PatchExisting && Function->isSplit()) {
|
|
|
|
|
auto NewColdSym = NewSymbol;
|
|
|
|
|
SmallVector<char, 256> Buf;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NewColdSym.st_name =
|
|
|
|
|
AddToStrTab(Twine(cantFail(Symbol.getName(StringSection)))
|
|
|
|
|
.concat(".cold.0")
|
|
|
|
|
.toStringRef(Buf));
|
2017-06-27 16:25:59 -07:00
|
|
|
NewColdSym.st_value = Function->cold().getAddress();
|
|
|
|
|
NewColdSym.st_size = Function->cold().getImageSize();
|
|
|
|
|
Write(0, reinterpret_cast<const char *>(&NewColdSym),
|
|
|
|
|
sizeof(NewColdSym));
|
|
|
|
|
}
|
2017-09-20 10:43:01 -07:00
|
|
|
if (!PatchExisting && Function->hasConstantIsland()) {
|
|
|
|
|
auto DataMark = Function->getOutputDataAddress();
|
2017-11-14 20:05:11 -08:00
|
|
|
auto CISize = getConstantIslandSize(Function);
|
2017-09-20 10:43:01 -07:00
|
|
|
auto CodeMark = DataMark + CISize;
|
|
|
|
|
auto DataMarkSym = NewSymbol;
|
|
|
|
|
DataMarkSym.st_name = AddToStrTab("$d");
|
|
|
|
|
DataMarkSym.st_value = DataMark;
|
|
|
|
|
DataMarkSym.st_size = 0;
|
|
|
|
|
DataMarkSym.setType(ELF::STT_NOTYPE);
|
|
|
|
|
DataMarkSym.setBinding(ELF::STB_LOCAL);
|
|
|
|
|
auto CodeMarkSym = DataMarkSym;
|
|
|
|
|
CodeMarkSym.st_name = AddToStrTab("$x");
|
|
|
|
|
CodeMarkSym.st_value = CodeMark;
|
2017-11-09 16:59:18 -08:00
|
|
|
Write(0, reinterpret_cast<const char *>(&DataMarkSym),
|
|
|
|
|
sizeof(DataMarkSym));
|
|
|
|
|
Write(0, reinterpret_cast<const char *>(&CodeMarkSym),
|
|
|
|
|
sizeof(CodeMarkSym));
|
|
|
|
|
}
|
|
|
|
|
if (!PatchExisting && Function->hasConstantIsland() &&
|
|
|
|
|
Function->isSplit()) {
|
|
|
|
|
auto DataMark = Function->getOutputColdDataAddress();
|
2017-11-14 20:05:11 -08:00
|
|
|
auto CISize = getConstantIslandSize(Function);
|
2017-11-09 16:59:18 -08:00
|
|
|
auto CodeMark = DataMark + CISize;
|
|
|
|
|
auto DataMarkSym = NewSymbol;
|
|
|
|
|
DataMarkSym.st_name = AddToStrTab("$d");
|
|
|
|
|
DataMarkSym.st_value = DataMark;
|
|
|
|
|
DataMarkSym.st_size = 0;
|
|
|
|
|
DataMarkSym.setType(ELF::STT_NOTYPE);
|
|
|
|
|
DataMarkSym.setBinding(ELF::STB_LOCAL);
|
|
|
|
|
auto CodeMarkSym = DataMarkSym;
|
|
|
|
|
CodeMarkSym.st_name = AddToStrTab("$x");
|
|
|
|
|
CodeMarkSym.st_value = CodeMark;
|
2017-09-20 10:43:01 -07:00
|
|
|
Write(0, reinterpret_cast<const char *>(&DataMarkSym),
|
|
|
|
|
sizeof(DataMarkSym));
|
|
|
|
|
Write(0, reinterpret_cast<const char *>(&CodeMarkSym),
|
|
|
|
|
sizeof(CodeMarkSym));
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
2018-04-20 20:03:31 -07:00
|
|
|
uint32_t OldSectionIndex = NewSymbol.st_shndx;
|
|
|
|
|
auto *BD = !Function ? BC->getBinaryDataAtAddress(NewSymbol.st_value)
|
|
|
|
|
: nullptr;
|
|
|
|
|
if (BD && BD->isMoved() && !BD->isJumpTable()) {
|
|
|
|
|
assert((!BD->getSize() ||
|
|
|
|
|
!NewSymbol.st_size ||
|
|
|
|
|
NewSymbol.st_size == BD->getSize()) &&
|
|
|
|
|
"sizes must match");
|
|
|
|
|
|
|
|
|
|
auto &OutputSection = BD->getOutputSection();
|
|
|
|
|
|
|
|
|
|
assert(SectionNameMap.count(OutputSection.getName()));
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: moving " << BD->getName() << " from "
|
|
|
|
|
<< *BC->getSectionNameForAddress(NewSymbol.st_value)
|
|
|
|
|
<< " (" << NewSymbol.st_shndx << ") to "
|
|
|
|
|
<< OutputSection.getName() << " ("
|
|
|
|
|
<< SectionNameMap[OutputSection.getName()] << ")\n");
|
|
|
|
|
OldSectionIndex = ELF::SHN_LORESERVE;
|
|
|
|
|
NewSymbol.st_shndx = SectionNameMap[OutputSection.getName()];
|
|
|
|
|
|
|
|
|
|
// TODO: use getNewValueForSymbol()?
|
|
|
|
|
NewSymbol.st_value = BD->getOutputAddress();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (OldSectionIndex < ELF::SHN_LORESERVE) {
|
|
|
|
|
NewSymbol.st_shndx = NewSectionIndex[OldSectionIndex];
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
// Detect local syms in the text section that we didn't update
|
2017-06-16 20:04:43 -07:00
|
|
|
// and were preserved by the linker to support relocations against
|
2017-06-27 16:25:59 -07:00
|
|
|
// .text (t15274167). Remove them from the symtab.
|
2017-09-20 10:43:01 -07:00
|
|
|
if (NewSymbol.getType() == ELF::STT_NOTYPE &&
|
2017-06-16 20:04:43 -07:00
|
|
|
NewSymbol.getBinding() == ELF::STB_LOCAL &&
|
|
|
|
|
NewSymbol.st_size == 0) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto ExpectedSec = File->getELFFile()->getSection(NewSymbol.st_shndx);
|
|
|
|
|
if (ExpectedSec) {
|
|
|
|
|
auto Section = *ExpectedSec;
|
2017-06-16 20:04:43 -07:00
|
|
|
if (Section->sh_type == ELF::SHT_PROGBITS &&
|
|
|
|
|
Section->sh_flags & ELF::SHF_ALLOC &&
|
|
|
|
|
Section->sh_flags & ELF::SHF_EXECINSTR) {
|
2017-06-27 16:25:59 -07:00
|
|
|
// This will cause the symbol to not be emitted if we are
|
|
|
|
|
// creating a new symtab from scratch instead of patching one.
|
|
|
|
|
if (!PatchExisting)
|
|
|
|
|
continue;
|
|
|
|
|
// If patching an existing symtab, patch this value to zero.
|
2017-06-16 20:04:43 -07:00
|
|
|
NewSymbol.st_value = 0;
|
|
|
|
|
}
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
} else {
|
|
|
|
|
consumeError(ExpectedSec.takeError());
|
2017-06-16 20:04:43 -07:00
|
|
|
}
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
auto SymbolName = Symbol.getName(StringSection);
|
|
|
|
|
assert(SymbolName && "cannot get symbol name");
|
|
|
|
|
|
|
|
|
|
auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) {
|
|
|
|
|
NewSymbol.st_value = getNewValueForSymbol(Name);
|
|
|
|
|
NewSymbol.st_shndx = ELF::SHN_ABS;
|
|
|
|
|
outs() << "BOLT-INFO: setting " << Name << " to 0x"
|
|
|
|
|
<< Twine::utohexstr(NewSymbol.st_value) << '\n';
|
|
|
|
|
++IsUpdated;
|
|
|
|
|
return true;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (opts::HotText && (*SymbolName == "__hot_start" ||
|
|
|
|
|
*SymbolName == "__hot_end"))
|
|
|
|
|
updateSymbolValue(*SymbolName, IsHotTextUpdated);
|
|
|
|
|
|
|
|
|
|
if (opts::HotData && (*SymbolName == "__hot_data_start" ||
|
|
|
|
|
*SymbolName == "__hot_data_end"))
|
|
|
|
|
updateSymbolValue(*SymbolName, IsHotDataUpdated);
|
|
|
|
|
|
|
|
|
|
if (opts::UpdateEnd && *SymbolName == "_end") {
|
|
|
|
|
NewSymbol.st_value = getNewValueForSymbol(*SymbolName);
|
|
|
|
|
NewSymbol.st_shndx = ELF::SHN_ABS;
|
|
|
|
|
outs() << "BOLT-INFO: setting " << *SymbolName << " to 0x"
|
|
|
|
|
<< Twine::utohexstr(NewSymbol.st_value) << '\n';
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Write((&Symbol - cantFail(Obj->symbols(Section)).begin()) *
|
|
|
|
|
sizeof(Elf_Sym),
|
2017-06-27 16:25:59 -07:00
|
|
|
reinterpret_cast<const char *>(&NewSymbol), sizeof(NewSymbol));
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2017-10-10 18:06:45 -07:00
|
|
|
|
|
|
|
|
assert((!IsHotTextUpdated || IsHotTextUpdated == 2) &&
|
|
|
|
|
"either none or both __hot_start/__hot_end symbols were expected");
|
2018-04-20 20:03:31 -07:00
|
|
|
assert((!IsHotDataUpdated || IsHotDataUpdated == 2) &&
|
|
|
|
|
"either none or both __hot_data_start/__hot_data_end symbols were expected");
|
|
|
|
|
|
|
|
|
|
auto addSymbol = [&](const std::string &Name) {
|
|
|
|
|
Elf_Sym Symbol;
|
|
|
|
|
Symbol.st_value = getNewValueForSymbol(Name);
|
|
|
|
|
Symbol.st_shndx = ELF::SHN_ABS;
|
|
|
|
|
Symbol.st_name = AddToStrTab(Name);
|
|
|
|
|
Symbol.st_size = 0;
|
|
|
|
|
Symbol.st_other = 0;
|
|
|
|
|
Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE);
|
|
|
|
|
|
|
|
|
|
outs() << "BOLT-INFO: setting " << Name << " to 0x"
|
|
|
|
|
<< Twine::utohexstr(Symbol.st_value) << '\n';
|
|
|
|
|
|
|
|
|
|
Write(0, reinterpret_cast<const char *>(&Symbol), sizeof(Symbol));
|
|
|
|
|
};
|
2017-10-10 18:06:45 -07:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
if (opts::HotText && !IsHotTextUpdated && !PatchExisting) {
|
2017-10-10 18:06:45 -07:00
|
|
|
addSymbol("__hot_start");
|
|
|
|
|
addSymbol("__hot_end");
|
2018-07-08 12:14:08 -07:00
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2018-07-08 12:14:08 -07:00
|
|
|
if (opts::HotData && !IsHotDataUpdated && !PatchExisting) {
|
|
|
|
|
addSymbol("__hot_data_start");
|
|
|
|
|
addSymbol("__hot_data_end");
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Update dynamic symbol table.
|
|
|
|
|
const Elf_Shdr *DynSymSection = nullptr;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (const Elf_Shdr &Section : cantFail(Obj->sections())) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Section.sh_type == ELF::SHT_DYNSYM) {
|
|
|
|
|
DynSymSection = &Section;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(DynSymSection && "no dynamic symbol table found");
|
2018-04-20 20:03:31 -07:00
|
|
|
updateSymbolTable(/*patch existing table?*/ true,
|
|
|
|
|
DynSymSection,
|
2017-06-27 16:25:59 -07:00
|
|
|
[&](size_t Offset, const char *Buf, size_t Size) {
|
|
|
|
|
Out->os().pwrite(Buf, Size,
|
|
|
|
|
DynSymSection->sh_offset + Offset);
|
|
|
|
|
},
|
|
|
|
|
[](StringRef) -> size_t { return 0; });
|
|
|
|
|
|
|
|
|
|
// (re)create regular symbol table.
|
2016-09-27 19:09:38 -07:00
|
|
|
const Elf_Shdr *SymTabSection = nullptr;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (const auto &Section : cantFail(Obj->sections())) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Section.sh_type == ELF::SHT_SYMTAB) {
|
|
|
|
|
SymTabSection = &Section;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!SymTabSection) {
|
|
|
|
|
errs() << "BOLT-WARNING: no symbol table found\n";
|
|
|
|
|
return;
|
|
|
|
|
}
|
2017-06-27 16:25:59 -07:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const Elf_Shdr *StrTabSection =
|
|
|
|
|
cantFail(Obj->getSection(SymTabSection->sh_link));
|
2017-06-27 16:25:59 -07:00
|
|
|
std::string NewContents;
|
|
|
|
|
std::string NewStrTab =
|
|
|
|
|
File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size);
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto SecName = cantFail(Obj->getSectionName(SymTabSection));
|
|
|
|
|
auto StrSecName = cantFail(Obj->getSectionName(StrTabSection));
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
updateSymbolTable(/*patch existing table?*/ false,
|
|
|
|
|
SymTabSection,
|
2017-06-27 16:25:59 -07:00
|
|
|
[&](size_t Offset, const char *Buf, size_t Size) {
|
|
|
|
|
NewContents.append(Buf, Size);
|
2018-04-20 20:03:31 -07:00
|
|
|
},
|
|
|
|
|
[&](StringRef Str) {
|
2017-06-27 16:25:59 -07:00
|
|
|
size_t Idx = NewStrTab.size();
|
|
|
|
|
NewStrTab.append(Str.data(), Str.size());
|
|
|
|
|
NewStrTab.append(1, '\0');
|
|
|
|
|
return Idx;
|
|
|
|
|
});
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
BC->registerOrUpdateNoteSection(SecName,
|
|
|
|
|
copyByteArray(NewContents),
|
|
|
|
|
NewContents.size(),
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true,
|
|
|
|
|
ELF::SHT_SYMTAB);
|
|
|
|
|
|
|
|
|
|
BC->registerOrUpdateNoteSection(StrSecName,
|
|
|
|
|
copyByteArray(NewStrTab),
|
|
|
|
|
NewStrTab.size(),
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true,
|
|
|
|
|
ELF::SHT_STRTAB);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFRelaPLT(ELFObjectFile<ELFT> *File) {
|
|
|
|
|
auto &OS = Out->os();
|
|
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
if (!RelaPLTSection) {
|
2016-09-27 19:09:38 -07:00
|
|
|
errs() << "BOLT-INFO: no .rela.plt section found\n";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
for (const auto &Rel : RelaPLTSection->getSectionRef().relocations()) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Rel.getType() == ELF::R_X86_64_IRELATIVE) {
|
|
|
|
|
DataRefImpl DRI = Rel.getRawDataRefImpl();
|
|
|
|
|
const auto *RelA = File->getRela(DRI);
|
|
|
|
|
auto Address = RelA->r_addend;
|
|
|
|
|
auto NewAddress = getNewFunctionAddress(Address);
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: patching IRELATIVE .rela.plt entry 0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << " with 0x"
|
|
|
|
|
<< Twine::utohexstr(NewAddress) << '\n');
|
|
|
|
|
auto NewRelA = *RelA;
|
|
|
|
|
NewRelA.r_addend = NewAddress;
|
|
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&NewRelA), sizeof(NewRelA),
|
|
|
|
|
reinterpret_cast<const char *>(RelA) - File->getData().data());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) {
|
|
|
|
|
auto &OS = Out->os();
|
|
|
|
|
|
|
|
|
|
SectionRef GOTSection;
|
|
|
|
|
for (const auto &Section : File->sections()) {
|
|
|
|
|
StringRef SectionName;
|
|
|
|
|
Section.getName(SectionName);
|
|
|
|
|
if (SectionName == ".got") {
|
|
|
|
|
GOTSection = Section;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!GOTSection.getObject()) {
|
|
|
|
|
errs() << "BOLT-INFO: no .got section found\n";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
StringRef GOTContents;
|
|
|
|
|
GOTSection.getContents(GOTContents);
|
|
|
|
|
for (const uint64_t *GOTEntry =
|
|
|
|
|
reinterpret_cast<const uint64_t *>(GOTContents.data());
|
|
|
|
|
GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() +
|
|
|
|
|
GOTContents.size());
|
|
|
|
|
++GOTEntry) {
|
|
|
|
|
if (auto NewAddress = getNewFunctionAddress(*GOTEntry)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x"
|
|
|
|
|
<< Twine::utohexstr(*GOTEntry) << " with 0x"
|
|
|
|
|
<< Twine::utohexstr(NewAddress) << '\n');
|
|
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress),
|
|
|
|
|
reinterpret_cast<const char *>(GOTEntry) - File->getData().data());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
|
|
|
|
|
auto *Obj = File->getELFFile();
|
|
|
|
|
auto &OS = Out->os();
|
|
|
|
|
|
|
|
|
|
using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
|
|
|
|
|
using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
|
|
|
|
|
|
|
|
|
|
// Locate DYNAMIC by looking through program headers.
|
|
|
|
|
uint64_t DynamicOffset = 0;
|
|
|
|
|
const Elf_Phdr *DynamicPhdr = 0;
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
for (auto &Phdr : cantFail(Obj->program_headers())) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Phdr.p_type == ELF::PT_DYNAMIC) {
|
|
|
|
|
DynamicOffset = Phdr.p_offset;
|
|
|
|
|
DynamicPhdr = &Phdr;
|
|
|
|
|
assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(DynamicPhdr && "missing dynamic in ELF binary");
|
|
|
|
|
|
2017-08-04 11:21:05 -07:00
|
|
|
bool ZNowSet = false;
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Go through all dynamic entries and patch functions addresses with
|
|
|
|
|
// new ones.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
const Elf_Dyn *DTB = cantFail(Obj->dynamic_table_begin(DynamicPhdr),
|
|
|
|
|
"error accessing dynamic table");
|
|
|
|
|
const Elf_Dyn *DTE = cantFail(Obj->dynamic_table_end(DynamicPhdr),
|
|
|
|
|
"error accessing dynamic table");
|
|
|
|
|
for (auto *DE = DTB; DE != DTE; ++DE) {
|
2016-09-27 19:09:38 -07:00
|
|
|
auto NewDE = *DE;
|
|
|
|
|
bool ShouldPatch = true;
|
|
|
|
|
switch (DE->getTag()) {
|
|
|
|
|
default:
|
|
|
|
|
ShouldPatch = false;
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_INIT:
|
|
|
|
|
case ELF::DT_FINI:
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2017-08-04 11:21:05 -07:00
|
|
|
if (auto NewAddress = getNewFunctionAddress(DE->getPtr())) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type "
|
|
|
|
|
<< DE->getTag() << '\n');
|
|
|
|
|
NewDE.d_un.d_ptr = NewAddress;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_FLAGS:
|
|
|
|
|
if (BC->RequiresZNow) {
|
|
|
|
|
NewDE.d_un.d_val |= ELF::DF_BIND_NOW;
|
|
|
|
|
ZNowSet = true;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_FLAGS_1:
|
|
|
|
|
if (BC->RequiresZNow) {
|
|
|
|
|
NewDE.d_un.d_val |= ELF::DF_1_NOW;
|
|
|
|
|
ZNowSet = true;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (ShouldPatch) {
|
|
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE),
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
DynamicOffset + (DE - DTB) * sizeof(*DE));
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
}
|
2017-08-04 11:21:05 -07:00
|
|
|
|
|
|
|
|
if (BC->RequiresZNow && !ZNowSet) {
|
|
|
|
|
errs() << "BOLT-ERROR: output binary requires immediate relocation "
|
|
|
|
|
"processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "
|
|
|
|
|
".dynamic. Please re-link the binary with -znow.\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
|
2016-12-21 17:13:56 -08:00
|
|
|
const auto *Function = getBinaryFunctionAtAddress(OldAddress);
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!Function)
|
|
|
|
|
return 0;
|
2017-05-08 22:51:36 -07:00
|
|
|
return Function->getOutputAddress();
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
void RewriteInstance::rewriteFile() {
|
2016-09-27 19:09:38 -07:00
|
|
|
auto &OS = Out->os();
|
|
|
|
|
|
2016-02-08 10:02:48 -08:00
|
|
|
// We obtain an asm-specific writer so that we can emit nops in an
|
|
|
|
|
// architecture-specific way at the end of the function.
|
2015-11-23 17:54:18 -08:00
|
|
|
auto MCE = BC->TheTarget->createMCCodeEmitter(*BC->MII, *BC->MRI, *BC->Ctx);
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto MAB =
|
|
|
|
|
BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions());
|
|
|
|
|
std::unique_ptr<MCStreamer> Streamer(BC->TheTarget->createMCObjectStreamer(
|
|
|
|
|
*BC->TheTriple, *BC->Ctx, std::unique_ptr<MCAsmBackend>(MAB), OS,
|
|
|
|
|
std::unique_ptr<MCCodeEmitter>(MCE), *BC->STI,
|
|
|
|
|
/* RelaxAll */ false,
|
|
|
|
|
/*IncrementalLinkerCompatible */ false,
|
|
|
|
|
/* DWARFMustBeAtTheEnd */ false));
|
2016-03-11 11:30:30 -08:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
auto &Writer = static_cast<MCObjectStreamer *>(Streamer.get())
|
|
|
|
|
->getAssembler()
|
|
|
|
|
.getWriter();
|
|
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
// Make sure output stream has enough reserved space, otherwise
|
|
|
|
|
// pwrite() will fail.
|
2017-01-17 15:49:59 -08:00
|
|
|
auto Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress));
|
2017-05-25 10:29:38 -07:00
|
|
|
(void)Offset;
|
2017-01-17 15:49:59 -08:00
|
|
|
assert(Offset == getFileOffsetForAddress(NextAvailableAddress) &&
|
2016-02-08 10:02:48 -08:00
|
|
|
"error resizing output file");
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (!BC->HasRelocations) {
|
2016-09-27 19:09:38 -07:00
|
|
|
// Overwrite functions in the output file.
|
|
|
|
|
uint64_t CountOverwrittenFunctions = 0;
|
|
|
|
|
uint64_t OverwrittenScore = 0;
|
|
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
auto &Function = BFI.second;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Function.getImageAddress() == 0 || Function.getImageSize() == 0)
|
|
|
|
|
continue;
|
2016-04-05 19:35:45 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Function.getImageSize() > Function.getMaxSize()) {
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: new function size (0x"
|
|
|
|
|
<< Twine::utohexstr(Function.getImageSize())
|
|
|
|
|
<< ") is larger than maximum allowed size (0x"
|
|
|
|
|
<< Twine::utohexstr(Function.getMaxSize())
|
|
|
|
|
<< ") for function " << Function << '\n';
|
|
|
|
|
}
|
|
|
|
|
FailedAddresses.emplace_back(Function.getAddress());
|
|
|
|
|
continue;
|
2016-09-02 14:15:29 -07:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Function.isSplit() && (Function.cold().getImageAddress() == 0 ||
|
|
|
|
|
Function.cold().getImageSize() == 0))
|
|
|
|
|
continue;
|
2016-09-08 14:52:26 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
OverwrittenScore += Function.getFunctionScore();
|
|
|
|
|
// Overwrite function in the output file.
|
|
|
|
|
if (opts::Verbosity >= 2) {
|
2018-06-14 14:27:20 -07:00
|
|
|
outs() << "BOLT: rewriting function \"" << Function << "\"\n";
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2017-01-17 15:49:59 -08:00
|
|
|
OS.pwrite(reinterpret_cast<char *>(Function.getImageAddress()),
|
2017-05-08 22:51:36 -07:00
|
|
|
Function.getImageSize(),
|
|
|
|
|
Function.getFileOffset());
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
// Write nops at the end of the function.
|
2017-01-17 15:49:59 -08:00
|
|
|
auto Pos = OS.tell();
|
|
|
|
|
OS.seek(Function.getFileOffset() + Function.getImageSize());
|
2016-09-27 19:09:38 -07:00
|
|
|
MAB->writeNopData(Function.getMaxSize() - Function.getImageSize(),
|
|
|
|
|
&Writer);
|
2017-01-17 15:49:59 -08:00
|
|
|
OS.seek(Pos);
|
|
|
|
|
|
|
|
|
|
// Write jump tables if updating in-place.
|
|
|
|
|
if (opts::JumpTables == JTS_BASIC) {
|
|
|
|
|
for (auto &JTI : Function.JumpTables) {
|
2017-11-14 20:05:11 -08:00
|
|
|
auto *JT = JTI.second;
|
2018-04-20 20:03:31 -07:00
|
|
|
auto &Section = JT->getOutputSection();
|
2017-11-14 20:05:11 -08:00
|
|
|
Section.setFileOffset(getFileOffsetForAddress(JT->getAddress()));
|
|
|
|
|
assert(Section.getFileOffset() && "no matching offset in file");
|
|
|
|
|
OS.pwrite(reinterpret_cast<const char*>(Section.getOutputData()),
|
|
|
|
|
Section.getOutputSize(),
|
|
|
|
|
Section.getFileOffset());
|
2017-01-17 15:49:59 -08:00
|
|
|
}
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
if (!Function.isSplit()) {
|
|
|
|
|
++CountOverwrittenFunctions;
|
|
|
|
|
if (opts::MaxFunctions &&
|
|
|
|
|
CountOverwrittenFunctions == opts::MaxFunctions) {
|
2018-06-14 14:27:20 -07:00
|
|
|
outs() << "BOLT: maximum number of functions reached\n";
|
2016-09-27 19:09:38 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Write cold part
|
|
|
|
|
if (opts::Verbosity >= 2) {
|
2018-06-14 14:27:20 -07:00
|
|
|
outs() << "BOLT: rewriting function \"" << Function
|
2016-09-27 19:09:38 -07:00
|
|
|
<< "\" (cold part)\n";
|
|
|
|
|
}
|
2017-05-08 22:51:36 -07:00
|
|
|
OS.pwrite(reinterpret_cast<char*>(Function.cold().getImageAddress()),
|
|
|
|
|
Function.cold().getImageSize(),
|
|
|
|
|
Function.cold().getFileOffset());
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
// FIXME: write nops after cold part too.
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
++CountOverwrittenFunctions;
|
|
|
|
|
if (opts::MaxFunctions &&
|
|
|
|
|
CountOverwrittenFunctions == opts::MaxFunctions) {
|
2018-06-14 14:27:20 -07:00
|
|
|
outs() << "BOLT: maximum number of functions reached\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Print function statistics.
|
|
|
|
|
outs() << "BOLT: " << CountOverwrittenFunctions
|
|
|
|
|
<< " out of " << BinaryFunctions.size()
|
|
|
|
|
<< " functions were overwritten.\n";
|
2017-11-28 09:57:21 -08:00
|
|
|
if (BC->TotalScore != 0) {
|
|
|
|
|
double Coverage = OverwrittenScore / (double) BC->TotalScore * 100.0;
|
2016-09-27 19:09:38 -07:00
|
|
|
outs() << format("BOLT: Rewritten functions cover %.2lf", Coverage)
|
|
|
|
|
<< "% of the execution count of simple functions of "
|
|
|
|
|
"this binary.\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations && opts::TrapOldCode) {
|
2017-01-17 15:49:59 -08:00
|
|
|
auto SavedPos = OS.tell();
|
2016-09-27 19:09:38 -07:00
|
|
|
// Overwrite function body to make sure we never execute these instructions.
|
|
|
|
|
for (auto &BFI : BinaryFunctions) {
|
|
|
|
|
auto &BF = BFI.second;
|
|
|
|
|
if (!BF.getFileOffset())
|
|
|
|
|
continue;
|
2017-01-17 15:49:59 -08:00
|
|
|
OS.seek(BF.getFileOffset());
|
2016-09-27 19:09:38 -07:00
|
|
|
for (unsigned I = 0; I < BF.getMaxSize(); ++I)
|
2017-01-17 15:49:59 -08:00
|
|
|
OS.write((unsigned char)
|
2016-09-27 19:09:38 -07:00
|
|
|
Streamer->getContext().getAsmInfo()->getTrapFillValue());
|
|
|
|
|
}
|
2017-01-17 15:49:59 -08:00
|
|
|
OS.seek(SavedPos);
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
|
2017-01-17 15:49:59 -08:00
|
|
|
// Write all non-local sections, i.e. those not emitted with the function.
|
2017-11-14 20:05:11 -08:00
|
|
|
for (auto &Section : BC->allocatableSections()) {
|
|
|
|
|
if (!Section.isFinalized() || Section.isLocal())
|
2016-02-08 10:02:48 -08:00
|
|
|
continue;
|
2016-09-02 14:15:29 -07:00
|
|
|
if (opts::Verbosity >= 1) {
|
2018-06-14 14:27:20 -07:00
|
|
|
outs() << "BOLT: writing new section " << Section.getName()
|
[BOLT][Refactoring] Isolate changes to MC layer
Summary:
Changes that we made to MCInst, MCOperand, MCExpr, etc. are now all
moved into tools/llvm-bolt. That required a change to the way we handle
annotations and any extra operands for MCInst.
Any MCPlus information is now attached via an extra operand of type
MCInst with an opcode ANNOTATION_LABEL. Since this operand is MCInst, we
attach extra info as operands to this instruction. For first-level
annotations use functions to access the information, such as
getConditionalTailCall() or getEHInfo(), etc. For the rest, optional or
second-class annotations, use a general named-annotation interface such
as getAnnotationAs<uint64_t>(Inst, "Count").
I did a test on HHVM binary, and a memory consumption went down a little
bit while the runtime remained the same.
(cherry picked from FBD7405412)
2018-03-19 18:32:12 -07:00
|
|
|
<< "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress())
|
|
|
|
|
<< "\n of size " << Section.getOutputSize()
|
|
|
|
|
<< "\n at offset " << Section.getFileOffset() << '\n';
|
2016-09-02 14:15:29 -07:00
|
|
|
}
|
2018-02-01 16:33:43 -08:00
|
|
|
OS.pwrite(reinterpret_cast<const char*>(Section.getOutputData()),
|
|
|
|
|
Section.getOutputSize(),
|
|
|
|
|
Section.getFileOffset());
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
|
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
// If .eh_frame is present create .eh_frame_hdr.
|
2018-02-01 16:33:43 -08:00
|
|
|
if (EHFrameSection && EHFrameSection->isFinalized()) {
|
|
|
|
|
writeEHFrameHeader();
|
2015-12-18 17:00:46 -08:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Patch program header table.
|
|
|
|
|
patchELFPHDRTable();
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2017-05-16 17:29:31 -07:00
|
|
|
// Finalize memory image of section string table.
|
|
|
|
|
finalizeSectionStringTable();
|
|
|
|
|
|
2017-09-20 10:43:01 -07:00
|
|
|
// Update symbol tables.
|
|
|
|
|
patchELFSymTabs();
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Copy non-allocatable sections once allocatable part is finished.
|
|
|
|
|
rewriteNoteSections();
|
|
|
|
|
|
2017-08-04 11:21:05 -07:00
|
|
|
// Patch dynamic section/segment.
|
|
|
|
|
patchELFDynamic();
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2017-01-17 15:49:59 -08:00
|
|
|
patchELFRelaPLT();
|
|
|
|
|
patchELFGOT();
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Update ELF book-keeping info.
|
|
|
|
|
patchELFSectionHeaderTable();
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
if (opts::PrintSections) {
|
|
|
|
|
outs() << "BOLT-INFO: Sections after processing:\n";
|
|
|
|
|
BC->printSections(outs());
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
Out->keep();
|
2016-11-15 10:40:00 -08:00
|
|
|
|
|
|
|
|
// If requested, open again the binary we just wrote to dump its EH Frame
|
|
|
|
|
if (opts::DumpEHFrame) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Expected<OwningBinary<Binary>> BinaryOrErr =
|
2016-11-15 10:40:00 -08:00
|
|
|
createBinary(opts::OutputFilename);
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (auto E = BinaryOrErr.takeError())
|
|
|
|
|
report_error(opts::OutputFilename, std::move(E));
|
2016-11-15 10:40:00 -08:00
|
|
|
Binary &Binary = *BinaryOrErr.get().getBinary();
|
|
|
|
|
|
|
|
|
|
if (auto *E = dyn_cast<ELFObjectFileBase>(&Binary)) {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
auto DwCtx = DWARFContext::create(*E);
|
|
|
|
|
const auto &EHFrame = DwCtx->getEHFrame();
|
2016-11-15 10:40:00 -08:00
|
|
|
outs() << "BOLT-INFO: Dumping rewritten .eh_frame\n";
|
2018-03-30 15:49:34 -07:00
|
|
|
EHFrame->dump(outs(), &*BC->MRI, NoneType());
|
2016-11-15 10:40:00 -08:00
|
|
|
}
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
2016-03-02 18:40:10 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
void RewriteInstance::writeEHFrameHeader() {
|
|
|
|
|
DWARFDebugFrame NewEHFrame(true, EHFrameSection->getFileAddress());
|
|
|
|
|
NewEHFrame.parse(DWARFDataExtractor(EHFrameSection->getOutputContents(),
|
|
|
|
|
BC->AsmInfo->isLittleEndian(),
|
|
|
|
|
BC->AsmInfo->getCodePointerSize()));
|
|
|
|
|
|
|
|
|
|
auto OldEHFrameSection = BC->getUniqueSectionByName(".eh_frame_old");
|
|
|
|
|
assert(OldEHFrameSection && "expected .eh_frame_old to be present");
|
|
|
|
|
DWARFDebugFrame OldEHFrame(true, OldEHFrameSection->getFileAddress());
|
|
|
|
|
OldEHFrame.parse(DWARFDataExtractor(OldEHFrameSection->getOutputContents(),
|
|
|
|
|
BC->AsmInfo->isLittleEndian(),
|
|
|
|
|
BC->AsmInfo->getCodePointerSize()));
|
2016-11-11 14:33:34 -08:00
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n");
|
|
|
|
|
|
2017-04-06 10:49:59 -07:00
|
|
|
NextAvailableAddress =
|
|
|
|
|
appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign);
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
const auto EHFrameHdrFileAddress = NextAvailableAddress;
|
|
|
|
|
const auto EHFrameHdrFileOffset =
|
|
|
|
|
getFileOffsetForAddress(NextAvailableAddress);
|
2016-11-11 14:33:34 -08:00
|
|
|
|
|
|
|
|
auto NewEHFrameHdr =
|
|
|
|
|
CFIRdWrt->generateEHFrameHeader(OldEHFrame,
|
|
|
|
|
NewEHFrame,
|
2018-02-01 16:33:43 -08:00
|
|
|
EHFrameHdrFileAddress,
|
2016-11-11 14:33:34 -08:00
|
|
|
FailedAddresses);
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch");
|
|
|
|
|
Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size());
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
const auto Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
|
|
|
|
|
/*IsText=*/false,
|
|
|
|
|
/*IsAllocatable=*/true);
|
|
|
|
|
auto &EHFrameHdrSec = BC->registerOrUpdateSection(".eh_frame_hdr",
|
|
|
|
|
ELF::SHT_PROGBITS,
|
|
|
|
|
Flags,
|
|
|
|
|
nullptr,
|
|
|
|
|
NewEHFrameHdr.size(),
|
|
|
|
|
/*Alignment=*/1);
|
|
|
|
|
EHFrameHdrSec.setFileOffset(EHFrameHdrFileOffset);
|
|
|
|
|
EHFrameHdrSec.setFileAddress(EHFrameHdrFileAddress);
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
NextAvailableAddress += EHFrameHdrSec.getOutputSize();
|
2016-11-11 14:33:34 -08:00
|
|
|
|
|
|
|
|
// Merge .eh_frame and .eh_frame_old so that gdb can locate all FDEs.
|
2018-02-01 16:33:43 -08:00
|
|
|
const auto EHFrameSectionSize = (OldEHFrameSection->getFileAddress() +
|
|
|
|
|
OldEHFrameSection->getOutputSize() -
|
|
|
|
|
EHFrameSection->getFileAddress());
|
|
|
|
|
|
|
|
|
|
EHFrameSection =
|
|
|
|
|
BC->registerOrUpdateSection(".eh_frame",
|
|
|
|
|
EHFrameSection->getELFType(),
|
|
|
|
|
EHFrameSection->getELFFlags(),
|
|
|
|
|
EHFrameSection->getOutputData(),
|
|
|
|
|
EHFrameSectionSize,
|
|
|
|
|
EHFrameSection->getAlignment(),
|
|
|
|
|
EHFrameSection->isLocal());
|
|
|
|
|
|
|
|
|
|
BC->deregisterSection(*OldEHFrameSection);
|
|
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is "
|
2018-02-01 16:33:43 -08:00
|
|
|
<< EHFrameSection->getOutputSize() << '\n');
|
2016-11-11 14:33:34 -08:00
|
|
|
}
|
|
|
|
|
|
2017-01-17 15:49:59 -08:00
|
|
|
uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
|
|
|
|
|
// Check if it's possibly part of the new segment.
|
|
|
|
|
if (Address >= NewTextSegmentAddress) {
|
|
|
|
|
return Address - NewTextSegmentAddress + NewTextSegmentOffset;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Find an existing segment that matches the address.
|
|
|
|
|
const auto SegmentInfoI = EFMM->SegmentMapInfo.upper_bound(Address);
|
|
|
|
|
if (SegmentInfoI == EFMM->SegmentMapInfo.begin())
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
const auto &SegmentInfo = std::prev(SegmentInfoI)->second;
|
|
|
|
|
if (Address < SegmentInfo.Address ||
|
|
|
|
|
Address >= SegmentInfo.Address + SegmentInfo.FileSize)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
return SegmentInfo.FileOffset + Address - SegmentInfo.Address;
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-07 12:20:46 -08:00
|
|
|
/// Return true if \p SectionName is listed in SectionsToOverwrite, or if it
/// names a unique registered section that is both allocatable and finalized.
bool RewriteInstance::willOverwriteSection(StringRef SectionName) {
  // Sections explicitly listed for overwriting.
  for (auto &Name : SectionsToOverwrite)
    if (SectionName == Name)
      return true;

  // Otherwise decide based on the registered section, if there is one.
  auto Section = BC->getUniqueSectionByName(SectionName);
  if (!Section)
    return false;
  return Section->isAllocatable() && Section->isFinalized();
}
|
2016-08-22 14:24:09 -07:00
|
|
|
|
|
|
|
|
/// Return the function whose address range contains \p Address, or nullptr.
///
/// The candidate is the function with the greatest start address that is not
/// above \p Address. Its extent is getMaxSize() when \p UseMaxSize is set and
/// getSize() otherwise; with \p CheckPastEnd the address immediately past
/// that extent is accepted as well.
BinaryFunction *
RewriteInstance::getBinaryFunctionContainingAddress(uint64_t Address,
                                                    bool CheckPastEnd,
                                                    bool UseMaxSize) {
  const auto NextFunction = BinaryFunctions.upper_bound(Address);
  if (NextFunction == BinaryFunctions.begin())
    return nullptr;

  const auto Candidate = std::prev(NextFunction);

  const auto Extent = UseMaxSize ? Candidate->second.getMaxSize()
                                 : Candidate->second.getSize();

  // First address that is no longer considered part of the function.
  const auto Limit = Candidate->first + Extent + (CheckPastEnd ? 1 : 0);
  if (Address < Limit)
    return &Candidate->second;
  return nullptr;
}
|
2016-12-21 17:13:56 -08:00
|
|
|
|
|
|
|
|
/// Return the function associated with the symbol of the binary data object
/// found at \p Address, or nullptr if there is no such object.
const BinaryFunction *
RewriteInstance::getBinaryFunctionAtAddress(uint64_t Address) const {
  const auto *BD = BC->getBinaryDataAtAddress(Address);
  if (!BD)
    return nullptr;
  return BC->getFunctionForSymbol(BD->getSymbol());
}
|
2017-05-16 09:27:34 -07:00
|
|
|
|
|
|
|
|
/// Translate module-level input address ranges into output address ranges.
///
/// For every range in \p InputRanges, walk BinaryFunctions starting at the
/// first entry whose key is not less than the range's LowPC and stop at the
/// first function whose address is at or past the range's HighPC. The output
/// address ranges of each visited function are appended to the result.
///
/// The result is the plain concatenation, in visiting order, of the visited
/// functions' output ranges; nothing here sorts, merges or de-duplicates
/// them, so a function reachable from several input ranges contributes its
/// ranges once per input range.
///
/// NOTE(review): this presumably relies on BinaryFunctions being ordered by
/// function start address. Under that assumption a function that starts
/// below LowPC but extends into the range is never visited -- confirm that
/// is the intended behavior.
///
/// \param InputRanges address ranges expressed in input-binary addresses.
/// \returns the collected output address ranges.
DWARFAddressRangesVector RewriteInstance::translateModuleAddressRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  DWARFAddressRangesVector OutputRanges;

  for (const auto &Range : InputRanges) {
    for (auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
         BFI != BinaryFunctions.end(); ++BFI) {
      const auto &Function = BFI->second;
      // Functions are visited in key order; the first one at or beyond
      // HighPC ends the scan for this input range.
      if (Function.getAddress() >= Range.HighPC)
        break;

      // Bind by const reference: this works whether the getter returns by
      // value (the temporary's lifetime is extended) or by reference.
      const auto &FunctionRanges = Function.getOutputAddressRanges();
      // The previous std::move algorithm ran over a const container, which
      // can only copy. A single range insert performs the same append.
      OutputRanges.insert(OutputRanges.end(), FunctionRanges.begin(),
                          FunctionRanges.end());
    }
  }

  return OutputRanges;
}
|