[BOLT] Add shrink wrapping pass
Summary: Add an implementation for shrink wrapping, a frame optimization that moves callee-saved register spills from hot prologues to cold successors. (cherry picked from FBD4983706)
Committed by: Maksim Panchenko
Parent: 4b485f4167
Commit: d850ca3622
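The summary above describes the transformation at a high level. As an
illustration only (a hypothetical C++ function, not code from this commit),
the shape of code that benefits looks like this: the hot path needs no
callee-saved registers, so their spills belong in the cold block rather
than in the shared prologue.

// Illustration only: slow_path is an assumed helper; process() is a
// hypothetical hot function.
int slow_path(int x);

int process(int x) {
  if (x >= 0)              // hot path: needs no callee-saved registers
    return x + 1;
  // cold path: register-hungry work that makes the compiler allocate
  // callee-saved registers; shrink wrapping sinks their saves down here
  return slow_path(x * 3 + 7);
}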
@@ -148,8 +148,9 @@ BinaryBasicBlock *BinaryBasicBlock::getLandingPad(const MCSymbol *Label) const {
}

int32_t BinaryBasicBlock::getCFIStateAtInstr(const MCInst *Instr) const {
  assert(getFunction()->getState() == BinaryFunction::State::CFG &&
         "can only calculate CFI state when function is in active CFG state");
  assert(
      getFunction()->getState() >= BinaryFunction::State::CFG &&
      "can only calculate CFI state when function is in or past the CFG state");

  const auto &FDEProgram = getFunction()->getFDEProgram();

@@ -316,6 +317,38 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB,
  return MIA->analyzeBranch(Instructions, TBB, FBB, CondBranch, UncondBranch);
}

MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
  auto &BC = Function->getBinaryContext();
  auto Itr = rbegin();
  bool Check = Pos ? false : true;
  MCInst *FirstTerminator{nullptr};
  while (Itr != rend()) {
    if (!Check) {
      if (&*Itr == Pos)
        Check = true;
      ++Itr;
      continue;
    }
    if (BC.MIA->isTerminator(*Itr))
      FirstTerminator = &*Itr;
    ++Itr;
  }
  return FirstTerminator;
}

bool BinaryBasicBlock::hasTerminatorAfter(MCInst *Pos) {
  auto &BC = Function->getBinaryContext();
  auto Itr = rbegin();
  while (Itr != rend()) {
    if (&*Itr == Pos)
      return false;
    if (BC.MIA->isTerminator(*Itr))
      return true;
    ++Itr;
  }
  return false;
}

bool BinaryBasicBlock::swapConditionalSuccessors() {
  if (succ_size() != 2)
    return false;
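A standalone sketch (plain C++, not BOLT code) of the reverse scan that
getTerminatorBefore performs: walk from the end of the block, start
recording only once Pos has been passed, and keep overwriting so that the
earliest terminator before Pos is the one returned.

#include <vector>

template <typename Pred>
int *firstTerminatorBefore(std::vector<int> &Insts, int *Pos, Pred IsTerm) {
  bool Check = Pos == nullptr;   // with no Pos, scan the whole block
  int *First = nullptr;
  for (auto It = Insts.rbegin(); It != Insts.rend(); ++It) {
    if (!Check) {
      if (&*It == Pos)
        Check = true;            // Pos itself is skipped, as in the pass
      continue;
    }
    if (IsTerm(*It))
      First = &*It;              // last overwrite = first in program order
  }
  return First;
}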
@@ -617,20 +617,26 @@ public:
    return Instructions.erase(II);
  }

  /// Retrieve iterator for \p Inst or return end iterator if instruction is
  /// not from this basic block.
  decltype(Instructions)::iterator findInstruction(const MCInst *Inst) {
    if (Instructions.empty())
      return Instructions.end();
    size_t Index = Inst - &Instructions[0];
    return Index >= Instructions.size() ? Instructions.end()
                                        : Instructions.begin() + Index;
  }

  /// Replace an instruction with a sequence of instructions. Returns true
  /// if the instruction to be replaced was found and replaced.
  template <typename Itr>
  bool replaceInstruction(const MCInst *Inst, Itr Begin, Itr End) {
    auto I = Instructions.end();
    auto B = Instructions.begin();
    while (I > B) {
      --I;
      if (&*I == Inst) {
        adjustNumPseudos(*Inst, -1);
        Instructions.insert(Instructions.erase(I), Begin, End);
        adjustNumPseudos(Begin, End, 1);
        return true;
      }
    auto I = findInstruction(Inst);
    if (I != Instructions.end()) {
      adjustNumPseudos(*Inst, -1);
      Instructions.insert(Instructions.erase(I), Begin, End);
      adjustNumPseudos(Begin, End, 1);
      return true;
    }
    return false;
  }

@@ -640,6 +646,23 @@ public:
    return replaceInstruction(Inst, Replacement.begin(), Replacement.end());
  }

  /// Insert \p NewInst before \p At, which must be an existing instruction
  /// in this BB. Return an iterator to the newly inserted instruction.
  iterator insertInstruction(iterator At, MCInst &&NewInst) {
    adjustNumPseudos(NewInst, 1);
    return Instructions.emplace(At, std::move(NewInst));
  }

  /// Helper to retrieve any terminators in \p BB before \p Pos. This is used
  /// to skip CFI instructions and to retrieve the first terminator
  /// instruction in basic blocks with two terminators (conditional jump and
  /// unconditional jump).
  MCInst *getTerminatorBefore(MCInst *Pos);

  /// Used to identify whether an instruction is before a terminator and
  /// whether moving it to the end of the BB would render it dead code.
  bool hasTerminatorAfter(MCInst *Pos);

  /// Split apart the instructions in this basic block starting at Inst.
  /// The instructions following Inst are removed and returned in a vector.
  std::vector<MCInst> splitInstructions(const MCInst *Inst) {
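A standalone sketch (plain C++, not BOLT code) of findInstruction's lookup:
because the instructions live in contiguous storage, an element pointer
maps back to an index by subtracting the base address, and any
out-of-range index means the instruction is not from this block.

#include <vector>

std::vector<int>::iterator findElement(std::vector<int> &V, const int *P) {
  if (V.empty())
    return V.end();
  size_t Index = P - &V[0];   // contiguous storage: pointer -> index
  return Index >= V.size() ? V.end() : V.begin() + Index;
}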
@@ -239,24 +239,57 @@ void BinaryContext::preprocessDebugInfo(
  }
}

void BinaryContext::printCFI(raw_ostream &OS, uint32_t Operation) {
  switch (Operation) {
  case MCCFIInstruction::OpSameValue: OS << "OpSameValue"; break;
  case MCCFIInstruction::OpRememberState: OS << "OpRememberState"; break;
  case MCCFIInstruction::OpRestoreState: OS << "OpRestoreState"; break;
  case MCCFIInstruction::OpOffset: OS << "OpOffset"; break;
  case MCCFIInstruction::OpDefCfaRegister: OS << "OpDefCfaRegister"; break;
  case MCCFIInstruction::OpDefCfaOffset: OS << "OpDefCfaOffset"; break;
  case MCCFIInstruction::OpDefCfa: OS << "OpDefCfa"; break;
  case MCCFIInstruction::OpRelOffset: OS << "OpRelOffset"; break;
  case MCCFIInstruction::OpAdjustCfaOffset: OS << "OpAdjustCfaOffset"; break;
  case MCCFIInstruction::OpEscape: OS << "OpEscape"; break;
  case MCCFIInstruction::OpRestore: OS << "OpRestore"; break;
  case MCCFIInstruction::OpUndefined: OS << "OpUndefined"; break;
  case MCCFIInstruction::OpRegister: OS << "OpRegister"; break;
  case MCCFIInstruction::OpWindowSave: OS << "OpWindowSave"; break;
  case MCCFIInstruction::OpGnuArgsSize: OS << "OpGnuArgsSize"; break;
  default: OS << "Op#" << Operation; break;
void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
  uint32_t Operation = Inst.getOperation();
  switch (Operation) {
  case MCCFIInstruction::OpSameValue:
    OS << "OpSameValue Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpRememberState:
    OS << "OpRememberState";
    break;
  case MCCFIInstruction::OpRestoreState:
    OS << "OpRestoreState";
    break;
  case MCCFIInstruction::OpOffset:
    OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    OS << "OpDefCfaRegister Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpDefCfaOffset:
    OS << "OpDefCfaOffset " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpDefCfa:
    OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpRelOffset:
    OS << "OpRelOffset";
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    OS << "OpAdjustCfaOffset";
    break;
  case MCCFIInstruction::OpEscape:
    OS << "OpEscape";
    break;
  case MCCFIInstruction::OpRestore:
    OS << "OpRestore";
    break;
  case MCCFIInstruction::OpUndefined:
    OS << "OpUndefined";
    break;
  case MCCFIInstruction::OpRegister:
    OS << "OpRegister";
    break;
  case MCCFIInstruction::OpWindowSave:
    OS << "OpWindowSave";
    break;
  case MCCFIInstruction::OpGnuArgsSize:
    OS << "OpGnuArgsSize";
    break;
  default:
    OS << "Op#" << Operation;
    break;
  }
}

@@ -274,7 +307,7 @@ void BinaryContext::printInstruction(raw_ostream &OS,
    uint32_t Offset = Instruction.getOperand(0).getImm();
    OS << "\t!CFI\t$" << Offset << "\t; ";
    if (Function)
      printCFI(OS, Function->getCFIFor(Instruction)->getOperation());
      printCFI(OS, *Function->getCFIFor(Instruction));
    OS << "\n";
    return;
  }
@@ -143,6 +143,12 @@ public:

  const DataReader &DR;

  /// Sum of execution counts of all functions
  uint64_t SumExecutionCount{0};

  /// Number of functions with profile information
  uint64_t NumProfiledFuncs{0};

  BinaryContext(std::unique_ptr<MCContext> Ctx,
                std::unique_ptr<DWARFContext> DwCtx,
                std::unique_ptr<Triple> TheTriple,

@@ -262,8 +268,19 @@ public:
    return Size;
  }

  /// Return a function execution count threshold for determining whether
  /// the function is 'hot'. Consider it hot if its count is above the
  /// average execution count of profiled functions.
  uint64_t getHotThreshold() const {
    static uint64_t Threshold{0};
    if (Threshold == 0) {
      Threshold = NumProfiledFuncs ? SumExecutionCount / NumProfiledFuncs : 1;
    }
    return Threshold;
  }

  /// Print the string name for a CFI operation.
  static void printCFI(raw_ostream &OS, uint32_t Operation);
  static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);

  /// Print a single MCInst in native format. If Function is non-null,
  /// the instruction will be annotated with CFI and possibly DWARF line table
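A hedged usage sketch of getHotThreshold (Fn and BC stand for a profiled
BinaryFunction and its BinaryContext; neither name appears in this hunk).
getExecutionCount() and COUNT_NO_PROFILE are used the same way elsewhere
in this commit:

// Sketch only: gate an optimization on function hotness.
auto Count = Fn.getExecutionCount();
if (Count != BinaryFunction::COUNT_NO_PROFILE &&
    Count > BC.getHotThreshold()) {
  // hot function: its prologue is worth shrink wrapping
}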
@@ -150,7 +150,7 @@ constexpr unsigned NoRegister = 0;

constexpr const char *DynoStats::Desc[];
constexpr unsigned BinaryFunction::MinAlign;

namespace {

/// Gets debug line information for the instruction located at the given

@@ -535,8 +535,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
    for (auto &Elmt : OffsetToCFI) {
      OS << format(" %08x:\t", Elmt.first);
      assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
      BinaryContext::printCFI(OS,
                              FrameInstructions[Elmt.second].getOperation());
      BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
      OS << "\n";
    }
  } else {

@@ -544,7 +543,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
    for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
      const MCCFIInstruction &CFI = FrameInstructions[I];
      OS << format(" %d:\t", I);
      BinaryContext::printCFI(OS, CFI.getOperation());
      BinaryContext::printCFI(OS, CFI);
      OS << "\n";
    }
  }

@@ -3442,6 +3441,54 @@ void BinaryFunction::updateLayout(LayoutType Type,
  updateLayoutIndices();
}

bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
                                             BinaryBasicBlock *OldDest,
                                             BinaryBasicBlock *NewDest) {
  auto *Instr = BB->getLastNonPseudoInstr();
  if (!Instr || !BC.MIA->isIndirectBranch(*Instr))
    return false;
  auto JTAddress = BC.MIA->getJumpTable(*Instr);
  assert(JTAddress && "Invalid jump table address");
  auto *JT = getJumpTableContainingAddress(JTAddress);
  assert(JT && "No jump table structure for this indirect branch");
  bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
                                        NewDest->getLabel());
  assert(Patched && "Invalid entry to be replaced in jump table");
  return true;
}

BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
                                            BinaryBasicBlock *To) {
  // Create intermediate BB
  MCSymbol *Tmp = BC.Ctx->createTempSymbol("SplitEdge", true);
  auto NewBB = createBasicBlock(0, Tmp);
  auto NewBBPtr = NewBB.get();

  // Update "From" BB
  auto I = From->succ_begin();
  auto BI = From->branch_info_begin();
  for (; I != From->succ_end(); ++I) {
    if (*I == To)
      break;
    ++BI;
  }
  assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!");
  uint64_t OrigCount{BI->Count};
  uint64_t OrigMispreds{BI->MispredictedCount};
  replaceJumpTableEntryIn(From, To, NewBBPtr);
  From->replaceSuccessor(To, NewBBPtr, OrigCount, OrigMispreds);

  NewBB->addSuccessor(To, OrigCount, OrigMispreds);
  NewBB->setExecutionCount(OrigCount);
  NewBB->setIsCold(From->isCold());

  // Update CFI and BB layout with new intermediate BB
  std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
  NewBBs.emplace_back(std::move(NewBB));
  insertBasicBlocks(From, std::move(NewBBs), true, true);
  return NewBBPtr;
}
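A hedged usage sketch of splitEdge (From and To stand for CFG-adjacent hot
and cold blocks; this call does not appear in the hunk itself). The header
comment further below notes that fixBranches() must be called manually
afterwards:

// Sketch only: create a landing block on a split edge, e.g. to host
// restore code for shrink-wrapped callee-saved registers.
auto *Mid = Function.splitEdge(From, To);
// ... insert instructions into Mid, then call Function.fixBranches() ...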

bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
                                          uint64_t SymbolSize) const {
  // Some symbols are tolerated inside function bodies, others are not.

@@ -3578,6 +3625,22 @@ BinaryFunction::JumpTable::getEntriesForAddress(const uint64_t Addr) const {
  return std::make_pair(StartIndex, EndIndex);
}

bool BinaryFunction::JumpTable::replaceDestination(uint64_t JTAddress,
                                                   const MCSymbol *OldDest,
                                                   MCSymbol *NewDest) {
  bool Patched{false};
  const auto Range = getEntriesForAddress(JTAddress);
  for (auto I = &Entries[Range.first], E = &Entries[Range.second];
       I != E; ++I) {
    auto &Entry = *I;
    if (Entry == OldDest) {
      Patched = true;
      Entry = NewDest;
    }
  }
  return Patched;
}

void BinaryFunction::JumpTable::updateOriginal(BinaryContext &BC) {
  // In non-relocation mode we have to emit jump tables in local sections.
  // This way we only overwrite them when a corresponding function is

@@ -624,6 +624,11 @@ public:
    /// Total number of times this jump table was used.
    uint64_t Count{0};

    /// Change all entries of the jump table in \p JTAddress pointing to
    /// \p OldDest to \p NewDest. Return false if unsuccessful.
    bool replaceDestination(uint64_t JTAddress, const MCSymbol *OldDest,
                            MCSymbol *NewDest);

    /// Update jump table at its original location.
    void updateOriginal(BinaryContext &BC);

@@ -1368,6 +1373,21 @@ public:
  /// new blocks into the CFG. This must be called after updateLayout.
  void updateCFIState(BinaryBasicBlock *Start, const unsigned NumNewBlocks);

  /// Change \p OldDest to \p NewDest in the jump table used at the end of
  /// \p BB. Returns false if \p OldDest couldn't be found as a valid target
  /// and no replacement took place.
  bool replaceJumpTableEntryIn(BinaryBasicBlock *BB,
                               BinaryBasicBlock *OldDest,
                               BinaryBasicBlock *NewDest);

  /// Split the CFG edge <From, To> by inserting an intermediate basic block.
  /// Returns a pointer to this new intermediate basic block. BB "From" will
  /// be updated to jump to the intermediate block, which in turn will have
  /// an unconditional branch to BB "To".
  /// The caller needs to manually call fixBranches(). This function only
  /// creates the correct CFG edges.
  BinaryBasicBlock *splitEdge(BinaryBasicBlock *From, BinaryBasicBlock *To);

  /// Determine direction of the branch based on the current layout.
  /// The caller is responsible for updating basic block indices prior to
  /// using this function (e.g. by calling
  /// BinaryFunction::updateLayoutIndices()).

@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//

#include "BinaryPassManager.h"
#include "Passes/AllocCombiner.h"
#include "Passes/FrameOptimizer.h"
#include "Passes/IndirectCallPromotion.h"
#include "Passes/Inliner.h"

@@ -62,12 +63,6 @@ OptimizeBodylessFunctions("optimize-bodyless-functions",
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
OptimizeFrameAccesses("frame-opt",
  cl::desc("optimize stack frame accesses"),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
Peepholes("peepholes",
  cl::desc("run peephole optimizations"),

@@ -331,9 +326,6 @@ void BinaryFunctionPassManager::runAllPasses(
  // fix branches consistency internally.
  Manager.registerPass(llvm::make_unique<FixupBranches>(PrintAfterBranchFixup));

  Manager.registerPass(llvm::make_unique<FrameOptimizerPass>(PrintFOP),
                       OptimizeFrameAccesses);

  // This pass should come close to last since it uses the estimated hot
  // size of a function to determine the order. It should definitely
  // also happen after any changes to the call graph are made, e.g. inlining.

@@ -356,6 +348,14 @@ void BinaryFunctionPassManager::runAllPasses(
  // This pass should always run last.*
  Manager.registerPass(llvm::make_unique<FinalizeFunctions>(PrintFinalized));

  // FrameOptimizer has an implicit dependency on FinalizeFunctions.
  // FrameOptimizer moves values around and needs to update CFIs. To do this,
  // it must read CFI, interpret it and rewrite it, so CFIs need to be
  // correctly placed according to the final layout.
  Manager.registerPass(llvm::make_unique<FrameOptimizerPass>(PrintFOP));

  Manager.registerPass(llvm::make_unique<AllocCombinerPass>(PrintFOP));

  // *except for this pass. This pass turns tail calls into jumps which
  // makes them invisible to function reordering.
  Manager.registerPass(
bolt/Passes/AllocCombiner.cpp (new file, 116 lines)
@@ -0,0 +1,116 @@
#include "AllocCombiner.h"

#define DEBUG_TYPE "alloccombiner"

using namespace llvm;

namespace opts {
extern bool shouldProcess(const bolt::BinaryFunction &Function);

extern cl::opt<bolt::FrameOptimizationType> FrameOptimization;

} // end namespace opts

namespace llvm {
namespace bolt {

namespace {

bool getStackAdjustmentSize(const BinaryContext &BC, const MCInst &Inst,
                            int64_t &Adjustment) {
  return BC.MIA->evaluateSimple(Inst, Adjustment,
                                std::make_pair(BC.MIA->getStackPointer(), 0LL),
                                std::make_pair(0, 0LL));
}

bool isIndifferentToSP(const MCInst &Inst, const BinaryContext &BC) {
  if (BC.MIA->isCFI(Inst))
    return true;

  const auto II = BC.MII->get(Inst.getOpcode());
  if (BC.MIA->isTerminator(Inst) ||
      II.hasImplicitDefOfPhysReg(BC.MIA->getStackPointer(), BC.MRI.get()) ||
      II.hasImplicitUseOfPhysReg(BC.MIA->getStackPointer()))
    return false;

  for (int I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const auto &Operand = Inst.getOperand(I);
    if (Operand.isReg() && Operand.getReg() == BC.MIA->getStackPointer()) {
      return false;
    }
  }
  return true;
}

bool shouldProc(BinaryFunction &Function) {
  return Function.isSimple() && Function.hasCFG() &&
         opts::shouldProcess(Function) && (Function.getSize() > 0);
}

void runForAllWeCare(std::map<uint64_t, BinaryFunction> &BFs,
                     std::function<void(BinaryFunction &)> Task) {
  for (auto &It : BFs) {
    auto &Function = It.second;
    if (shouldProc(Function))
      Task(Function);
  }
}

} // end anonymous namespace

void AllocCombinerPass::combineAdjustments(BinaryContext &BC,
                                           BinaryFunction &BF) {
  for (auto &BB : BF) {
    MCInst *Prev = nullptr;
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      auto &Inst = *I;
      if (isIndifferentToSP(Inst, BC))
        continue; // Skip updating Prev

      int64_t Adjustment{0LL};
      if (!Prev || !BC.MIA->isStackAdjustment(Inst) ||
          !BC.MIA->isStackAdjustment(*Prev) ||
          !getStackAdjustmentSize(BC, *Prev, Adjustment)) {
        Prev = &Inst;
        continue;
      }

      DEBUG({
        dbgs() << "At \"" << BF.getPrintName() << "\", combining: \n";
        Inst.dump();
        Prev->dump();
        dbgs() << "Adjustment: " << Adjustment << "\n";
      });

      if (BC.MIA->isSUB(Inst))
        Adjustment = -Adjustment;

      BC.MIA->addToImm(Inst, Adjustment, BC.Ctx.get());

      DEBUG({
        dbgs() << "After adjustment:\n";
        Inst.dump();
      });

      BB.eraseInstruction(Prev);
      ++NumCombined;
      Prev = &Inst;
    }
  }
}

void AllocCombinerPass::runOnFunctions(BinaryContext &BC,
                                       std::map<uint64_t, BinaryFunction> &BFs,
                                       std::set<uint64_t> &LargeFunctions) {
  if (opts::FrameOptimization == FOP_NONE)
    return;

  runForAllWeCare(
      BFs, [&](BinaryFunction &Function) { combineAdjustments(BC, Function); });

  outs() << "BOLT-INFO: Allocation combiner: " << NumCoalesced
         << " empty spaces coalesced.\n";
}

} // end namespace bolt
} // end namespace llvm
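A worked example of combineAdjustments (assumed x86-64 semantics,
illustration only; these instructions do not appear in the commit):

// Program order in one basic block:
//   sub rsp, 0x18     <- visited second in the reverse scan (Inst)
//   sub rsp, 0x8      <- visited first, remembered as Prev
//
// getStackAdjustmentSize evaluates Prev against SP = 0 and yields
// Adjustment = -8. Because Inst is a SUB, the sign is flipped to +8,
// addToImm rewrites Inst to "sub rsp, 0x20", and Prev is erased --
// one stack allocation instead of two.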
bolt/Passes/AllocCombiner.h (new file, 48 lines)
@@ -0,0 +1,48 @@
//===--- Passes/AllocCombiner.h -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEDEFRAG_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEDEFRAG_H

#include "BinaryPasses.h"
#include "DataflowInfoManager.h"

namespace llvm {
namespace bolt {

class AllocCombinerPass : public BinaryFunctionPass {
  /// Stats aggregating variables
  uint64_t NumCombined{0};
  uint64_t NumCoalesced{0};

  void combineAdjustments(BinaryContext &BC, BinaryFunction &BF);
  void coalesceEmptySpace(BinaryContext &BC, BinaryFunction &BF,
                          DataflowInfoManager &Info, FrameAnalysis &FA);

public:
  explicit AllocCombinerPass(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override {
    return "alloc-combiner";
  }

  /// Pass entry point
  void runOnFunctions(BinaryContext &BC,
                      std::map<uint64_t, BinaryFunction> &BFs,
                      std::set<uint64_t> &LargeFunctions) override;
};

} // namespace bolt
} // namespace llvm

#endif
@@ -584,9 +584,11 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
      auto BI = PredBB->branch_info_begin();
      std::swap(*BI, *(BI + 1));
    } else {
      // Change destination of the unconditional branch.
      // Change destination of the conditional branch.
      MIA->replaceBranchTarget(*CondBranch, CalleeSymbol, BC.Ctx.get());
    }
    // Annotate it, so "isCall" returns true for this jcc
    MIA->addAnnotation(BC.Ctx.get(), *CondBranch, "IsCTC", true);

    // Remove the unused successor which may be eliminated later
    // if there are no other users.

@@ -359,6 +359,12 @@ public:
                      std::set<uint64_t> &LargeFunctions) override;
};

enum FrameOptimizationType : char {
  FOP_NONE, /// Don't perform FOP.
  FOP_HOT,  /// Perform FOP on hot functions.
  FOP_ALL   /// Perform FOP on all functions.
};

} // namespace bolt
} // namespace llvm
@@ -1,4 +1,5 @@
add_llvm_library(LLVMBOLTPasses
  AllocCombiner.cpp
  BinaryPasses.cpp
  BinaryFunctionCallGraph.cpp
  CallGraph.cpp

@@ -14,7 +15,11 @@ add_llvm_library(LLVMBOLTPasses
  PettisAndHansen.cpp
  ReorderAlgorithm.cpp
  ReorderFunctions.cpp
  ShrinkWrapping.cpp
  StackAllocationAnalysis.cpp
  StackAvailableExpressions.cpp
  StackPointerTracking.cpp
  StackReachingUses.cpp
  )

include_directories( ${LLVM_MAIN_SRC_DIR}/tools/llvm-bolt )
@@ -265,12 +265,13 @@ public:
    return getStateAt(*Point.getInst());
  }

  /// Relies on a ptr map to fetch the previous instruction and then retrieve
  /// state. WARNING: watch out for invalidated pointers. Do not use this
  /// function if you invalidated pointers after the analysis has completed.
  ErrorOr<const StateTy &> getStateBefore(const MCInst &Point) {
    return getStateAt(PrevPoint[&Point]);
  }

  /// Return the in set (out set) of a given program point if the direction
  /// of the dataflow is forward (backward).
  ErrorOr<const StateTy &> getStateBefore(ProgramPoint Point) {
    if (Point.isBB())
      return getStateAt(*Point.getBB());

@@ -491,6 +492,25 @@ public:
  /// Maps expression defs (MCInsts) to their indices in the Expressions
  /// vector
  std::unordered_map<const MCInst *, uint64_t> ExprToIdx;

  /// Return whether \p Expr is in the state set at \p Point
  bool count(ProgramPoint Point, const MCInst &Expr) const {
    auto IdxIter = ExprToIdx.find(&Expr);
    assert(IdxIter != ExprToIdx.end() && "Invalid Expr");
    return (*this->getStateAt(Point))[IdxIter->second];
  }

  bool count(const MCInst &Point, const MCInst &Expr) const {
    auto IdxIter = ExprToIdx.find(&Expr);
    assert(IdxIter != ExprToIdx.end() && "Invalid Expr");
    return (*this->getStateAt(Point))[IdxIter->second];
  }

  /// Return whether \p Expr is in the state set at the instr of index
  /// \p PointIdx
  bool count(unsigned PointIdx, const MCInst &Expr) const {
    return count(*Expressions[PointIdx], Expr);
  }

  InstrsDataflowAnalysis(const BinaryContext &BC, BinaryFunction &BF)
      : DataflowAnalysis<Derived, BitVector, Backward>(BC, BF) {}
  virtual ~InstrsDataflowAnalysis() {}
@@ -20,10 +20,7 @@ ReachingDefOrUse</*Def=*/true> &DataflowInfoManager::getReachingDefs() {
    return *RD;
  assert(FA && "FrameAnalysis required");
  RD.reset(new ReachingDefOrUse<true>(*FA, BC, BF));
  {
    NamedRegionTimer T1("RD", "Dataflow", true);
    RD->run();
  }
  RD->run();
  return *RD;
}

@@ -36,10 +33,7 @@ ReachingDefOrUse</*Def=*/false> &DataflowInfoManager::getReachingUses() {
    return *RU;
  assert(FA && "FrameAnalysis required");
  RU.reset(new ReachingDefOrUse<false>(*FA, BC, BF));
  {
    NamedRegionTimer T1("RU", "Dataflow", true);
    RU->run();
  }
  RU->run();
  return *RU;
}

@@ -52,10 +46,7 @@ LivenessAnalysis &DataflowInfoManager::getLivenessAnalysis() {
    return *LA;
  assert(FA && "FrameAnalysis required");
  LA.reset(new LivenessAnalysis(*FA, BC, BF));
  {
    NamedRegionTimer T1("LA", "Dataflow", true);
    LA->run();
  }
  LA->run();
  return *LA;
}

@@ -63,14 +54,24 @@ void DataflowInfoManager::invalidateLivenessAnalysis() {
  LA.reset(nullptr);
}

StackReachingUses &DataflowInfoManager::getStackReachingUses() {
  if (SRU)
    return *SRU;
  assert(FA && "FrameAnalysis required");
  SRU.reset(new StackReachingUses(*FA, BC, BF));
  SRU->run();
  return *SRU;
}

void DataflowInfoManager::invalidateStackReachingUses() {
  SRU.reset(nullptr);
}

DominatorAnalysis<false> &DataflowInfoManager::getDominatorAnalysis() {
  if (DA)
    return *DA;
  DA.reset(new DominatorAnalysis<false>(BC, BF));
  {
    NamedRegionTimer T1("DA", "Dataflow", true);
    DA->run();
  }
  DA->run();
  return *DA;
}

@@ -82,10 +83,7 @@ DominatorAnalysis<true> &DataflowInfoManager::getPostDominatorAnalysis() {
  if (PDA)
    return *PDA;
  PDA.reset(new DominatorAnalysis<true>(BC, BF));
  {
    NamedRegionTimer T1("PDA", "Dataflow", true);
    PDA->run();
  }
  PDA->run();
  return *PDA;
}

@@ -97,14 +95,12 @@ StackPointerTracking &DataflowInfoManager::getStackPointerTracking() {
  if (SPT)
    return *SPT;
  SPT.reset(new StackPointerTracking(BC, BF));
  {
    NamedRegionTimer T1("SPT", "Dataflow", true);
    SPT->run();
  }
  SPT->run();
  return *SPT;
}

void DataflowInfoManager::invalidateStackPointerTracking() {
  invalidateStackAllocationAnalysis();
  SPT.reset(nullptr);
}

@@ -112,10 +108,7 @@ ReachingInsns<false> &DataflowInfoManager::getReachingInsns() {
  if (RI)
    return *RI;
  RI.reset(new ReachingInsns<false>(BC, BF));
  {
    NamedRegionTimer T1("RI", "Dataflow", true);
    RI->run();
  }
  RI->run();
  return *RI;
}

@@ -127,10 +120,7 @@ ReachingInsns<true> &DataflowInfoManager::getReachingInsnsBackwards() {
  if (RIB)
    return *RIB;
  RIB.reset(new ReachingInsns<true>(BC, BF));
  {
    NamedRegionTimer T1("RIB", "Dataflow", true);
    RIB->run();
  }
  RIB->run();
  return *RIB;
}

@@ -138,6 +128,18 @@ void DataflowInfoManager::invalidateReachingInsnsBackwards() {
  RIB.reset(nullptr);
}

StackAllocationAnalysis &DataflowInfoManager::getStackAllocationAnalysis() {
  if (SAA)
    return *SAA;
  SAA.reset(new StackAllocationAnalysis(BC, BF, getStackPointerTracking()));
  SAA->run();
  return *SAA;
}

void DataflowInfoManager::invalidateStackAllocationAnalysis() {
  SAA.reset(nullptr);
}

std::unordered_map<const MCInst *, BinaryBasicBlock *> &
DataflowInfoManager::getInsnToBBMap() {
  if (InsnToBB)

@@ -158,11 +160,13 @@ void DataflowInfoManager::invalidateAll() {
  invalidateReachingDefs();
  invalidateReachingUses();
  invalidateLivenessAnalysis();
  invalidateStackReachingUses();
  invalidateDominatorAnalysis();
  invalidatePostDominatorAnalysis();
  invalidateStackPointerTracking();
  invalidateReachingInsns();
  invalidateReachingInsnsBackwards();
  invalidateStackAllocationAnalysis();
  invalidateInsnToBBMap();
}
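A standalone sketch (plain C++, not BOLT code) of the lazy-getter pattern
DataflowInfoManager uses throughout: build an analysis on first request,
run it once, cache it, and invalidate by resetting the unique_ptr so the
next request recomputes.

#include <memory>
#include <utility>

template <typename Analysis, typename... Args>
Analysis &getOrCreate(std::unique_ptr<Analysis> &Slot, Args &&... As) {
  if (!Slot) {
    Slot.reset(new Analysis(std::forward<Args>(As)...));
    Slot->run(); // assumes the analysis exposes run(), as BOLT's do
  }
  return *Slot;
}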
@@ -14,10 +14,12 @@

#include "FrameAnalysis.h"
#include "ReachingDefOrUse.h"
#include "StackReachingUses.h"
#include "DominatorAnalysis.h"
#include "StackPointerTracking.h"
#include "ReachingInsns.h"
#include "LivenessAnalysis.h"
#include "StackAllocationAnalysis.h"

namespace llvm {
namespace bolt {

@@ -33,11 +35,13 @@ class DataflowInfoManager {
  std::unique_ptr<ReachingDefOrUse</*Def=*/true>> RD;
  std::unique_ptr<ReachingDefOrUse</*Def=*/false>> RU;
  std::unique_ptr<LivenessAnalysis> LA;
  std::unique_ptr<StackReachingUses> SRU;
  std::unique_ptr<DominatorAnalysis</*Bwd=*/false>> DA;
  std::unique_ptr<DominatorAnalysis</*Bwd=*/true>> PDA;
  std::unique_ptr<StackPointerTracking> SPT;
  std::unique_ptr<ReachingInsns<false>> RI;
  std::unique_ptr<ReachingInsns<true>> RIB;
  std::unique_ptr<StackAllocationAnalysis> SAA;
  std::unique_ptr<std::unordered_map<const MCInst *, BinaryBasicBlock *>>
      InsnToBB;

@@ -45,12 +49,20 @@ public:
  DataflowInfoManager(const FrameAnalysis *FA, const BinaryContext &BC,
                      BinaryFunction &BF) : FA(FA), BC(BC), BF(BF) {}

  /// Helper function to fetch the parent BB associated with a program point.
  /// If PP is a BB itself, then return it (cast to a BinaryBasicBlock).
  BinaryBasicBlock *getParentBB(ProgramPoint PP) {
    return PP.isBB() ? PP.getBB() : getInsnToBBMap()[PP.getInst()];
  }

  ReachingDefOrUse</*Def=*/true> &getReachingDefs();
  void invalidateReachingDefs();
  ReachingDefOrUse</*Def=*/false> &getReachingUses();
  void invalidateReachingUses();
  LivenessAnalysis &getLivenessAnalysis();
  void invalidateLivenessAnalysis();
  StackReachingUses &getStackReachingUses();
  void invalidateStackReachingUses();
  DominatorAnalysis<false> &getDominatorAnalysis();
  void invalidateDominatorAnalysis();
  DominatorAnalysis<true> &getPostDominatorAnalysis();

@@ -61,6 +73,8 @@ public:
  void invalidateReachingInsns();
  ReachingInsns<true> &getReachingInsnsBackwards();
  void invalidateReachingInsnsBackwards();
  StackAllocationAnalysis &getStackAllocationAnalysis();
  void invalidateStackAllocationAnalysis();
  std::unordered_map<const MCInst *, BinaryBasicBlock *> &getInsnToBBMap();
  void invalidateInsnToBBMap();
  void invalidateAll();
@@ -13,6 +13,7 @@
#define LLVM_TOOLS_LLVM_BOLT_PASSES_DOMINATORANALYSIS_H

#include "DataflowAnalysis.h"
#include "llvm/Support/Timer.h"

namespace llvm {
namespace bolt {

@@ -60,13 +61,21 @@ public:
    return Result;
  }

  bool doesADominatesB(const MCInst &A, const MCInst &B) {
    return (*this->getStateAt(B))[this->ExprToIdx[&A]];
  bool doesADominateB(const MCInst &A, unsigned BIdx) {
    return this->count(BIdx, A);
  }

  bool doesADominatesB(ProgramPoint A, const MCInst &B) {
  bool doesADominateB(const MCInst &A, const MCInst &B) {
    return this->count(B, A);
  }

  bool doesADominateB(const MCInst &A, ProgramPoint B) {
    return this->count(B, A);
  }

  bool doesADominateB(ProgramPoint A, const MCInst &B) {
    if (A.isInst())
      return doesADominatesB(*A.getInst(), B);
      return doesADominateB(*A.getInst(), B);

    // This analysis keeps track of which instructions dominate another
    // instruction; it doesn't keep track of BBs. So we need a non-empty

@@ -79,7 +88,7 @@ public:
      BB = *BB->succ_begin();
    }
    const MCInst &InstA = *BB->begin();
    return doesADominatesB(InstA, B);
    return doesADominateB(InstA, B);
  }

  void doForAllDominators(const MCInst &Inst,

@@ -89,6 +98,11 @@ public:
    }
  }

  void run() {
    NamedRegionTimer T1("DA", "Dataflow", true);
    InstrsDataflowAnalysis<DominatorAnalysis<Backward>, Backward>::run();
  }

private:
  void preflight() {
    // Populate our universe of tracked expressions with all instructions
@@ -215,6 +215,12 @@ public:

void FrameAnalysis::addArgAccessesFor(const BinaryContext &BC, MCInst &Inst,
                                      ArgAccesses &&AA) {
  if (auto OldAA = getArgAccessesFor(BC, Inst)) {
    if (OldAA->AssumeEverything)
      return;
    *OldAA = std::move(AA);
    return;
  }
  if (AA.AssumeEverything) {
    // Index 0 in ArgAccessesVector represents an "assume everything" entry
    BC.MIA->addAnnotation(BC.Ctx.get(), Inst, "ArgAccessEntry", 0U);

@@ -222,7 +228,7 @@ void FrameAnalysis::addArgAccessesFor(const BinaryContext &BC, MCInst &Inst,
  }
  BC.MIA->addAnnotation(BC.Ctx.get(), Inst, "ArgAccessEntry",
                        (unsigned)ArgAccessesVector.size());
  ArgAccessesVector.emplace_back(AA);
  ArgAccessesVector.emplace_back(std::move(AA));
}

void FrameAnalysis::addArgInStackAccessFor(const BinaryContext &BC,

@@ -329,29 +335,39 @@ BitVector FrameAnalysis::getFunctionClobberList(const BinaryContext &BC,

void FrameAnalysis::buildClobberMap(const BinaryContext &BC) {
  std::queue<BinaryFunction *> Queue;
  std::set<BinaryFunction *> InQueue;

  for (auto *Func : TopologicalCGOrder) {
    Queue.push(Func);
    InQueue.insert(Func);
  }

  while (!Queue.empty()) {
    auto *Func = Queue.front();
    Queue.pop();
    InQueue.erase(Func);

    BitVector RegsKilled = getFunctionClobberList(BC, Func);
    bool Updated = ClobberAnalysisOnly ? false : computeArgsAccessed(BC, *Func);
    bool ArgsUpdated =
        ClobberAnalysisOnly ? false : computeArgsAccessed(BC, *Func);
    bool RegsUpdated = false;

    if (RegsKilledMap.find(Func) == RegsKilledMap.end()) {
      RegsKilledMap[Func] = std::move(RegsKilled);
      continue;
    } else {
      RegsUpdated = RegsKilledMap[Func] != RegsKilled;
      if (RegsUpdated)
        RegsKilledMap[Func] = std::move(RegsKilled);
    }

    if (RegsKilledMap[Func] != RegsKilled || Updated) {
    if (RegsUpdated || ArgsUpdated) {
      for (auto Caller : Cg.predecessors(Cg.getNodeId(Func))) {
        Queue.push(Cg.nodeIdToFunc(Caller));
        BinaryFunction *CallerFunc = Cg.nodeIdToFunc(Caller);
        if (!InQueue.count(CallerFunc)) {
          InQueue.insert(CallerFunc);
          Queue.push(CallerFunc);
        }
      }
    }
    RegsKilledMap[Func] = std::move(RegsKilled);
  }

  if (opts::Verbosity == 0) {
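A standalone sketch (plain C++, not BOLT code) of the de-duplicated
worklist introduced above: the InQueue set guarantees a function is never
enqueued twice, keeping the fixed-point iteration from ballooning.

#include <queue>
#include <set>

template <typename T>
void enqueueOnce(std::queue<T *> &Queue, std::set<T *> &InQueue, T *F) {
  if (InQueue.insert(F).second) // insert() reports whether F was new
    Queue.push(F);
}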
@@ -453,10 +469,11 @@ bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC,
      break;
    }
    DEBUG(dbgs() << "Added arg in stack access annotation "
                 << CurOffset + Elem.first << "\n");
    addArgInStackAccessFor(
        BC, Inst, ArgInStackAccess{/*StackOffset=*/CurOffset + Elem.first,
                                   /*Size=*/Elem.second});
    addArgInStackAccessFor(
        BC, Inst,
        ArgInStackAccess{/*StackOffset=*/CurOffset + Elem.first,
                         /*Size=*/Elem.second});
  }
  return Changed;
}
@@ -10,6 +10,11 @@
//===----------------------------------------------------------------------===//

#include "FrameOptimizer.h"
#include "FrameAnalysis.h"
#include "ShrinkWrapping.h"
#include "StackAvailableExpressions.h"
#include "StackReachingUses.h"
#include "llvm/Support/Timer.h"
#include <queue>
#include <unordered_map>

@@ -19,616 +24,34 @@ using namespace llvm;

namespace opts {
extern cl::opt<unsigned> Verbosity;
}
extern cl::OptionCategory BoltOptCategory;

using namespace bolt;

cl::opt<FrameOptimizationType>
FrameOptimization("frame-opt",
  cl::init(FOP_NONE),
  cl::desc("optimize stack frame accesses"),
  cl::values(
    clEnumValN(FOP_NONE, "none", "do not perform frame optimization"),
    clEnumValN(FOP_HOT, "hot", "perform FOP on hot functions"),
    clEnumValN(FOP_ALL, "all", "perform FOP on all functions"),
    clEnumValEnd),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

void FrameOptimizerPass::getInstClobberList(const BinaryContext &BC,
                                            const MCInst &Inst,
                                            BitVector &KillSet) const {
  if (!BC.MIA->isCall(Inst)) {
    BC.MIA->getClobberedRegs(Inst, KillSet, *BC.MRI);
    return;
  }

  const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst);
  // If indirect call, kill set should have all elements
  if (TargetSymbol == nullptr) {
    KillSet.set(0, KillSet.size());
    return;
  }

  const auto *Function = BC.getFunctionForSymbol(TargetSymbol);
  if (Function == nullptr) {
    // Call to a function without a BinaryFunction object.
    // This should be a call to a PLT entry, and since it is a trampoline to
    // a DSO, we can't really know the code in advance. Conservatively assume
    // everything is clobbered.
    KillSet.set(0, KillSet.size());
    return;
  }
  auto BV = RegsKilledMap.find(Function);
  if (BV != RegsKilledMap.end()) {
    KillSet |= BV->second;
    return;
  }
  // Ignore calls to functions whose clobber list wasn't yet calculated. This
  // instruction will be evaluated again once we have info for the callee.
  return;
}

BitVector
FrameOptimizerPass::getFunctionClobberList(const BinaryContext &BC,
                                           const BinaryFunction *Func) {
  BitVector RegsKilled = BitVector(BC.MRI->getNumRegs(), false);

  if (!Func->isSimple() || !shouldOptimize(*Func)) {
    RegsKilled.set(0, RegsKilled.size());
    return RegsKilled;
  }

  for (const auto &BB : *Func) {
    for (const auto &Inst : BB) {
      getInstClobberList(BC, Inst, RegsKilled);
    }
  }

  return RegsKilled;
}

void FrameOptimizerPass::buildClobberMap(const BinaryContext &BC) {
  std::queue<const BinaryFunction *> Queue;

  for (auto *Func : TopologicalCGOrder) {
    Queue.push(Func);
  }

  while (!Queue.empty()) {
    auto *Func = Queue.front();
    Queue.pop();

    BitVector RegsKilled = getFunctionClobberList(BC, Func);

    if (RegsKilledMap.find(Func) == RegsKilledMap.end()) {
      RegsKilledMap[Func] = std::move(RegsKilled);
      continue;
    }

    if (RegsKilledMap[Func] != RegsKilled) {
      for (auto Caller : Cg.predecessors(Cg.getNodeId(Func))) {
        Queue.push(Cg.nodeIdToFunc(Caller));
      }
    }
    RegsKilledMap[Func] = std::move(RegsKilled);
  }

  if (opts::Verbosity == 0) {
#ifndef NDEBUG
    if (!DebugFlag || !isCurrentDebugType("fop"))
      return;
#else
    return;
#endif
  }

  // This loop is for computing statistics only
  for (auto *Func : TopologicalCGOrder) {
    auto Iter = RegsKilledMap.find(Func);
    assert(Iter != RegsKilledMap.end() &&
           "Failed to compute all clobbers list");
    if (Iter->second.all()) {
      auto Count = Func->getExecutionCount();
      if (Count != BinaryFunction::COUNT_NO_PROFILE)
        CountFunctionsAllClobber += Count;
      ++NumFunctionsAllClobber;
    }
    DEBUG_WITH_TYPE("fop",
      dbgs() << "Killed regs set for func: " << Func->getPrintName() << "\n";
      const BitVector &RegsKilled = Iter->second;
      int RegIdx = RegsKilled.find_first();
      while (RegIdx != -1) {
        dbgs() << "\tREG" << RegIdx;
        RegIdx = RegsKilled.find_next(RegIdx);
      }
      dbgs() << "\n";
    );
  }
}
namespace {

template <typename StateTy>
class ForwardDataflow {
protected:
  /// Reference to the function being analysed
  const BinaryFunction &Func;

  /// Tracks the set of available exprs at the end of each MCInst in this
  /// function
  std::unordered_map<const MCInst *, StateTy> StateAtPoint;
  /// Tracks the set of available exprs at basic block start
  std::unordered_map<const BinaryBasicBlock *, StateTy> StateAtBBEntry;

  virtual void preflight() = 0;

  virtual StateTy getStartingStateAtBB(const BinaryBasicBlock &BB) = 0;

  virtual StateTy getStartingStateAtPoint(const MCInst &Point) = 0;

  virtual void doConfluence(StateTy &StateOut, const StateTy &StateIn) = 0;

  virtual StateTy computeNext(const MCInst &Point, const StateTy &Cur) = 0;

public:
  ForwardDataflow(const BinaryFunction &BF) : Func(BF) {}
  virtual ~ForwardDataflow() {}

  ErrorOr<const StateTy &> getStateAt(const BinaryBasicBlock &BB) const {
    auto Iter = StateAtBBEntry.find(&BB);
    if (Iter == StateAtBBEntry.end())
      return make_error_code(errc::result_out_of_range);
    return Iter->second;
  }

  ErrorOr<const StateTy &> getStateAt(const MCInst &Point) const {
    auto Iter = StateAtPoint.find(&Point);
    if (Iter == StateAtPoint.end())
      return make_error_code(errc::result_out_of_range);
    return Iter->second;
  }

  void run() {
    preflight();

    // Initialize state for all points of the function
    for (auto &BB : Func) {
      StateAtBBEntry[&BB] = getStartingStateAtBB(BB);
      for (auto &Inst : BB) {
        StateAtPoint[&Inst] = getStartingStateAtPoint(Inst);
      }
    }
    assert(Func.begin() != Func.end() && "Unexpected empty function");

    std::queue<const BinaryBasicBlock *> Worklist;
    // TODO: Pushing this in a DFS ordering will greatly speed up the
    // dataflow performance.
    for (auto &BB : Func) {
      Worklist.push(&BB);
    }

    // Main dataflow loop
    while (!Worklist.empty()) {
      auto *BB = Worklist.front();
      Worklist.pop();

      DEBUG(dbgs() << "\tNow at BB " << BB->getName() << "\n");

      // Calculate state at the entry of first instruction in BB
      StateTy &StateAtEntry = StateAtBBEntry[BB];
      for (auto I = BB->pred_begin(), E = BB->pred_end(); I != E; ++I) {
        auto Last = (*I)->rbegin();
        if (Last != (*I)->rend()) {
          doConfluence(StateAtEntry, StateAtPoint[&*Last]);
        } else {
          doConfluence(StateAtEntry, StateAtBBEntry[*I]);
        }
      }
      // Skip empty
      if (BB->begin() == BB->end())
        continue;

      // Propagate information from first instruction down to the last one
      bool Changed = false;
      StateTy *PrevState = &StateAtEntry;
      const MCInst *LAST = &*BB->rbegin();
      for (auto &Inst : *BB) {
        DEBUG(dbgs() << "\t\tNow at ");
        DEBUG(Inst.dump());

        StateTy CurState = computeNext(Inst, *PrevState);

        if (StateAtPoint[&Inst] != CurState) {
          StateAtPoint[&Inst] = CurState;
          if (&Inst == LAST)
            Changed = true;
        }
        PrevState = &StateAtPoint[&Inst];
      }

      if (Changed) {
        for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) {
          Worklist.push(*I);
        }
      }
    }
  }
};

class StackAvailableExpressions : public ForwardDataflow<BitVector> {
public:
  StackAvailableExpressions(const FrameOptimizerPass &FOP,
                            const BinaryContext &BC, const BinaryFunction &BF)
      : ForwardDataflow(BF), FOP(FOP), FrameIndexMap(FOP.FrameIndexMap),
        BC(BC) {}
  virtual ~StackAvailableExpressions() {}

  /// Define an iterator for navigating the expressions calculated by the
  /// dataflow at each program point
  class ExprIterator
      : public std::iterator<std::forward_iterator_tag, const MCInst *> {
  public:
    ExprIterator &operator++() {
      assert(Idx != -1 && "Iterator already at the end");
      Idx = BV->find_next(Idx);
      return *this;
    }
    ExprIterator operator++(int) {
      assert(Idx != -1 && "Iterator already at the end");
      ExprIterator Ret = *this;
      ++(*this);
      return Ret;
    }
    bool operator==(ExprIterator Other) const { return Idx == Other.Idx; }
    bool operator!=(ExprIterator Other) const { return Idx != Other.Idx; }
    const MCInst *operator*() {
      assert(Idx != -1 && "Invalid access to end iterator");
      return Expressions[Idx];
    }
    ExprIterator(const BitVector *BV, const std::vector<const MCInst *> &Exprs)
        : BV(BV), Expressions(Exprs) {
      Idx = BV->find_first();
    }
    ExprIterator(const BitVector *BV, const std::vector<const MCInst *> &Exprs,
                 int Idx)
        : BV(BV), Expressions(Exprs), Idx(Idx) {}

  private:
    const BitVector *BV;
    const std::vector<const MCInst *> &Expressions;
  public:
    int Idx;
  };
  ExprIterator expr_begin(const BitVector &BV) const {
    return ExprIterator(&BV, Expressions);
  }
  ExprIterator expr_begin(const MCInst &Point) const {
    auto Iter = StateAtPoint.find(&Point);
    if (Iter == StateAtPoint.end())
      return expr_end();
    return ExprIterator(&Iter->second, Expressions);
  }
  ExprIterator expr_begin(const BinaryBasicBlock &BB) const {
    auto Iter = StateAtBBEntry.find(&BB);
    if (Iter == StateAtBBEntry.end())
      return expr_end();
    return ExprIterator(&Iter->second, Expressions);
  }
  ExprIterator expr_end() const {
    return ExprIterator(nullptr, Expressions, -1);
  }

private:
  /// Reference to the result of stack frame analysis
  const FrameOptimizerPass &FOP;
  const FrameOptimizerPass::FrameIndexMapTy &FrameIndexMap;
  const BinaryContext &BC;

  /// Used to size the set of expressions/definitions being tracked by the
  /// dataflow analysis
  uint64_t NumInstrs{0};
  /// We put every MCInst we want to track (each one representing an
  /// expression/def) into a vector because we need to associate them with
  /// small numbers. They will be tracked via BitVectors throughout the
  /// dataflow analysis.
  std::vector<const MCInst *> Expressions;
  /// Maps expression defs (MCInsts) to their indices in the Expressions
  /// vector
  std::unordered_map<const MCInst *, uint64_t> ExprToIdx;

  void preflight() override {
    DEBUG(dbgs() << "Starting StackAvailableExpressions on \""
                 << Func.getPrintName() << "\"\n");

    // Populate our universe of tracked expressions. We are interested in
    // tracking available stores to frame position at any given point of the
    // program.
    for (auto &BB : Func) {
      for (auto &Inst : BB) {
        auto FIEIter = FrameIndexMap.find(&Inst);
        if (FIEIter == FrameIndexMap.end())
          continue;
        const auto &FIE = FIEIter->second;
        if (FIE.IsLoad == false && FIE.IsSimple == true) {
          Expressions.push_back(&Inst);
          ExprToIdx[&Inst] = NumInstrs++;
        }
      }
    }
  }

  BitVector getStartingStateAtBB(const BinaryBasicBlock &BB) override {
    // Entry points start with empty set (Function entry and landing pads).
    // All others start with the full set.
    if (BB.pred_size() == 0)
      return BitVector(NumInstrs, false);
    return BitVector(NumInstrs, true);
  }

  BitVector getStartingStateAtPoint(const MCInst &Point) override {
    return BitVector(NumInstrs, true);
  }

  void doConfluence(BitVector &StateOut, const BitVector &StateIn) override {
    StateOut &= StateIn;
  }

  /// Define the function computing the kill set -- whether expression Y, a
  /// tracked expression, will be considered to be dead after executing X.
  bool doesXKillsY(const MCInst *X, const MCInst *Y) {
    // If both are stores, and both store to the same stack location, return
    // true
    auto FIEIterX = FrameIndexMap.find(X);
    auto FIEIterY = FrameIndexMap.find(Y);
    if (FIEIterX != FrameIndexMap.end() && FIEIterY != FrameIndexMap.end()) {
      const FrameOptimizerPass::FrameIndexEntry &FIEX = FIEIterX->second;
      const FrameOptimizerPass::FrameIndexEntry &FIEY = FIEIterY->second;
      if (FIEX.IsLoad == 0 && FIEY.IsLoad == 0 &&
          FIEX.StackOffset + FIEX.Size > FIEY.StackOffset &&
          FIEX.StackOffset < FIEY.StackOffset + FIEY.Size)
        return true;
    }
    // getClobberedRegs for X and Y. If they intersect, return true
    BitVector XClobbers = BitVector(BC.MRI->getNumRegs(), false);
    BitVector YClobbers = BitVector(BC.MRI->getNumRegs(), false);
    FOP.getInstClobberList(BC, *X, XClobbers);
    // If Y is a store to stack, its clobber list is its source reg. This is
    // different than the rest because we want to check if the store source
    // reaches its corresponding load untouched.
    if (FIEIterY != FrameIndexMap.end() && FIEIterY->second.IsLoad == 0 &&
        FIEIterY->second.IsStoreFromReg) {
      YClobbers.set(FIEIterY->second.RegOrImm);
    } else {
      FOP.getInstClobberList(BC, *Y, YClobbers);
    }
    XClobbers &= YClobbers;
    return XClobbers.any();
  }
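A standalone sketch (plain C++, not BOLT code) of the store-overlap test
inside doesXKillsY: two stack accesses conflict exactly when their byte
ranges intersect.

#include <cstdint>

bool rangesOverlap(int64_t OffX, uint8_t SizeX, int64_t OffY, uint8_t SizeY) {
  return OffX + SizeX > OffY && OffX < OffY + SizeY;
}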

  BitVector computeNext(const MCInst &Point, const BitVector &Cur) override {
    BitVector Next = Cur;
    // Kill
    for (auto I = expr_begin(Next), E = expr_end(); I != E; ++I) {
      assert(*I != nullptr && "Lost pointers");
      DEBUG(dbgs() << "\t\t\tDoes it kill ");
      DEBUG((*I)->dump());
      if (doesXKillsY(&Point, *I)) {
        DEBUG(dbgs() << "\t\t\t\tYes\n");
        Next.reset(I.Idx);
      }
    }
    // Gen
    auto FIEIter = FrameIndexMap.find(&Point);
    if (FIEIter != FrameIndexMap.end() &&
        FIEIter->second.IsLoad == false &&
        FIEIter->second.IsSimple == true)
      Next.set(ExprToIdx[&Point]);
    return Next;
  }
};

class StackPointerTracking : public ForwardDataflow<int> {
  const BinaryContext &BC;

  void preflight() override {
    DEBUG(dbgs() << "Starting StackPointerTracking on \""
                 << Func.getPrintName() << "\"\n");
  }

  int getStartingStateAtBB(const BinaryBasicBlock &BB) override {
    // The entry BB starts with offset -8 from CFA.
    // All others start with EMPTY (meaning we don't know anything).
    if (BB.isEntryPoint())
      return -8;
    return EMPTY;
  }

  int getStartingStateAtPoint(const MCInst &Point) override {
    return EMPTY;
  }

  void doConfluence(int &StateOut, const int &StateIn) override {
    if (StateOut == EMPTY) {
      StateOut = StateIn;
      return;
    }
    if (StateIn == EMPTY || StateIn == StateOut)
      return;

    // We can't agree on a specific value from this point on
    StateOut = SUPERPOSITION;
  }

  int computeNext(const MCInst &Point, const int &Cur) override {
    const auto &MIA = BC.MIA;

    if (Cur == EMPTY || Cur == SUPERPOSITION)
      return Cur;

    if (int Sz = MIA->getPushSize(Point))
      return Cur - Sz;

    if (int Sz = MIA->getPopSize(Point))
      return Cur + Sz;

    if (BC.MII->get(Point.getOpcode())
            .hasDefOfPhysReg(Point, MIA->getStackPointer(), *BC.MRI)) {
      int64_t Offset = Cur;
      if (!MIA->evaluateSimple(Point, Offset, std::make_pair(0, 0),
                               std::make_pair(0, 0)))
        return SUPERPOSITION;

      return static_cast<int>(Offset);
    }

    return Cur;
  }

public:
  StackPointerTracking(const BinaryContext &BC, const BinaryFunction &BF)
      : ForwardDataflow(BF), BC(BC) {}
  virtual ~StackPointerTracking() {}

  static constexpr int SUPERPOSITION = std::numeric_limits<int>::max();
  static constexpr int EMPTY = std::numeric_limits<int>::min();
};

} // anonymous namespace
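A standalone sketch (plain C++, not BOLT code) of the three-level lattice
StackPointerTracking uses: EMPTY (no information yet) meets anything
without effect, equal offsets survive, and any disagreement collapses to
SUPERPOSITION.

#include <limits>

constexpr int EMPTY = std::numeric_limits<int>::min();
constexpr int SUPERPOSITION = std::numeric_limits<int>::max();

int meet(int Out, int In) {
  if (Out == EMPTY)
    return In;
  if (In == EMPTY || In == Out)
    return Out;
  return SUPERPOSITION; // predecessors disagree on the SP offset
}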

bool FrameOptimizerPass::restoreFrameIndex(const BinaryContext &BC,
                                           const BinaryFunction &BF) {
  StackPointerTracking SPT(BC, BF);

  SPT.run();

  // Vars used for storing useful CFI info to give us a hint about how the
  // stack is used in this function
  int64_t CfaOffset{-8};
  uint16_t CfaReg{7};
  bool CfaRegLocked{false};
  uint16_t CfaRegLockedVal{0};
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;

  DEBUG(dbgs() << "Restoring frame indices for \"" << BF.getPrintName()
               << "\"\n");

  // TODO: Implement SP tracking and improve this analysis
  for (auto &BB : BF) {
    DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    const MCInst *Prev = nullptr;
    for (const auto &Inst : BB) {
      int SPOffset = (Prev ? *SPT.getStateAt(*Prev) : *SPT.getStateAt(BB));
      DEBUG({
        dbgs() << "\t\tNow at ";
        Inst.dump();
        dbgs() << "\t\t\tSP offset is " << SPOffset << "\n";
      });
      Prev = &Inst;
      // Use CFI information to keep track of which register is being used
      // to access the frame
      if (BC.MIA->isCFI(Inst)) {
        const auto *CFI = BF.getCFIFor(Inst);
        switch (CFI->getOperation()) {
        case MCCFIInstruction::OpDefCfa:
          CfaOffset = CFI->getOffset();
          // Fall-through
        case MCCFIInstruction::OpDefCfaRegister:
          CfaReg = CFI->getRegister();
          break;
        case MCCFIInstruction::OpDefCfaOffset:
          CfaOffset = CFI->getOffset();
          break;
        case MCCFIInstruction::OpRememberState:
          CFIStack.push(std::make_pair(CfaOffset, CfaReg));
          break;
        case MCCFIInstruction::OpRestoreState: {
          assert(!CFIStack.empty() && "Corrupt CFI stack");
          auto &Elem = CFIStack.top();
          CFIStack.pop();
          CfaOffset = Elem.first;
          CfaReg = Elem.second;
          break;
        }
        case MCCFIInstruction::OpAdjustCfaOffset:
          llvm_unreachable("Unhandled AdjustCfaOffset");
          break;
        default:
          break;
        }
        continue;
      }

      if (BC.MIA->leaksStackAddress(Inst, *BC.MRI, false)) {
        DEBUG(dbgs() << "Leaked stack address, giving up on this function.\n");
        DEBUG(dbgs() << "Blame insn: ");
        DEBUG(Inst.dump());
        return false;
      }

      bool IsLoad = false;
      bool IsStore = false;
      bool IsStoreFromReg = false;
      bool IsSimple = false;
      int32_t SrcImm{0};
      MCPhysReg Reg{0};
      MCPhysReg StackPtrReg{0};
      int64_t StackOffset{0};
      uint8_t Size{0};
      bool IsIndexed = false;
      if (BC.MIA->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                SrcImm, StackPtrReg, StackOffset, Size,
                                IsSimple, IsIndexed)) {
        assert(Size != 0);
        if (CfaRegLocked && CfaRegLockedVal != CfaReg) {
          DEBUG(dbgs() << "CFA reg changed, giving up on this function.\n");
          return false;
        }
        if (StackPtrReg != BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false)) {
          if (StackPtrReg != BC.MIA->getStackPointer() ||
              SPOffset == SPT.EMPTY || SPOffset == SPT.SUPERPOSITION) {
            DEBUG(dbgs()
                  << "Found stack access with reg different than cfa reg.\n");
            DEBUG(dbgs() << "\tCurrent CFA reg: " << CfaReg
                         << "\n\tStack access reg: " << StackPtrReg << "\n");
            DEBUG(dbgs() << "Blame insn: ");
            DEBUG(Inst.dump());
            return false;
          }
          DEBUG(dbgs() << "Adding access via SP while CFA reg is another one\n");
          if (IsStoreFromReg || IsLoad)
            SrcImm = Reg;
          // Ignore accesses to the previous stack frame
          if (SPOffset + StackOffset >= 0)
            continue;
          FrameIndexMap.emplace(
              &Inst, FrameIndexEntry{IsLoad, IsStoreFromReg, SrcImm,
                                     SPOffset + StackOffset, Size, IsSimple});
        } else {
          CfaRegLocked = true;
          CfaRegLockedVal = CfaReg;
          if (IsStoreFromReg || IsLoad)
            SrcImm = Reg;
          // Ignore accesses to the previous stack frame
          if (CfaOffset + StackOffset >= 0)
            continue;
          FrameIndexMap.emplace(
              &Inst, FrameIndexEntry{IsLoad, IsStoreFromReg, SrcImm,
                                     CfaOffset + StackOffset, Size, IsSimple});
        }

        DEBUG_WITH_TYPE("fop",
          dbgs() << "Frame index annotation added to:\n";
          BC.printInstruction(dbgs(), Inst, 0, &BF, true);
          dbgs() << " FrameIndexEntry <IsLoad:" << IsLoad << " StackOffset:";
          if (FrameIndexMap[&Inst].StackOffset < 0)
            dbgs() << "-" << Twine::utohexstr(-FrameIndexMap[&Inst].StackOffset);
          else
            dbgs() << "+" << Twine::utohexstr(FrameIndexMap[&Inst].StackOffset);
          dbgs() << " IsStoreFromReg:" << FrameIndexMap[&Inst].IsStoreFromReg
|
||||
<< " RegOrImm:" << FrameIndexMap[&Inst].RegOrImm << ">\n";
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
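
The normalized frame index computed above is plain CFA-relative arithmetic.
A standalone sketch with illustrative numbers; the sign convention for
CfaOffset (negated DWARF def_cfa offset) is an assumption on the editor's
part, not stated in this diff:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical x86-64 frame: after "push %rbp; mov %rsp,%rbp" the CFA is
  // rbp+16, which the pass would track as CfaOffset = -16 (rbp sits 16
  // bytes below the CFA).
  int64_t CfaOffset = -16;
  // "mov %rax, -0x8(%rbp)" uses StackOffset = -8 off the CFA register.
  int64_t StackOffset = -8;
  // The resulting frame index names a slot 24 bytes below the CFA.
  assert(CfaOffset + StackOffset == -24);
  // Sums >= 0 would name the caller's frame and are skipped by the pass.
  return 0;
}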

void FrameOptimizerPass::removeUnnecessarySpills(const BinaryContext &BC,
                                                 BinaryFunction &BF) {
  StackAvailableExpressions SAE(*this, BC, BF);

void FrameOptimizerPass::removeUnnecessaryLoads(const FrameAnalysis &FA,
                                                const BinaryContext &BC,
                                                BinaryFunction &BF) {
  StackAvailableExpressions SAE(FA, BC, BF);
  SAE.run();

  DEBUG(dbgs() << "Performing frame optimization\n");
  DEBUG(dbgs() << "Performing unnecessary loads removal\n");
  std::deque<std::pair<BinaryBasicBlock *, MCInst *>> ToErase;
  bool Changed = false;
  const auto ExprEnd = SAE.expr_end();
@@ -648,16 +71,16 @@ void FrameOptimizerPass::removeUnnecessarySpills(const BinaryContext &BC,
      // If Inst is a load from stack and the current available expressions show
      // this value is available in a register or immediate, replace this load
      // with move from register or from immediate.
      const auto Iter = FrameIndexMap.find(&Inst);
      if (Iter == FrameIndexMap.end()) {
      auto FIEX = FA.getFIEFor(BC, Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      const FrameIndexEntry &FIEX = Iter->second;
      // FIXME: Change to remove IsSimple == 0. We're being conservative here,
      // but once replaceMemOperandWithReg is ready, we should feed it with all
      // sorts of complex instructions.
      if (FIEX.IsLoad == 0 || FIEX.IsSimple == 0) {
      if (FIEX->IsLoad == false || FIEX->IsSimple == false ||
          FIEX->StackOffset >= 0) {
        Prev = &Inst;
        continue;
      }
@@ -665,13 +88,14 @@ void FrameOptimizerPass::removeUnnecessarySpills(const BinaryContext &BC,
      for (auto I = Prev ? SAE.expr_begin(*Prev) : SAE.expr_begin(BB);
           I != ExprEnd; ++I) {
        const MCInst *AvailableInst = *I;
        const auto Iter = FrameIndexMap.find(AvailableInst);
        if (Iter == FrameIndexMap.end())
        auto FIEY = FA.getFIEFor(BC, *AvailableInst);
        if (!FIEY)
          continue;

        const FrameIndexEntry &FIEY = Iter->second;
        assert(FIEY.IsLoad == 0 && FIEY.IsSimple != 0);
        if (FIEX.StackOffset != FIEY.StackOffset || FIEX.Size != FIEY.Size)
        assert(FIEY->IsStore && FIEY->IsSimple);
        if (FIEX->StackOffset != FIEY->StackOffset || FIEX->Size != FIEY->Size)
          continue;
        // TODO: Change push/pops to stack adjustment instruction
        if (BC.MIA->isPop(Inst))
          continue;

        ++NumRedundantLoads;
@@ -682,12 +106,13 @@ void FrameOptimizerPass::removeUnnecessarySpills(const BinaryContext &BC,
        DEBUG(AvailableInst->dump());
        DEBUG(dbgs() << "@BB: " << BB.getName() << "\n");
        // Replace load
        if (FIEY.IsStoreFromReg) {
          if (!BC.MIA->replaceMemOperandWithReg(Inst, FIEY.RegOrImm)) {
        if (FIEY->IsStoreFromReg) {
          if (!BC.MIA->replaceMemOperandWithReg(Inst, FIEY->RegOrImm)) {
            DEBUG(dbgs() << "FAILED to change operand to a reg\n");
            break;
          }
          ++NumLoadsChangedToReg;
          BC.MIA->removeAnnotation(Inst, "FrameAccessEntry");
          DEBUG(dbgs() << "Changed operand to a reg\n");
          if (BC.MIA->isRedundantMove(Inst)) {
            ++NumLoadsDeleted;
@@ -697,12 +122,13 @@ void FrameOptimizerPass::removeUnnecessarySpills(const BinaryContext &BC,
          }
        } else {
          char Buf[8] = {0, 0, 0, 0, 0, 0, 0, 0};
          support::ulittle64_t::ref(Buf + 0) = FIEY.RegOrImm;
          support::ulittle64_t::ref(Buf + 0) = FIEY->RegOrImm;
          DEBUG(dbgs() << "Changing operand to an imm... ");
          if (!BC.MIA->replaceMemOperandWithImm(Inst, StringRef(Buf, 8), 0)) {
            DEBUG(dbgs() << "FAILED\n");
          } else {
            ++NumLoadsChangedToImm;
            BC.MIA->removeAnnotation(Inst, "FrameAccessEntry");
            DEBUG(dbgs() << "Ok\n");
          }
        }
@@ -716,71 +142,130 @@ void FrameOptimizerPass::removeUnnecessarySpills(const BinaryContext &BC,
  if (Changed) {
    DEBUG(dbgs() << "FOP modified \"" << BF.getPrintName() << "\"\n");
  }
  // TODO: Implement an interface of eraseInstruction that works out the
  // complete list of elements to remove.
  for (auto I : ToErase) {
    I.first->eraseInstruction(I.second);
  }
}

void FrameOptimizerPass::removeUnusedStores(const FrameAnalysis &FA,
                                            const BinaryContext &BC,
                                            BinaryFunction &BF) {
  StackReachingUses SRU(FA, BC, BF);
  SRU.run();

  DEBUG(dbgs() << "Performing unused stores removal\n");
  std::vector<std::pair<BinaryBasicBlock *, MCInst *>> ToErase;
  bool Changed = false;
  for (auto &BB : BF) {
    DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      auto &Inst = *I;
      DEBUG({
        dbgs() << "\t\tNow at ";
        Inst.dump();
        for (auto I = Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB);
             I != SRU.expr_end(); ++I) {
          dbgs() << "\t\t\tReached by: ";
          (*I)->dump();
        }
      });
      auto FIEX = FA.getFIEFor(BC, Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (FIEX->IsLoad || !FIEX->IsSimple || FIEX->StackOffset >= 0) {
        Prev = &Inst;
        continue;
      }

      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB))) {
        Prev = &Inst;
        continue;
      }
      // TODO: Change push/pops to stack adjustment instruction
      if (BC.MIA->isPush(Inst))
        continue;

      ++NumRedundantStores;
      Changed = true;
      DEBUG(dbgs() << "Unused store instruction: ");
      DEBUG(Inst.dump());
      DEBUG(dbgs() << "@BB: " << BB.getName() << "\n");
      // Delete it!
      ToErase.push_back(std::make_pair(&BB, &Inst));
      Prev = &Inst;
    }
  }

  for (auto I : ToErase) {
    I.first->eraseInstruction(I.second);
  }
  if (Changed) {
    DEBUG(dbgs() << "FOP modified \"" << BF.getPrintName() << "\"\n");
  }
}

void FrameOptimizerPass::runOnFunctions(BinaryContext &BC,
                                        std::map<uint64_t, BinaryFunction> &BFs,
                                        std::set<uint64_t> &) {
  uint64_t NumFunctionsNotOptimized{0};
  uint64_t NumFunctionsFailedRestoreFI{0};
  uint64_t CountFunctionsNotOptimized{0};
  uint64_t CountFunctionsFailedRestoreFI{0};
  uint64_t CountDenominator{0};
  Cg = buildCallGraph(BC, BFs);
  TopologicalCGOrder = Cg.buildTraversalOrder();
  buildClobberMap(BC);
                                        std::set<uint64_t> &LargeFunctions) {
  if (opts::FrameOptimization == FOP_NONE)
    return;

  // Run FrameAnalysis pass
  FrameAnalysis FA(PrintPass);
  FA.runOnFunctions(BC, BFs, LargeFunctions);

  // Our main loop: perform caller-saved register optimizations, then
  // callee-saved register optimizations (shrink wrapping).
  for (auto &I : BFs) {
    auto Count = I.second.getExecutionCount();
    if (Count != BinaryFunction::COUNT_NO_PROFILE)
      CountDenominator += Count;
    if (!shouldOptimize(I.second)) {
      ++NumFunctionsNotOptimized;
      if (Count != BinaryFunction::COUNT_NO_PROFILE)
        CountFunctionsNotOptimized += Count;
    if (!FA.hasFrameInfo(I.second))
      continue;
    // Restrict pass execution if user asked to only run on hot functions
    if (opts::FrameOptimization == FOP_HOT) {
      if (I.second.getKnownExecutionCount() < BC.getHotThreshold())
        continue;
      DEBUG(dbgs() << "Considering " << I.second.getPrintName()
                   << " for frame optimizations because its execution count ( "
                   << I.second.getKnownExecutionCount()
                   << " ) exceeds our hotness threshold ( "
                   << BC.getHotThreshold() << " )\n");
    }
    if (!restoreFrameIndex(BC, I.second)) {
      ++NumFunctionsFailedRestoreFI;
      auto Count = I.second.getExecutionCount();
      if (Count != BinaryFunction::COUNT_NO_PROFILE)
        CountFunctionsFailedRestoreFI += Count;
    {
      NamedRegionTimer T1("remove loads", "FOP breakdown", true);
      removeUnnecessaryLoads(FA, BC, I.second);
    }
    {
      NamedRegionTimer T1("remove stores", "FOP breakdown", true);
      removeUnusedStores(FA, BC, I.second);
    }
    // Don't even start shrink wrapping if no profiling info is available
    if (I.second.getKnownExecutionCount() == 0)
      continue;
    {
      NamedRegionTimer T1("move spills", "FOP breakdown", true);
      DataflowInfoManager Info(&FA, BC, I.second);
      ShrinkWrapping SW(FA, BC, I.second, Info);
      SW.perform();
    }
    removeUnnecessarySpills(BC, I.second);
  }

  FA.cleanAnnotations(BC, BFs);

  outs() << "BOLT-INFO: FOP optimized " << NumRedundantLoads
         << " redundant load(s).\n";

  if (opts::Verbosity == 0) {
#ifndef NDEBUG
    if (!DebugFlag || !isCurrentDebugType("fop"))
      return;
#else
    return;
#endif
  }

         << " redundant load(s) and " << NumRedundantStores
         << " unused store(s)\n";
  outs() << "BOLT-INFO: FOP changed " << NumLoadsChangedToReg
         << " load(s) to use a register instead of a stack access, and "
         << NumLoadsChangedToImm << " to use an immediate.\n"
         << "BOLT-INFO: FOP deleted " << NumLoadsDeleted << " load(s).\n"
         << "BOLT-INFO: FOP: Number of functions conservatively treated as "
            "clobbering all registers: "
         << NumFunctionsAllClobber
         << format(" (%.1lf%% dyn cov)\n",
                   (100.0 * CountFunctionsAllClobber / CountDenominator))
         << "BOLT-INFO: FOP: " << NumFunctionsNotOptimized << " function(s) "
         << format("(%.1lf%% dyn cov)",
                   (100.0 * CountFunctionsNotOptimized / CountDenominator))
         << " were not optimized.\n"
         << "BOLT-INFO: FOP: " << NumFunctionsFailedRestoreFI << " function(s) "
         << format("(%.1lf%% dyn cov)",
                   (100.0 * CountFunctionsFailedRestoreFI / CountDenominator))
         << " could not have its frame indices restored.\n";
         << "BOLT-INFO: FOP deleted " << NumLoadsDeleted << " load(s) and "
         << NumRedundantStores << " store(s).\n";
  FA.printStats();
  ShrinkWrapping::printStats();
}

} // namespace bolt

@@ -13,31 +13,40 @@
#define LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEOPTIMIZER_H

#include "BinaryPasses.h"
#include "BinaryFunctionCallGraph.h"
#include "FrameAnalysis.h"

namespace llvm {
namespace bolt {

/// FrameOptimizerPass strives for removing unnecessary stack frame accesses.
/// For example, caller-saved registers may be conservatively pushed to the
/// stack because the callee may write to these registers. But if we can prove
/// the callee will never touch these registers, we can remove this spill.
/// FrameOptimizerPass strives for removing or moving stack frame accesses to
/// less frequently executed basic blocks, reducing the pressure on icache
/// usage as well as dynamic instruction count.
///
/// This optimization analyzes the call graph and first compute the set of
/// This is accomplished by analyzing both caller-saved register spills and
/// callee-saved register spills. This class handles the former while delegating
/// the latter to the class ShrinkWrapping. We discuss caller-saved register
/// spills optimization below.
///
/// Caller-saved registers must be conservatively pushed to the stack because
/// the callee may write to these registers. If we can prove the callee will
/// never touch these registers, we can remove this spill.
///
/// This optimization analyzes the call graph and first computes the set of
/// registers that may get overwritten when executing a function (this includes
/// the set of registers touched by all functions this function may call during
/// its execution).
/// its execution) -- see the FrameAnalysis class for implementation details.
///
/// The second step is to perform an alias analysis to disambiguate which stack
/// position is being accessed by each load/store instruction, and annotate
/// these instructions.
/// The second step is to perform an analysis to disambiguate which stack
/// position is being accessed by each load/store instruction -- see the
/// FrameAnalysis class.
///
/// The third step performs a forward dataflow analysis, using intersection as
/// the confluence operator, to propagate information about available
/// stack definitions at each point of the program. This definition shows
/// an equivalence between the value in a stack position and the value of a
/// register or immediate. To have those preserved, both register and the value
/// in the stack position cannot be touched by another instruction.
/// stack definitions at each point of the program. See the
/// StackAvailableExpressions class. This definition shows an equivalence
/// between the value in a stack position and the value of a register or
/// immediate. To have those preserved, both register and the value in the stack
/// position cannot be touched by another instruction.
/// These definitions we are tracking occur in the form:
///
///   stack def: MEM[FRAME - 0x5c] <= RAX
@@ -62,86 +71,29 @@ namespace bolt {
/// In this example, since the store source register is the same as the load
/// destination register, this creates a redundant MOV that can be deleted.
///
/// Finally, another analysis propagates information about which instructions
/// are using (loading from) a stack position -- see StackReachingUses. If a
/// store sees no use of the value it is storing, it is eliminated.
///
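/// A compact hypothetical illustration of the rewrite (register names and
/// offsets are made up, following the MEM[FRAME - ...] notation above):
///
///   mov %rax, -0x30(%rbp)   ; stack def: MEM[FRAME - 0x30] <= RAX
///   ...                     ; nothing clobbers RAX or the slot
///   mov -0x30(%rbp), %rcx   ; load of an available def: becomes mov %rax,%rcx
///   mov -0x30(%rbp), %rax   ; same source and destination value: a redundant
///                           ; MOV, deleted outright
///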
class FrameOptimizerPass : public BinaryFunctionPass {
  /// Stats aggregating variables
  uint64_t NumRedundantLoads{0};
  uint64_t NumRedundantStores{0};
  uint64_t NumLoadsChangedToReg{0};
  uint64_t NumLoadsChangedToImm{0};
  uint64_t NumLoadsDeleted{0};
  /// Number of functions we conservatively marked as clobbering the entire set
  /// of registers because we couldn't fully understand it.
  uint64_t NumFunctionsAllClobber{0};
  /// Execution count of those functions to give us an idea of their dynamic
  /// coverage
  uint64_t CountFunctionsAllClobber{0};

  /// Call graph info
  BinaryFunctionCallGraph Cg;
  /// Perform a dataflow analysis in \p BF to reveal unnecessary reloads from
  /// the frame. Use the analysis to convert memory loads to register moves or
  /// immediate loads. Delete redundant register moves.
  void removeUnnecessaryLoads(const FrameAnalysis &FA,
                              const BinaryContext &BC,
                              BinaryFunction &BF);

  /// DFS or reverse post-ordering of the call graph nodes to allow us to
  /// traverse the call graph bottom-up
  std::deque<BinaryFunction *> TopologicalCGOrder;

  /// Map functions to the set of registers they may overwrite, starting at
  /// when they are called until they return to the caller.
  std::map<const BinaryFunction *, BitVector> RegsKilledMap;

public:
  /// Alias analysis information attached to each instruction that accesses a
  /// frame position. This is called a "frame index" by LLVM Target libs when
  /// it is building a MachineFunction frame, and we use the same name here
  /// because we are essentially doing the job of frame reconstruction.
  struct FrameIndexEntry {
    /// If this is false, this instruction is necessarily a store
    bool IsLoad;
    /// If a store, this controls whether the store uses a register or an imm
    /// as the source value.
    bool IsStoreFromReg;
    /// If load, this holds the destination register. If store, this holds
    /// either the source register or source immediate.
    int32_t RegOrImm;

    /// StackOffset and Size are the two aspects that identify this frame access
    /// for the purposes of alias analysis.
    int64_t StackOffset;
    uint8_t Size;

    /// If this is false, we will never attempt to remove or optimize this
    /// instruction. We just use it to keep track of stores we don't fully
    /// understand but that we know may write to a frame position.
    bool IsSimple;
  };
  typedef std::unordered_map<const MCInst *, const FrameIndexEntry>
      FrameIndexMapTy;
  FrameIndexMapTy FrameIndexMap;

  /// Compute the set of registers \p Inst may write to, marking them in
  /// \p KillSet. If this is a call, try to get the set of registers the call
  /// target will write to.
  void getInstClobberList(const BinaryContext &BC, const MCInst &Inst,
                          BitVector &KillSet) const;
private:
  /// Compute the set of registers \p Func may write to during its execution,
  /// starting at the point when it is called up until when it returns. Returns
  /// a BitVector the size of the target number of registers, representing the
  /// set of clobbered registers.
  BitVector getFunctionClobberList(const BinaryContext &BC,
                                   const BinaryFunction *Func);

  /// Perform the step of building the set of registers clobbered by each
  /// function execution, populating RegsKilledMap.
  void buildClobberMap(const BinaryContext &BC);

  /// Alias analysis to disambiguate which frame position is accessed by each
  /// instruction in function \p BF. Populates FrameIndexMap.
  bool restoreFrameIndex(const BinaryContext &BC, const BinaryFunction &BF);

  /// Uses RegsKilledMap and FrameIndexMap to perform a dataflow analysis in
  /// \p BF to reveal unnecessary reloads from the frame. Use the analysis
  /// to convert memory loads to register moves or immediate loads. Delete
  /// redundant register moves.
  void removeUnnecessarySpills(const BinaryContext &BC,
                               BinaryFunction &BF);
  /// Use information from stack frame usage to delete unused stores.
  void removeUnusedStores(const FrameAnalysis &FA,
                          const BinaryContext &BC,
                          BinaryFunction &BF);

public:
  explicit FrameOptimizerPass(const cl::opt<bool> &PrintPass)
@@ -158,6 +110,7 @@ public:
};

} // namespace bolt

} // namespace llvm

@@ -14,6 +14,7 @@

#include "DataflowAnalysis.h"
#include "FrameAnalysis.h"
#include "llvm/Support/Timer.h"

namespace llvm {
namespace bolt {
@@ -29,6 +30,18 @@ public:
        NumRegs(BC.MRI->getNumRegs()) {}
  virtual ~LivenessAnalysis();

  bool isAlive(ProgramPoint PP, MCPhysReg Reg) const {
    BitVector BV = (*this->getStateAt(PP));
    const BitVector &RegAliases = BC.MIA->getAliases(Reg, *BC.MRI);
    BV &= RegAliases;
    return BV.any();
}
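
A minimal usage sketch of the query above. Only run() and isAlive() come from
this header; the constructor arguments and the instruction/register being
queried are assumptions for illustration:

  LivenessAnalysis LA(FA, BC, BF); // assumed ctor shape, cf. the FA/BC members
  LA.run();
  // R is dead right before Inst iff no alias of R is in the live-in set;
  // this assumes ProgramPoint can wrap an instruction pointer.
  if (!LA.isAlive(ProgramPoint(&Inst), R)) {
    // safe to clobber R here
  }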

  void run() {
    NamedRegionTimer T1("LA", "Dataflow", true);
    DataflowAnalysis<LivenessAnalysis, BitVector, true>::run();
  }

protected:
  /// Reference to the result of stack frame analysis
  const FrameAnalysis &FA;

@@ -13,6 +13,7 @@
#define LLVM_TOOLS_LLVM_BOLT_PASSES_REACHINGDEFORUSE_H

#include "DataflowAnalysis.h"
#include "llvm/Support/Timer.h"

namespace llvm {
namespace bolt {
@@ -50,6 +51,11 @@ public:
    return (*this->getStateAt(B))[this->ExprToIdx[&A]];
  }

  void run() {
    NamedRegionTimer T1("RD", "Dataflow", true);
    InstrsDataflowAnalysis<ReachingDefOrUse<Def>, !Def>::run();
  }

protected:
  /// Reference to the result of stack frame analysis
  const FrameAnalysis &FA;

@@ -12,6 +12,9 @@
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_REACHINGINSNS_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_REACHINGINSNS_H

#include "DataflowAnalysis.h"
#include "llvm/Support/Timer.h"

namespace llvm {
namespace bolt {

@@ -37,6 +40,11 @@ public:
    return isInLoop(*BB);
  }

  void run() {
    NamedRegionTimer T1("RI", "Dataflow", true);
    InstrsDataflowAnalysis<ReachingInsns<Backward>, Backward>::run();
  }

protected:
  std::unordered_map<const MCInst *, BinaryBasicBlock *> InsnToBB;

bolt/Passes/ShrinkWrapping.cpp (new file, 1785 lines): diff suppressed because it is too large
bolt/Passes/ShrinkWrapping.h (new file, 477 lines):
@@ -0,0 +1,477 @@
//===--- Passes/ShrinkWrapping.h ------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_SHRINKWRAPPING_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_SHRINKWRAPPING_H

#include "BinaryPasses.h"
#include "FrameAnalysis.h"
#include "DataflowInfoManager.h"

namespace llvm {
namespace bolt {

/// Encapsulates the logic required to analyze a binary function and detect
/// which registers are saved as callee-saved, where the saves are, and where
/// their original values are restored.
class CalleeSavedAnalysis {
  const FrameAnalysis &FA;
  const BinaryContext &BC;
  BinaryFunction &BF;
  DataflowInfoManager &Info;

  /// Compute all stores of callee-saved regs. Those are the ones that store a
  /// register whose definition is not local.
  void analyzeSaves();

  /// Similar to analyzeSaves, tries to determine all instructions that recover
  /// the original value of the callee-saved register before exiting the
  /// function.
  void analyzeRestores();

  /// Returns the identifying string used to annotate instructions with
  /// metadata for this analysis. These are deleted in the destructor.
  static StringRef getSaveTag() {
    return StringRef("CSA-SavedReg");
  }
  static StringRef getRestoreTag() {
    return StringRef("CSA-RestoredReg");
  }

public:
  BitVector CalleeSaved;
  std::vector<int64_t> OffsetsByReg;
  BitVector HasRestores;
  std::vector<uint64_t> SavingCost;
  std::vector<const FrameIndexEntry*> SaveFIEByReg;
  std::vector<const FrameIndexEntry*> LoadFIEByReg;

  CalleeSavedAnalysis(const FrameAnalysis &FA, const BinaryContext &BC,
                      BinaryFunction &BF, DataflowInfoManager &Info)
      : FA(FA), BC(BC), BF(BF), Info(Info),
        CalleeSaved(BC.MRI->getNumRegs(), false),
        OffsetsByReg(BC.MRI->getNumRegs(), 0LL),
        HasRestores(BC.MRI->getNumRegs(), false),
        SavingCost(BC.MRI->getNumRegs(), 0ULL),
        SaveFIEByReg(BC.MRI->getNumRegs(), nullptr),
        LoadFIEByReg(BC.MRI->getNumRegs(), nullptr) {}

  ~CalleeSavedAnalysis();

  void compute() {
    analyzeSaves();
    analyzeRestores();
  }

  /// Retrieves the value of the callee-saved register that is saved by this
  /// instruction or 0 if this is not a CSR save instruction.
  uint16_t getSavedReg(const MCInst &Inst) {
    auto Val = BC.MIA->tryGetAnnotationAs<decltype(FrameIndexEntry::RegOrImm)>(
        Inst, getSaveTag());
    if (Val)
      return *Val;
    return 0;
  }

  /// Retrieves the value of the callee-saved register that is restored by this
  /// instruction or 0 if this is not a CSR restore instruction.
  uint16_t getRestoredReg(const MCInst &Inst) {
    auto Val = BC.MIA->tryGetAnnotationAs<decltype(FrameIndexEntry::RegOrImm)>(
        Inst, getRestoreTag());
    if (Val)
      return *Val;
    return 0;
  }

  /// Routines to compute all saves/restores for a Reg (needs to traverse all
  /// instructions).
  std::vector<MCInst *> getSavesByReg(uint16_t Reg);
  std::vector<MCInst *> getRestoresByReg(uint16_t Reg);
};
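
A short usage sketch. Construction mirrors the declaration above; the loop
body is illustrative:

  CalleeSavedAnalysis CSA(FA, BC, BF, Info);
  CSA.compute(); // runs analyzeSaves() + analyzeRestores()
  for (auto &BB : BF)
    for (auto &Inst : BB)
      if (uint16_t Reg = CSA.getSavedReg(Inst)) {
        // Inst spills callee-saved register Reg; its slot and spill cost
        // are in CSA.OffsetsByReg[Reg] and CSA.SavingCost[Reg].
      }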

/// Identifies all stack regions being used in a given binary function and
/// allows us to edit the layout, removing or inserting new regions. When the
/// layout is modified, all affected stack-accessing instructions are updated.
class StackLayoutModifier {
  const FrameAnalysis &FA;
  const BinaryContext &BC;
  BinaryFunction &BF;
  DataflowInfoManager &Info;

  // Keep track of stack slots we know how to safely move
  std::map<int64_t, int64_t> AvailableRegions;

  DenseSet<int64_t> CollapsedRegions;
  DenseSet<int64_t> InsertedRegions;

  // A map of chunks of stack memory whose contents we don't understand and
  // therefore need to leave untouched.
  std::map<int64_t, int64_t> BlacklistedRegions;

  // Maps stack slots to the regs that are saved to them
  DenseMap<int64_t, std::set<MCPhysReg>> RegionToRegMap;
  DenseMap<int, std::set<int64_t>> RegToRegionMap;

  // If we can't understand how to move stack slots, IsSimple will be false
  bool IsSimple{true};

  bool IsInitialized{false};

public:
  // Keep a worklist of operations that will carry out the layout
  // modifications requested via collapseRegion()/insertRegion().
  struct WorklistItem {
    enum ActionType : uint8_t {
      None = 0,
      AdjustLoadStoreOffset,
      AdjustCFI,
    } Action;

    int64_t OffsetUpdate{0};
    WorklistItem() : Action(None) {}
    WorklistItem(ActionType Action) : Action(Action) {}
    WorklistItem(ActionType Action, int OffsetUpdate)
        : Action(Action), OffsetUpdate(OffsetUpdate) {}
  };
private:

  /// Mark the stack region identified by \p Offset and \p Size to be a
  /// no-touch zone, whose accesses cannot be relocated to another region.
  void blacklistRegion(int64_t Offset, int64_t Size);

  /// Check if this region overlaps with blacklisted addresses
  bool isRegionBlacklisted(int64_t Offset, int64_t Size);

  /// Check if the region identified by \p Offset and \p Size has any conflicts
  /// with available regions so far. If it has, blacklist all involved regions
  /// and return true.
  bool blacklistAllInConflictWith(int64_t Offset, int64_t Size);

  /// If \p Point is identified as frame pointer initialization (defining the
  /// value of FP with SP), check for non-standard initialization that
  /// precludes us from changing the stack layout. If so, update blacklisted
  /// regions.
  void checkFramePointerInitialization(MCInst &Point);

  /// Determine which stack offsets we can freely change
  void classifyStackAccesses();
  void classifyCFIs();

  /// Used to keep track of modifications to the function that will later be
  /// performed by performChanges();
  void scheduleChange(MCInst &Inst, WorklistItem Item);
  static StringRef getTodoTagName() {
    return StringRef("SLM-TodoTag");
  }
  static StringRef getSlotTagName() {
    return StringRef("SLM-SlotTag");
  }
  static StringRef getOffsetCFIRegTagName() {
    return StringRef("SLM-OffsetCFIReg");
  }

public:
  StackLayoutModifier(const FrameAnalysis &FA, const BinaryContext &BC,
                      BinaryFunction &BF, DataflowInfoManager &Info)
      : FA(FA), BC(BC), BF(BF), Info(Info) {}

  ~StackLayoutModifier() {
    for (auto &BB : BF) {
      for (auto &Inst : BB) {
        BC.MIA->removeAnnotation(Inst, getTodoTagName());
        BC.MIA->removeAnnotation(Inst, getSlotTagName());
        BC.MIA->removeAnnotation(Inst, getOffsetCFIRegTagName());
      }
    }
  }

  /// Retrieves the register referenced by the offset-CFI annotation attached
  /// to this instruction, or 0 if there is no such annotation.
  uint16_t getOffsetCFIReg(const MCInst &Inst) {
    auto Val =
        BC.MIA->tryGetAnnotationAs<uint16_t>(Inst, getOffsetCFIRegTagName());
    if (Val)
      return *Val;
    return 0;
  }

  /// Check if it is possible to delete the push instruction \p DeletedPush.
  /// This involves collapsing the region accessed by this push and updating all
  /// other instructions that access affected memory regions. Return true if we
  /// can update this.
  bool canCollapseRegion(int64_t RegionAddr);
  bool canCollapseRegion(MCInst *DeletedPush);

  /// Notify the layout manager that \p DeletedPush was deleted and that it
  /// needs to update other affected stack-accessing instructions.
  bool collapseRegion(MCInst *Alloc, int64_t RegionAddr, int64_t RegionSize);
  bool collapseRegion(MCInst *DeletedPush);

  /// Set the new stack address difference for load/store instructions that
  /// referenced a stack location that was deleted via collapseRegion.
  void setOffsetForCollapsedAccesses(int64_t NewOffset);

  /// Check if it is possible to insert a push instruction at point \p P.
  /// This involves inserting a new region in the stack, possibly affecting
  /// instructions that access the frame. Return true if we can update them all.
  bool canInsertRegion(ProgramPoint P);

  /// Notify the layout manager that a new push instruction has been inserted
  /// at point \p P and that it will need to update relevant instructions.
  bool insertRegion(ProgramPoint P, int64_t RegionSz);

  /// Perform all changes scheduled by collapseRegion()/insertRegion()
  void performChanges();

  /// Perform initial assessment of the function trying to understand its stack
  /// accesses.
  void initialize();
};
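
A plausible call sequence for this class, inferred from the declarations
above. The push being collapsed and the offset value are illustrative, not
taken from the source:

  StackLayoutModifier SLM(FA, BC, BF, Info);
  SLM.initialize();                 // classify stack accesses and CFIs
  if (SLM.canCollapseRegion(Push) && SLM.collapseRegion(Push)) {
    SLM.setOffsetForCollapsedAccesses(8); // e.g. one 8-byte slot removed
    SLM.performChanges();           // apply the scheduled worklist items
  }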

/// Implements a pass to optimize callee-saved register spills. These spills
/// typically happen at function prologue/epilogue. When these are hot basic
/// blocks, this pass will try to move these spills to cold blocks whenever
/// possible.
class ShrinkWrapping {
  const FrameAnalysis &FA;
  const BinaryContext &BC;
  BinaryFunction &BF;
  DataflowInfoManager &Info;
  StackLayoutModifier SLM;
  /// For each CSR, store a vector of all CFI indexes deleted as a consequence
  /// of moving this Callee-Saved Reg
  DenseMap<unsigned, std::vector<uint32_t>> DeletedPushCFIs;
  DenseMap<unsigned, std::vector<uint32_t>> DeletedPopCFIs;
  std::vector<bool> HasDeletedOffsetCFIs;
  SmallPtrSet<const MCCFIInstruction *, 16> UpdatedCFIs;
  std::vector<BitVector> UsesByReg;
  std::vector<int64_t> PushOffsetByReg;
  std::vector<int64_t> PopOffsetByReg;
  std::vector<MCPhysReg> DomOrder;
  CalleeSavedAnalysis CSA;
  std::vector<SmallPtrSet<MCInst *, 4>> SavePos;
  std::vector<uint64_t> BestSaveCount;
  std::vector<MCInst *> BestSavePos;

  /// Pass stats
  static uint64_t SpillsMovedRegularMode;
  static uint64_t SpillsMovedPushPopMode;

  /// Allow our custom worklist-sensitive analysis
  /// PredictiveStackPointerTracking to access WorklistItem
public:
  struct WorklistItem {
    enum ActionType : uint8_t {
      Erase = 0,
      ChangeToAdjustment,
      InsertLoadOrStore,
      InsertPushOrPop
    } Action;
    FrameIndexEntry FIEToInsert;
    unsigned AffectedReg;
    int Adjustment{0};
    WorklistItem(ActionType Action, unsigned AffectedReg)
        : Action(Action), FIEToInsert(), AffectedReg(AffectedReg) {}
    WorklistItem(ActionType Action, unsigned AffectedReg, int Adjustment)
        : Action(Action), FIEToInsert(), AffectedReg(AffectedReg),
          Adjustment(Adjustment) {}
    WorklistItem(ActionType Action, const FrameIndexEntry &FIE,
                 unsigned AffectedReg)
        : Action(Action), FIEToInsert(FIE), AffectedReg(AffectedReg) {}
  };

  /// Insertion todo items scheduled to happen at the end of BBs. Since we
  /// can't annotate BBs we maintain this bookkeeping here.
  DenseMap<BinaryBasicBlock*, std::vector<WorklistItem>> Todo;

  /// Annotation name used to tag instructions with removal or insertion
  /// actions
  static StringRef getAnnotationName() {
    return StringRef("ShrinkWrap-Todo");
  }
private:
  using BBIterTy = BinaryBasicBlock::iterator;

  /// Calculate all possible uses/defs of these callee-saved regs
  void classifyCSRUses();

  // Ensure we don't work on cases where there are no uses of the callee-saved
  // register. These unnecessary spills should have been removed by previous
  // passes.
  void pruneUnwantedCSRs();

  // Map regs to their possible save locations (at start of these BBs)
  void computeSaveLocations();

  /// Look into the best save location found for saving callee-saved reg
  /// \p CSR and evaluate whether we would benefit by moving the spill to this
  /// new save location. Returns true in case it is profitable to perform the
  /// move.
  bool validateBestSavePos(unsigned CSR, MCInst *&BestPosSave,
                           uint64_t &TotalEstimatedWin);

  /// Populate the Todo map with worklist items to change the function
  template <typename ...T>
  void scheduleChange(ProgramPoint PP, T&& ...Item) {
    if (PP.isInst()) {
      auto &WList = BC.MIA->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
          BC.Ctx.get(), *PP.getInst(), getAnnotationName());
      WList.emplace_back(std::forward<T>(Item)...);
      return;
    }
    // Avoid inserting on BBs with no instructions because we have a dataflow
    // analysis that depends on insertions happening before real instructions
    // (PredictiveStackPointerTracking)
    BinaryBasicBlock *BB = PP.getBB();
    if (BB->size() != 0) {
      Todo[BB].emplace_back(std::forward<T>(Item)...);
      return;
    }
    while (BB->size() == 0) {
      assert(BB->succ_size() == 1);
      BB = *BB->succ_begin();
    }
    auto &WList = BC.MIA->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
        BC.Ctx.get(), *BB->begin(), getAnnotationName());
    WList.emplace_back(std::forward<T>(Item)...);
  }

  /// Determine the POP ordering according to which CSR save is the dominator.
  void computeDomOrder();

  /// Check that the best possible location for a spill save (as determined by
  /// computeSaveLocations) is cold enough to be worth moving the save to it.
  /// \p CSR is the callee-saved register number, \p BestPosSave returns the
  /// pointer to the cold location in case the function returns true, while
  /// \p TotalEstimatedWin contains the estimated reduction in dynamic
  /// instruction count after moving.
  bool isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                         uint64_t &TotalEstimatedWin);

  /// Auxiliary function used to create basic blocks for critical edges and
  /// update the dominance frontier with these new locations
  void splitFrontierCritEdges(
      BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
      const SmallVector<bool, 4> &IsCritEdge,
      const SmallVector<BinaryBasicBlock *, 4> &From,
      const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To);

  /// After the best save location for a spill has been established in
  /// \p BestPosSave for reg \p CSR, compute adequate locations to restore
  /// the spilled value. These will be at the dominance frontier.
  /// Returns an empty vector if we failed. In case of success, set
  /// \p UsePushPops to true if we can operate in the push/pops mode.
  SmallVector<ProgramPoint, 4> doRestorePlacement(MCInst *BestPosSave,
                                                  unsigned CSR,
                                                  uint64_t TotalEstimatedWin);

  /// Checks whether using pushes and pops (instead of the longer load-store
  /// counterparts) is correct for reg \p CSR
  bool validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                            int64_t SaveOffset);

  /// Adjust restore locations to the correct SP offset if we are using POPs
  /// instead of random-access load instructions.
  SmallVector<ProgramPoint, 4>
  fixPopsPlacements(const SmallVector<ProgramPoint, 4> &RestorePoints,
                    int64_t SaveOffset, unsigned CSR);

  /// When moving spills, mark all old spill locations to be deleted
  void scheduleOldSaveRestoresRemoval(unsigned CSR, bool UsePushPops);
  /// Return true if \p Inst uses reg \p CSR
  bool doesInstUsesCSR(const MCInst &Inst, uint16_t CSR);
  /// When moving spills, mark all new spill locations for insertion
  void
  scheduleSaveRestoreInsertions(unsigned CSR, MCInst *BestPosSave,
                                SmallVector<ProgramPoint, 4> &RestorePoints,
                                bool UsePushPops);

  /// Coordinate the replacement of callee-saved spills from their original
  /// place (at prologue and epilogues) to colder basic blocks as determined
  /// by computeSaveLocations().
  void moveSaveRestores();

  /// After the spill locations for reg \p CSR have been moved and all affected
  /// CFI has been removed, insert new updated CFI information for these
  /// locations.
  void insertUpdatedCFI(unsigned CSR, int SPValPush, int SPValPop);

  /// In case the function anchors the CFA reg as SP and we inserted pushes/pops,
  /// insert def_cfa_offsets at appropriate places (and delete old
  /// def_cfa_offsets)
  void rebuildCFIForSP();

  /// Rebuild all CFI for affected Callee-Saved Registers.
  void rebuildCFI();

  /// Create a load-store instruction (depending on the contents of \p FIE).
  /// If \p CreatePushOrPop is true, create a push/pop instead. Current SP/FP
  /// values, as determined by StackPointerTracking, should be informed via
  /// \p SPVal and \p FPVal in order to emit the correct offset from SP/FP.
  MCInst createStackAccess(int SPVal, int FPVal, const FrameIndexEntry &FIE,
                           bool CreatePushOrPop);

  /// Update the CFI referenced by \p Inst with \p NewOffset, if the CFI has
  /// an offset.
  void updateCFIInstOffset(MCInst &Inst, int64_t NewOffset);

  /// Insert any CFI that should be attached to a register spill save/restore.
  BBIterTy insertCFIsForPushOrPop(BinaryBasicBlock &BB, BBIterTy Pos,
                                  unsigned Reg, bool isPush, int Sz,
                                  int64_t NewOffset);

  /// Auxiliary function to processInsertionsList, adding a new instruction
  /// before \p InsertionPoint as requested by \p Item. Return an updated
  /// InsertionPoint for other instructions that need to be inserted at the same
  /// original location, since this insertion may have invalidated the previous
  /// location.
  BBIterTy processInsertion(BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
                            const WorklistItem &Item, int64_t SPVal,
                            int64_t FPVal);

  /// Auxiliary function to processInsertions(), helping perform all the
  /// insertion tasks in the todo list associated with a single insertion point.
  /// Return true if at least one insertion was performed.
  BBIterTy processInsertionsList(BBIterTy InsertionPoint,
                                 BinaryBasicBlock *CurBB,
                                 std::vector<WorklistItem> &TodoList,
                                 int64_t SPVal, int64_t FPVal);

  /// Apply all insertion todo tasks regarding insertion of new stores/loads or
  /// push/pops at annotated points. Return false if the entire function had
  /// no todo-task annotations and this pass has nothing to do.
  bool processInsertions();

  /// Apply all deletion todo tasks (or tasks to change a push/pop to a memory
  /// access no-op)
  void processDeletions();

public:
  ShrinkWrapping(const FrameAnalysis &FA, const BinaryContext &BC,
                 BinaryFunction &BF, DataflowInfoManager &Info)
      : FA(FA), BC(BC), BF(BF), Info(Info), SLM(FA, BC, BF, Info),
        CSA(FA, BC, BF, Info) {}

  ~ShrinkWrapping() {
    for (auto &BB : BF) {
      for (auto &Inst : BB) {
        BC.MIA->removeAnnotation(Inst, getAnnotationName());
      }
    }
  }

  void perform();

  static void printStats();
};
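
Driving the pass boils down to three lines; these mirror the caller in
FrameOptimizerPass::runOnFunctions shown earlier in this diff:

  DataflowInfoManager Info(&FA, BC, BF);
  ShrinkWrapping SW(FA, BC, BF, Info);
  SW.perform(); // annotations are cleaned up by the destructor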

} // end namespace bolt
} // end namespace llvm

#endif
bolt/Passes/StackAllocationAnalysis.cpp (new file, 153 lines):
@@ -0,0 +1,153 @@
//===--- Passes/StackAllocationAnalysis.cpp -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "StackAllocationAnalysis.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "saa"

namespace llvm {
namespace bolt {

void StackAllocationAnalysis::preflight() {
  DEBUG(dbgs() << "Starting StackAllocationAnalysis on \""
               << Func.getPrintName() << "\"\n");

  for (auto &BB : this->Func) {
    for (auto &Inst : BB) {
      MCPhysReg From, To;
      if (!BC.MIA->isPush(Inst) && (!BC.MIA->isRegToRegMove(Inst, From, To) ||
                                    To != BC.MIA->getStackPointer() ||
                                    From != BC.MIA->getFramePointer()) &&
          !BC.MII->get(Inst.getOpcode())
               .hasDefOfPhysReg(Inst, BC.MIA->getStackPointer(), *BC.MRI))
        continue;
      this->Expressions.push_back(&Inst);
      this->ExprToIdx[&Inst] = this->NumInstrs++;
    }
  }
}

BitVector
StackAllocationAnalysis::getStartingStateAtBB(const BinaryBasicBlock &BB) {
  return BitVector(this->NumInstrs, false);
}

BitVector
StackAllocationAnalysis::getStartingStateAtPoint(const MCInst &Point) {
  return BitVector(this->NumInstrs, false);
}

void StackAllocationAnalysis::doConfluence(BitVector &StateOut,
                                           const BitVector &StateIn) {
  StateOut |= StateIn;
}

BitVector StackAllocationAnalysis::doKill(const MCInst &Point,
                                          const BitVector &StateIn,
                                          int DeallocSize) {
  int64_t SPOffset = SPT.getStateAt(Point)->first;
  BitVector Next = StateIn;
  if (SPOffset == SPT.SUPERPOSITION || SPOffset == SPT.EMPTY)
    return Next;
  for (auto I = this->expr_begin(Next), E = this->expr_end(); I != E; ++I) {
    const MCInst *Instr = *I;
    int64_t InstrOffset = SPT.getStateAt(*Instr)->first;
    if (InstrOffset == SPT.SUPERPOSITION || InstrOffset == SPT.EMPTY)
      continue;
    if (InstrOffset < SPOffset) {
      Next.reset(I.getBitVectorIndex());
      DEBUG({
        dbgs() << "SAA FYI: Killed: ";
        Instr->dump();
        dbgs() << "by: ";
        Point.dump();
        dbgs() << " (more info: Killed instr offset = " << InstrOffset
               << ". SPOffset = " << SPOffset
               << "; DeallocSize= " << DeallocSize << "\n";
      });
    }
  }
  return Next;
}
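
The kill rule above reduces to a plain offset comparison: once SP moves back
up to some offset, any tracked allocation that lives strictly below the new
SP is dead. A standalone sketch of just that predicate (the offsets are
illustrative):

#include <cassert>
#include <cstdint>

// True if an allocation made when SP was at AllocOffset is deallocated once
// SP returns to SPOffset (both are negative, CFA-relative offsets).
bool isKilled(int64_t AllocOffset, int64_t SPOffset) {
  return AllocOffset < SPOffset;
}

int main() {
  // A push at SP = -16, then "add $0x10,%rsp" brings SP back to -8:
  assert(isKilled(-16, -8));   // the pushed slot is gone
  assert(!isKilled(-8, -8));   // allocations at or above SP survive
  return 0;
}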

void StackAllocationAnalysis::doConfluenceWithLP(BitVector &StateOut,
                                                 const BitVector &StateIn,
                                                 const MCInst &Invoke) {
  BitVector NewIn = StateIn;
  for (const auto &Operand : Invoke) {
    if (Operand.isGnuArgsSize()) {
      auto ArgsSize = Operand.getGnuArgsSize();
      NewIn = doKill(Invoke, NewIn, ArgsSize);
    }
  }
  StateOut |= NewIn;
}

BitVector StackAllocationAnalysis::computeNext(const MCInst &Point,
                                               const BitVector &Cur) {
  const auto &MIA = BC.MIA;
  BitVector Next = Cur;
  if (int Sz = MIA->getPopSize(Point)) {
    Next = doKill(Point, Next, Sz);
    return Next;
  }
  if (MIA->isPush(Point)) {
    Next.set(this->ExprToIdx[&Point]);
    return Next;
  }

  MCPhysReg From, To;
  int64_t SPOffset, FPOffset;
  std::tie(SPOffset, FPOffset) = *SPT.getStateBefore(Point);
  if (MIA->isRegToRegMove(Point, From, To) && To == MIA->getStackPointer() &&
      From == MIA->getFramePointer()) {
    if (MIA->isLeave(Point))
      FPOffset += 8;
    if (SPOffset < FPOffset) {
      Next = doKill(Point, Next, FPOffset - SPOffset);
      return Next;
    }
    if (SPOffset > FPOffset) {
      Next.set(this->ExprToIdx[&Point]);
      return Next;
    }
  }
  if (BC.MII->get(Point.getOpcode())
          .hasDefOfPhysReg(Point, MIA->getStackPointer(), *BC.MRI)) {
    std::pair<MCPhysReg, int64_t> SP;
    if (SPOffset != SPT.EMPTY && SPOffset != SPT.SUPERPOSITION)
      SP = std::make_pair(MIA->getStackPointer(), SPOffset);
    else
      SP = std::make_pair(0, 0);
    std::pair<MCPhysReg, int64_t> FP;
    if (FPOffset != SPT.EMPTY && FPOffset != SPT.SUPERPOSITION)
      FP = std::make_pair(MIA->getFramePointer(), FPOffset);
    else
      FP = std::make_pair(0, 0);
    int64_t Output;
    if (!MIA->evaluateSimple(Point, Output, SP, FP))
      return Next;

    if (SPOffset < Output) {
      Next = doKill(Point, Next, Output - SPOffset);
      return Next;
    }
    if (SPOffset > Output) {
      Next.set(this->ExprToIdx[&Point]);
      return Next;
    }
  }
  return Next;
}

} // end namespace bolt
} // end namespace llvm
bolt/Passes/StackAllocationAnalysis.h (new file, 68 lines):
@@ -0,0 +1,68 @@
//===--- Passes/StackAllocationAnalysis.h ---------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STACKALLOCATIONANALYSIS_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_STACKALLOCATIONANALYSIS_H

#include "DataflowAnalysis.h"
#include "StackPointerTracking.h"
#include "llvm/Support/Timer.h"

namespace llvm {
namespace bolt {

/// Perform a dataflow analysis to track the value of SP as an offset relative
/// to the CFA.
class StackAllocationAnalysis
    : public InstrsDataflowAnalysis<StackAllocationAnalysis,
                                    /*Backward=*/false> {
  friend class DataflowAnalysis<StackAllocationAnalysis, BitVector>;

  StackPointerTracking &SPT;

public:
  StackAllocationAnalysis(const BinaryContext &BC, BinaryFunction &BF,
                          StackPointerTracking &SPT)
      : InstrsDataflowAnalysis<StackAllocationAnalysis, false>(BC, BF),
        SPT(SPT) {}
  virtual ~StackAllocationAnalysis() {}

  void run() {
    NamedRegionTimer T1("SAA", "Dataflow", true);
    InstrsDataflowAnalysis<StackAllocationAnalysis, false>::run();
  }

protected:
  void preflight();

  BitVector getStartingStateAtBB(const BinaryBasicBlock &BB);

  BitVector getStartingStateAtPoint(const MCInst &Point);

  void doConfluence(BitVector &StateOut, const BitVector &StateIn);

  BitVector doKill(const MCInst &Point, const BitVector &StateIn,
                   int DeallocSize);

  void doConfluenceWithLP(BitVector &StateOut, const BitVector &StateIn,
                          const MCInst &Invoke);

  BitVector computeNext(const MCInst &Point, const BitVector &Cur);

  StringRef getAnnotationName() const {
    return StringRef("StackAllocationAnalysis");
  }
};

} // end namespace bolt
} // end namespace llvm

#endif
bolt/Passes/StackAvailableExpressions.cpp (new file, 132 lines):
@@ -0,0 +1,132 @@
//===--- Passes/StackAvailableExpressions.cpp -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "StackAvailableExpressions.h"
#include "FrameAnalysis.h"

#define DEBUG_TYPE "sae"

namespace llvm {
namespace bolt {

StackAvailableExpressions::StackAvailableExpressions(const FrameAnalysis &FA,
                                                     const BinaryContext &BC,
                                                     BinaryFunction &BF)
    : InstrsDataflowAnalysis(BC, BF), FA(FA) {}

void StackAvailableExpressions::preflight() {
  DEBUG(dbgs() << "Starting StackAvailableExpressions on \""
               << Func.getPrintName() << "\"\n");

  // Populate our universe of tracked expressions. We are interested in
  // tracking available stores to frame position at any given point of the
  // program.
  for (auto &BB : Func) {
    for (auto &Inst : BB) {
      auto FIE = FA.getFIEFor(BC, Inst);
      if (!FIE)
        continue;
      if (FIE->IsStore == true && FIE->IsSimple == true) {
        Expressions.push_back(&Inst);
        ExprToIdx[&Inst] = NumInstrs++;
      }
    }
  }
}

BitVector
StackAvailableExpressions::getStartingStateAtBB(const BinaryBasicBlock &BB) {
  // Entry points start with the empty set.
  // All others start with the full set.
  if (BB.pred_size() == 0 && BB.throw_size() == 0)
    return BitVector(NumInstrs, false);
  return BitVector(NumInstrs, true);
}

BitVector
StackAvailableExpressions::getStartingStateAtPoint(const MCInst &Point) {
  return BitVector(NumInstrs, true);
}

void StackAvailableExpressions::doConfluence(BitVector &StateOut,
                                             const BitVector &StateIn) {
  StateOut &= StateIn;
}
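
The &= here is the intersection confluence mentioned in FrameOptimizer.h: a
store is available at a join only if it survives on every incoming path. A
tiny standalone sketch using llvm::BitVector (the indices are illustrative):

#include "llvm/ADT/BitVector.h"
#include <cassert>
using llvm::BitVector;

int main() {
  BitVector PathA(4, false), PathB(4, false);
  PathA.set(0); PathA.set(2);   // stores 0 and 2 survive along path A
  PathB.set(2); PathB.set(3);   // stores 2 and 3 survive along path B
  PathA &= PathB;               // at the join, only store 2 is available
  assert(PathA.count() == 1 && PathA[2]);
  return 0;
}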

namespace {

bool isLoadRedundant(const FrameIndexEntry &LoadFIE,
                     const FrameIndexEntry &StoreFIE) {
  if (LoadFIE.IsLoad == false || LoadFIE.IsSimple == false) {
    return false;
  }
  if (LoadFIE.StackOffset == StoreFIE.StackOffset &&
      LoadFIE.Size == StoreFIE.Size) {
    return true;
  }

  return false;
}
}

bool StackAvailableExpressions::doesXKillsY(const MCInst *X, const MCInst *Y) {
  // If both are stores, and both store to the same stack location, return
  // true
  auto FIEX = FA.getFIEFor(BC, *X);
  auto FIEY = FA.getFIEFor(BC, *Y);
  if (FIEX && FIEY) {
    if (isLoadRedundant(*FIEX, *FIEY))
      return false;
    if (FIEX->IsStore == true && FIEY->IsStore == true &&
        FIEX->StackOffset + FIEX->Size > FIEY->StackOffset &&
        FIEX->StackOffset < FIEY->StackOffset + FIEY->Size)
      return true;
  }
  // getClobberedRegs for X and Y. If they intersect, return true
  BitVector XClobbers = BitVector(BC.MRI->getNumRegs(), false);
  BitVector YClobbers = BitVector(BC.MRI->getNumRegs(), false);
  FA.getInstClobberList(BC, *X, XClobbers);
  // If Y is a store to the stack, its clobber list is its source reg. This is
  // different from the rest because we want to check if the store source
  // reaches its corresponding load untouched.
  if (FIEY && FIEY->IsStore == true && FIEY->IsStoreFromReg) {
    YClobbers.set(FIEY->RegOrImm);
  } else {
    FA.getInstClobberList(BC, *Y, YClobbers);
  }
  XClobbers &= YClobbers;
  return XClobbers.any();
}
|
||||
|
||||
BitVector StackAvailableExpressions::computeNext(const MCInst &Point,
|
||||
const BitVector &Cur) {
|
||||
BitVector Next = Cur;
|
||||
// Kill
|
||||
for (auto I = expr_begin(Next), E = expr_end(); I != E; ++I) {
|
||||
assert(*I != nullptr && "Lost pointers");
|
||||
DEBUG(dbgs() << "\t\t\tDoes it kill ");
|
||||
DEBUG((*I)->dump());
|
||||
if (doesXKillsY(&Point, *I)) {
|
||||
DEBUG(dbgs() << "\t\t\t\tKilling ");
|
||||
DEBUG((*I)->dump());
|
||||
Next.reset(I.getBitVectorIndex());
|
||||
}
|
||||
}
|
||||
// Gen
|
||||
if (auto FIE = FA.getFIEFor(BC, Point)) {
|
||||
if (FIE->IsStore == true && FIE->IsSimple == true)
|
||||
Next.set(ExprToIdx[&Point]);
|
||||
}
|
||||
return Next;
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
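A note on the scheme above: StackAvailableExpressions is a forward must-analysis, so confluence is set intersection and only true entry blocks start from the empty set. A minimal standalone sketch of that fixed-point structure, under the assumption that expressions fit in a 64-bit mask (the Block type and bitmask encoding here are illustrative, not BOLT's actual DataflowAnalysis classes):

#include <cstdint>
#include <vector>

// Toy CFG block: Gen/Kill are bitmasks over N tracked expressions
// (N <= 64 purely to keep the sketch short).
struct Block {
  std::vector<int> Preds;
  uint64_t Gen = 0, Kill = 0;
  uint64_t In = 0, Out = 0;
};

void solveAvailableExpressions(std::vector<Block> &CFG, unsigned N) {
  const uint64_t Full = (N >= 64) ? ~0ull : ((1ull << N) - 1);
  // Entry blocks start empty; everything else starts full so the
  // intersection-based meet can only shrink states toward the fixpoint.
  for (Block &B : CFG)
    B.Out = B.Preds.empty() ? B.Gen : Full;
  for (bool Changed = true; Changed;) {
    Changed = false;
    for (Block &B : CFG) {
      if (B.Preds.empty())
        continue;
      uint64_t In = Full;
      for (int P : B.Preds) // meet: available on *every* incoming path
        In &= CFG[P].Out;
      const uint64_t Out = (In & ~B.Kill) | B.Gen; // transfer function
      Changed |= (In != B.In) || (Out != B.Out);
      B.In = In;
      B.Out = Out;
    }
  }
}

The parallel to the pass above: getStartingStateAtBB returning the empty set only when pred_size() == 0 && throw_size() == 0 plays the role of the Preds.empty() case, and doConfluence's StateOut &= StateIn is the bitwise-AND meet.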
bolt/Passes/StackAvailableExpressions.h (new file, 58 lines)
@@ -0,0 +1,58 @@
//===--- Passes/StackAvailableExpressions.h -------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STACKAVAILABLEEXPRESSIONS_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_STACKAVAILABLEEXPRESSIONS_H

#include "DataflowAnalysis.h"
#include "llvm/Support/Timer.h"

namespace llvm {
namespace bolt {

class FrameAnalysis;

class StackAvailableExpressions
    : public InstrsDataflowAnalysis<StackAvailableExpressions> {
  friend class DataflowAnalysis<StackAvailableExpressions, BitVector>;

public:
  StackAvailableExpressions(const FrameAnalysis &FA,
                            const BinaryContext &BC, BinaryFunction &BF);
  virtual ~StackAvailableExpressions() {}

  void run() {
    NamedRegionTimer T1("SAE", "Dataflow", true);
    InstrsDataflowAnalysis<StackAvailableExpressions>::run();
  }

protected:
  /// Reference to the result of stack frame analysis
  const FrameAnalysis &FA;

  void preflight();
  BitVector getStartingStateAtBB(const BinaryBasicBlock &BB);
  BitVector getStartingStateAtPoint(const MCInst &Point);
  void doConfluence(BitVector &StateOut, const BitVector &StateIn);
  /// Define the function computing the kill set -- whether expression Y, a
  /// tracked expression, will be considered to be dead after executing X.
  bool doesXKillsY(const MCInst *X, const MCInst *Y);
  BitVector computeNext(const MCInst &Point, const BitVector &Cur);

  StringRef getAnnotationName() const {
    return StringRef("StackAvailableExpressions");
  }
};

} // namespace bolt
} // namespace llvm

#endif
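For orientation, a client would construct the analysis with the FrameAnalysis results and call run() before querying per-instruction states. A hedged usage sketch, assuming the DataflowAnalysis base class exposes a getStateAt() accessor (that accessor is an assumption about the base API; only the members declared above are confirmed by this header, and FA, BC, Function, and Inst are assumed to be in scope):

// Hypothetical driver: run the analysis, then enumerate the frame stores
// still available immediately before instruction Inst.
StackAvailableExpressions SAE(FA, BC, Function);
SAE.run();
if (auto State = SAE.getStateAt(Inst)) { // assumed base-class accessor
  for (auto I = SAE.expr_begin(*State), E = SAE.expr_end(); I != E; ++I)
    (*I)->dump(); // each *I is an MCInst* of an available frame store
}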
@@ -13,6 +13,7 @@
#define LLVM_TOOLS_LLVM_BOLT_PASSES_STACKPOINTERTRACKING_H

#include "DataflowAnalysis.h"
#include "llvm/Support/Timer.h"

namespace llvm {
namespace bolt {

@@ -190,6 +191,11 @@ class StackPointerTracking
public:
  StackPointerTracking(const BinaryContext &BC, BinaryFunction &BF);
  virtual ~StackPointerTracking() {}

  void run() {
    NamedRegionTimer T1("SPT", "Dataflow", true);
    StackPointerTrackingBase<StackPointerTracking>::run();
  }
};

} // end namespace bolt
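The run() wrappers added throughout this commit share one RAII idiom: NamedRegionTimer starts timing on construction and records the elapsed time into its named group when destroyed, so the base-class run() is measured without explicit start/stop calls. A minimal standalone illustration using the same three-argument constructor seen above (the region and group names here are arbitrary placeholders):

#include "llvm/Support/Timer.h"

void timedDataflow() {
  // Everything from here until T goes out of scope is attributed to
  // region "XYZ" in timer group "Dataflow"; the third argument enables
  // the timer.
  llvm::NamedRegionTimer T("XYZ", "Dataflow", true);
  // ... the work being measured ...
}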
bolt/Passes/StackReachingUses.cpp (new file, 112 lines)
@@ -0,0 +1,112 @@
//===--- Passes/StackReachingUses.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "StackReachingUses.h"
#include "FrameAnalysis.h"

#define DEBUG_TYPE "sru"

namespace llvm {
namespace bolt {

bool StackReachingUses::isStoreUsed(const FrameIndexEntry &StoreFIE,
                                    ExprIterator Candidates,
                                    bool IncludeLocalAccesses) const {
  for (auto I = Candidates; I != expr_end(); ++I) {
    const MCInst *ReachingInst = *I;
    if (IncludeLocalAccesses) {
      if (auto FIEY = FA.getFIEFor(BC, *ReachingInst)) {
        assert(FIEY->IsLoad);
        if (StoreFIE.StackOffset + StoreFIE.Size > FIEY->StackOffset &&
            StoreFIE.StackOffset < FIEY->StackOffset + FIEY->Size)
          return true;
      }
    }
    auto Args = FA.getArgAccessesFor(BC, *ReachingInst);
    if (!Args)
      continue;
    if (Args->AssumeEverything)
      return true;
    for (auto FIEY : Args->Set) {
      if (StoreFIE.StackOffset + StoreFIE.Size > FIEY.StackOffset &&
          StoreFIE.StackOffset < FIEY.StackOffset + FIEY.Size)
        return true;
    }
  }
  return false;
}

void StackReachingUses::preflight() {
  DEBUG(dbgs() << "Starting StackReachingUses on \"" << Func.getPrintName()
               << "\"\n");

  // Populate our universe of tracked expressions. We are interested in
  // tracking reaching loads from frame position at any given point of the
  // program.
  for (auto &BB : Func) {
    for (auto &Inst : BB) {
      if (auto FIE = FA.getFIEFor(BC, Inst)) {
        if (FIE->IsLoad) {
          Expressions.push_back(&Inst);
          ExprToIdx[&Inst] = NumInstrs++;
          continue;
        }
      }
      auto AA = FA.getArgAccessesFor(BC, Inst);
      if (AA && (!AA->Set.empty() || AA->AssumeEverything)) {
        Expressions.push_back(&Inst);
        ExprToIdx[&Inst] = NumInstrs++;
      }
    }
  }
}

bool StackReachingUses::doesXKillsY(const MCInst *X, const MCInst *Y) {
  // If X is a store to the same stack location and the bytes it writes are a
  // superset of those bytes affected by the load in Y, return true.
  auto FIEX = FA.getFIEFor(BC, *X);
  auto FIEY = FA.getFIEFor(BC, *Y);
  if (FIEX && FIEY) {
    if (FIEX->IsStore && FIEY->IsLoad &&
        FIEX->StackOffset <= FIEY->StackOffset &&
        FIEX->StackOffset + FIEX->Size >= FIEY->StackOffset + FIEY->Size)
      return true;
  }
  return false;
}

BitVector StackReachingUses::computeNext(const MCInst &Point,
                                         const BitVector &Cur) {
  BitVector Next = Cur;
  // Kill: a store that fully covers a tracked load's bytes cuts it off.
  for (auto I = expr_begin(Next), E = expr_end(); I != E; ++I) {
    assert(*I != nullptr && "Lost pointers");
    if (doesXKillsY(&Point, *I)) {
      DEBUG(dbgs() << "\t\t\tKilling ");
      DEBUG((*I)->dump());
      Next.reset(I.getBitVectorIndex());
    }
  }
  // Gen: frame loads and argument accesses begin reaching backward from here.
  if (auto FIE = FA.getFIEFor(BC, Point)) {
    if (FIE->IsLoad)
      Next.set(ExprToIdx[&Point]);
  }
  auto AA = FA.getArgAccessesFor(BC, Point);
  if (AA && (!AA->Set.empty() || AA->AssumeEverything))
    Next.set(ExprToIdx[&Point]);
  return Next;
}

} // namespace bolt
} // namespace llvm
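Two different byte-range predicates appear in these passes and are easy to confuse: isStoreUsed (and the store-vs-store check in StackAvailableExpressions) asks whether the half-open ranges [Offset, Offset + Size) merely overlap, while StackReachingUses::doesXKillsY requires containment, since a partial overwrite leaves part of the loaded value live. A self-contained check of both predicates (the offsets and sizes are made-up examples):

#include <cassert>

struct Access { int Offset, Size; }; // byte range [Offset, Offset + Size)

// Overlap test, as used by isStoreUsed above.
bool overlaps(Access A, Access B) {
  return A.Offset + A.Size > B.Offset && A.Offset < B.Offset + B.Size;
}

// Containment test, as used by StackReachingUses::doesXKillsY above.
bool covers(Access Store, Access Load) {
  return Store.Offset <= Load.Offset &&
         Store.Offset + Store.Size >= Load.Offset + Load.Size;
}

int main() {
  assert(overlaps({-8, 8}, {-4, 4}));   // store touches the load's bytes
  assert(!overlaps({-16, 8}, {-8, 8})); // adjacent slots: no collision
  assert(covers({-8, 8}, {-4, 4}));     // full overwrite kills the load
  assert(!covers({-4, 4}, {-8, 8}));    // partial overwrite does not
  return 0;
}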
bolt/Passes/StackReachingUses.h (new file, 71 lines)
@@ -0,0 +1,71 @@
//===--- Passes/StackReachingUses.h ---------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STACKREACHINGUSES_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_STACKREACHINGUSES_H

#include "DataflowAnalysis.h"
#include "llvm/Support/Timer.h"

namespace llvm {
namespace bolt {

class FrameAnalysis;
struct FrameIndexEntry;

class StackReachingUses
    : public InstrsDataflowAnalysis<StackReachingUses, /*Backward=*/true> {
  friend class DataflowAnalysis<StackReachingUses, BitVector, true>;

public:
  StackReachingUses(const FrameAnalysis &FA, const BinaryContext &BC,
                    BinaryFunction &BF)
      : InstrsDataflowAnalysis(BC, BF), FA(FA) {}
  virtual ~StackReachingUses() {}

  bool isStoreUsed(const FrameIndexEntry &StoreFIE, ExprIterator Candidates,
                   bool IncludeLocalAccesses = true) const;

  void run() {
    NamedRegionTimer T1("SRU", "Dataflow", true);
    InstrsDataflowAnalysis<StackReachingUses, true>::run();
  }

protected:
  /// Reference to the result of stack frame analysis
  const FrameAnalysis &FA;

  void preflight();

  BitVector getStartingStateAtBB(const BinaryBasicBlock &BB) {
    return BitVector(NumInstrs, false);
  }

  BitVector getStartingStateAtPoint(const MCInst &Point) {
    return BitVector(NumInstrs, false);
  }

  void doConfluence(BitVector &StateOut, const BitVector &StateIn) {
    StateOut |= StateIn;
  }

  /// Define the function computing the kill set -- whether expression Y, a
  /// tracked expression, will be considered to be dead after executing X.
  bool doesXKillsY(const MCInst *X, const MCInst *Y);
  BitVector computeNext(const MCInst &Point, const BitVector &Cur);

  StringRef getAnnotationName() const { return StringRef("StackReachingUses"); }
};

} // end namespace bolt
} // end namespace llvm

#endif
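Taken together, the two analyses are duals, and the confluence operators above make it explicit: StackAvailableExpressions runs forward with intersection (a must-analysis: a store counts only if it is available along every incoming path), while StackReachingUses runs backward with union (a may-analysis: a store is used if any later path reaches a load of its bytes). Reduced to bit-set operators (an illustrative reduction, not BOLT code):

#include <cstdint>

// Must-analysis meet (forward available expressions): AND across paths.
uint64_t meetMust(uint64_t A, uint64_t B) { return A & B; }

// May-analysis join (backward reaching uses): OR across paths.
uint64_t joinMay(uint64_t A, uint64_t B) { return A | B; }

This is exactly why doConfluence is StateOut &= StateIn in one header and StateOut |= StateIn in the other, and why their starting states differ: the full set for the must-analysis, the empty set for the may-analysis.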
@@ -1659,6 +1659,7 @@ void RewriteInstance::readDebugInfo() {
void RewriteInstance::disassembleFunctions() {
  // Disassemble every function and build its control flow graph.
  TotalScore = 0;
  BC->SumExecutionCount = 0;
  for (auto &BFI : BinaryFunctions) {
    BinaryFunction &Function = BFI.second;

@@ -1803,6 +1804,7 @@ void RewriteInstance::disassembleFunctions() {
    }

    TotalScore += Function.getFunctionScore();
    BC->SumExecutionCount += Function.getKnownExecutionCount();

  } // Iterate over all functions

@@ -1821,6 +1823,7 @@ void RewriteInstance::disassembleFunctions() {
    else
      ++NumStaleProfileFunctions;
  }
  BC->NumProfiledFuncs = ProfiledFunctions.size();

  const auto NumAllProfiledFunctions =
      ProfiledFunctions.size() + NumStaleProfileFunctions;