mirror of
https://github.com/intel/llvm.git
synced 2026-01-27 06:06:34 +08:00
Revert "[FuncSpec] Replace LoopInfo with BlockFrequencyInfo"
As reported on https://reviews.llvm.org/D150375#4367861 and in the comments that follow it, this change causes PDT (PostDominatorTree) invalidation issues. Revert it and its dependent commits. This reverts commit 0524534d52. This reverts commit ced90d1ff6. This reverts commit 9f992cc935. This reverts commit 1b1232047e.
This commit is contained in:
@@ -48,11 +48,10 @@
|
||||
#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
|
||||
#define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
|
||||
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/InlineCost.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/InstVisitor.h"
|
||||
#include "llvm/Transforms/Scalar/SCCP.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include "llvm/Transforms/Utils/SCCPSolver.h"
|
||||
@@ -70,9 +69,6 @@ using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
|
||||
// Just a shorter abbreviation to improve indentation.
|
||||
using Cost = InstructionCost;
|
||||
|
||||
// Map of known constants found during the specialization bonus estimation.
|
||||
using ConstMap = DenseMap<Value *, Constant *>;
|
||||
|
||||
// Specialization signature, used to uniquely designate a specialization within
|
||||
// a function.
|
||||
struct SpecSig {
|
||||
@@ -119,39 +115,6 @@ struct Spec {
|
||||
: F(F), Sig(S), Score(Score) {}
|
||||
};
|
||||
|
||||
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
|
||||
const DataLayout &DL;
|
||||
BlockFrequencyInfo &BFI;
|
||||
TargetTransformInfo &TTI;
|
||||
SCCPSolver &Solver;
|
||||
|
||||
ConstMap KnownConstants;
|
||||
|
||||
ConstMap::iterator LastVisited;
|
||||
|
||||
public:
|
||||
InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI,
|
||||
TargetTransformInfo &TTI, SCCPSolver &Solver)
|
||||
: DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
|
||||
|
||||
Cost getUserBonus(Instruction *User, Value *Use, Constant *C);
|
||||
|
||||
private:
|
||||
friend class InstVisitor<InstCostVisitor, Constant *>;
|
||||
|
||||
Cost estimateSwitchInst(SwitchInst &I);
|
||||
Cost estimateBranchInst(BranchInst &I);
|
||||
|
||||
Constant *visitInstruction(Instruction &I) { return nullptr; }
|
||||
Constant *visitLoadInst(LoadInst &I);
|
||||
Constant *visitGetElementPtrInst(GetElementPtrInst &I);
|
||||
Constant *visitSelectInst(SelectInst &I);
|
||||
Constant *visitCastInst(CastInst &I);
|
||||
Constant *visitCmpInst(CmpInst &I);
|
||||
Constant *visitUnaryOperator(UnaryOperator &I);
|
||||
Constant *visitBinaryOperator(BinaryOperator &I);
|
||||
};
|
||||
|
||||
class FunctionSpecializer {
|
||||
|
||||
/// The IPSCCP Solver.
|
||||
@@ -163,7 +126,6 @@ class FunctionSpecializer {
|
||||
FunctionAnalysisManager *FAM;
|
||||
|
||||
/// Analyses used to help determine if a function should be specialized.
|
||||
std::function<BlockFrequencyInfo &(Function &)> GetBFI;
|
||||
std::function<const TargetLibraryInfo &(Function &)> GetTLI;
|
||||
std::function<TargetTransformInfo &(Function &)> GetTTI;
|
||||
std::function<AssumptionCache &(Function &)> GetAC;
|
||||
@@ -175,12 +137,11 @@ class FunctionSpecializer {
|
||||
public:
|
||||
FunctionSpecializer(
|
||||
SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM,
|
||||
std::function<BlockFrequencyInfo &(Function &)> GetBFI,
|
||||
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
|
||||
std::function<TargetTransformInfo &(Function &)> GetTTI,
|
||||
std::function<AssumptionCache &(Function &)> GetAC)
|
||||
: Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI),
|
||||
GetTTI(GetTTI), GetAC(GetAC) {}
|
||||
: Solver(Solver), M(M), FAM(FAM), GetTLI(GetTLI), GetTTI(GetTTI),
|
||||
GetAC(GetAC) {}
|
||||
|
||||
~FunctionSpecializer();
|
||||
|
||||
@@ -188,18 +149,6 @@ public:
|
||||
|
||||
bool run();
|
||||
|
||||
static unsigned getBlockFreqMultiplier();
|
||||
|
||||
InstCostVisitor getInstCostVisitorFor(Function *F) {
|
||||
auto &BFI = (GetBFI)(*F);
|
||||
auto &TTI = (GetTTI)(*F);
|
||||
return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver);
|
||||
}
|
||||
|
||||
/// Compute a bonus for replacing argument \p A with constant \p C.
|
||||
Cost getSpecializationBonus(Argument *A, Constant *C,
|
||||
InstCostVisitor &Visitor);
|
||||
|
||||
private:
|
||||
Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
|
||||
|
||||
@@ -243,6 +192,9 @@ private:
|
||||
/// Compute and return the cost of specializing function \p F.
|
||||
Cost getSpecializationCost(Function *F);
|
||||
|
||||
/// Compute a bonus for replacing argument \p A with constant \p C.
|
||||
Cost getSpecializationBonus(Argument *A, Constant *C, const LoopInfo &LI);
|
||||
|
||||
/// Determine if it is possible to specialise the function for constant values
|
||||
/// of the formal parameter \p A.
|
||||
bool isArgumentInteresting(Argument *A);
|
||||
|
||||
@@ -44,6 +44,7 @@ struct AnalysisResultsForFn {
|
||||
std::unique_ptr<PredicateInfo> PredInfo;
|
||||
DominatorTree *DT;
|
||||
PostDominatorTree *PDT;
|
||||
LoopInfo *LI;
|
||||
};
|
||||
|
||||
/// Helper struct shared between Function Specialization and SCCP Solver.
|
||||
@@ -90,6 +91,8 @@ public:
|
||||
|
||||
const PredicateBase *getPredicateInfoFor(Instruction *I);
|
||||
|
||||
const LoopInfo &getLoopInfo(Function &F);
|
||||
|
||||
DomTreeUpdater getDTU(Function &F);
|
||||
|
||||
/// trackValueOfGlobalVariable - Clients can use this method to
|
||||
|
||||
@@ -48,14 +48,12 @@
|
||||
#include "llvm/Transforms/IPO/FunctionSpecialization.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/Analysis/InlineCost.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueLattice.h"
|
||||
#include "llvm/Analysis/ValueLatticeUtils.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/ConstantFold.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/Transforms/Scalar/SCCP.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
@@ -74,22 +72,6 @@ static cl::opt<bool> ForceSpecialization(
|
||||
"Force function specialization for every call site with a constant "
|
||||
"argument"));
|
||||
|
||||
// Set to 2^3 to model three levels of if-else nest.
|
||||
static cl::opt<unsigned> BlockFreqMultiplier(
|
||||
"funcspec-block-freq-multiplier", cl::init(8), cl::Hidden, cl::desc(
|
||||
"Multiplier to scale block frequency of user instructions during "
|
||||
"specialization bonus estimation"));
|
||||
|
||||
static cl::opt<unsigned> MinEntryFreq(
|
||||
"funcspec-min-entry-freq", cl::init(450), cl::Hidden, cl::desc(
|
||||
"Do not specialize functions with entry block frequency lower than "
|
||||
"this value"));
|
||||
|
||||
static cl::opt<unsigned> MinScore(
|
||||
"funcspec-min-score", cl::init(2), cl::Hidden, cl::desc(
|
||||
"Do not specialize functions with score lower than this value "
|
||||
"(the ratio of specialization bonus over specialization cost)"));
|
||||
|
||||
static cl::opt<unsigned> MaxClones(
|
||||
"funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
|
||||
"The maximum number of clones allowed for a single function "
|
||||
@@ -100,225 +82,23 @@ static cl::opt<unsigned> MinFunctionSize(
|
||||
"Don't specialize functions that have less than this number of "
|
||||
"instructions"));
|
||||
|
||||
static cl::opt<unsigned> AvgLoopIters(
|
||||
"funcspec-avg-loop-iters", cl::init(10), cl::Hidden, cl::desc(
|
||||
"Average loop iteration count"));
|
||||
|
||||
static cl::opt<bool> SpecializeOnAddress(
|
||||
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
|
||||
"Enable function specialization on the address of global values"));
|
||||
|
||||
// Disabled by default as it can significantly increase compilation times.
|
||||
//
|
||||
// https://llvm-compile-time-tracker.com
|
||||
// https://github.com/nikic/llvm-compile-time-tracker
|
||||
static cl::opt<bool> SpecializeLiteralConstant(
|
||||
"funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc(
|
||||
"funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
|
||||
"Enable specialization of functions that take a literal constant as an "
|
||||
"argument"));
|
||||
|
||||
unsigned FunctionSpecializer::getBlockFreqMultiplier() {
|
||||
return BlockFreqMultiplier;
|
||||
}
|
||||
|
||||
// Estimates the instruction cost of all the basic blocks in \p WorkList.
|
||||
// The successors of such blocks are added to the list as long as they are
|
||||
// executable and they have a unique predecessor. \p WorkList represents
|
||||
// the basic blocks of a specialization which become dead once we replace
|
||||
// instructions that are known to be constants. The aim here is to estimate
|
||||
// the combination of size and latency savings in comparison to the non
|
||||
// specialized version of the function.
|
||||
static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
|
||||
ConstMap &KnownConstants, SCCPSolver &Solver,
|
||||
BlockFrequencyInfo &BFI,
|
||||
TargetTransformInfo &TTI) {
|
||||
Cost Bonus = 0;
|
||||
|
||||
// Accumulate the instruction cost of each basic block weighted by frequency.
|
||||
while (!WorkList.empty()) {
|
||||
BasicBlock *BB = WorkList.pop_back_val();
|
||||
|
||||
uint64_t Weight = BlockFreqMultiplier *
|
||||
BFI.getBlockFreq(BB).getFrequency() /
|
||||
BFI.getEntryFreq();
|
||||
if (!Weight)
|
||||
continue;
|
||||
|
||||
for (Instruction &I : *BB) {
|
||||
// Disregard SSA copies.
|
||||
if (auto *II = dyn_cast<IntrinsicInst>(&I))
|
||||
if (II->getIntrinsicID() == Intrinsic::ssa_copy)
|
||||
continue;
|
||||
// If it's a known constant we have already accounted for it.
|
||||
if (KnownConstants.contains(&I))
|
||||
continue;
|
||||
|
||||
Bonus += Weight *
|
||||
TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
|
||||
<< " after user " << I << "\n");
|
||||
}
|
||||
|
||||
// Keep adding dead successors to the list as long as they are
|
||||
// executable and they have a unique predecessor.
|
||||
for (BasicBlock *SuccBB : successors(BB))
|
||||
if (Solver.isBlockExecutable(SuccBB) &&
|
||||
SuccBB->getUniquePredecessor() == BB)
|
||||
WorkList.push_back(SuccBB);
|
||||
}
|
||||
return Bonus;
|
||||
}
|
||||
|
||||
static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
|
||||
if (auto It = KnownConstants.find(V); It != KnownConstants.end())
|
||||
return It->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
|
||||
// Cache the iterator before visiting.
|
||||
LastVisited = KnownConstants.insert({Use, C}).first;
|
||||
|
||||
if (auto *I = dyn_cast<SwitchInst>(User))
|
||||
return estimateSwitchInst(*I);
|
||||
|
||||
if (auto *I = dyn_cast<BranchInst>(User))
|
||||
return estimateBranchInst(*I);
|
||||
|
||||
C = visit(*User);
|
||||
if (!C)
|
||||
return 0;
|
||||
|
||||
KnownConstants.insert({User, C});
|
||||
|
||||
uint64_t Weight = BlockFreqMultiplier *
|
||||
BFI.getBlockFreq(User->getParent()).getFrequency() /
|
||||
BFI.getEntryFreq();
|
||||
if (!Weight)
|
||||
return 0;
|
||||
|
||||
Cost Bonus = Weight *
|
||||
TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
|
||||
<< " for user " << *User << "\n");
|
||||
|
||||
for (auto *U : User->users())
|
||||
if (auto *UI = dyn_cast<Instruction>(U))
|
||||
if (Solver.isBlockExecutable(UI->getParent()))
|
||||
Bonus += getUserBonus(UI, User, C);
|
||||
|
||||
return Bonus;
|
||||
}
|
||||
|
||||
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
|
||||
if (I.getCondition() != LastVisited->first)
|
||||
return 0;
|
||||
|
||||
auto *C = cast<ConstantInt>(LastVisited->second);
|
||||
BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor();
|
||||
// Initialize the worklist with the dead basic blocks. These are the
|
||||
// destination labels which are different from the one corresponding
|
||||
// to \p C. They should be executable and have a unique predecessor.
|
||||
SmallVector<BasicBlock *> WorkList;
|
||||
for (const auto &Case : I.cases()) {
|
||||
BasicBlock *BB = Case.getCaseSuccessor();
|
||||
if (BB == Succ || !Solver.isBlockExecutable(BB) ||
|
||||
BB->getUniquePredecessor() != I.getParent())
|
||||
continue;
|
||||
WorkList.push_back(BB);
|
||||
}
|
||||
|
||||
return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI);
|
||||
}
|
||||
|
||||
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
|
||||
if (I.getCondition() != LastVisited->first)
|
||||
return 0;
|
||||
|
||||
BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue());
|
||||
// Initialize the worklist with the dead successor as long as
|
||||
// it is executable and has a unique predecessor.
|
||||
SmallVector<BasicBlock *> WorkList;
|
||||
if (Solver.isBlockExecutable(Succ) &&
|
||||
Succ->getUniquePredecessor() == I.getParent())
|
||||
WorkList.push_back(Succ);
|
||||
|
||||
return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI);
|
||||
}
|
||||
|
||||
Constant *InstCostVisitor::visitLoadInst(LoadInst &I) {
|
||||
if (isa<ConstantPointerNull>(LastVisited->second))
|
||||
return nullptr;
|
||||
return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL);
|
||||
}
|
||||
|
||||
Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
|
||||
SmallVector<Value *, 8> Operands;
|
||||
Operands.reserve(I.getNumOperands());
|
||||
|
||||
for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {
|
||||
Value *V = I.getOperand(Idx);
|
||||
auto *C = dyn_cast<Constant>(V);
|
||||
if (!C)
|
||||
C = findConstantFor(V, KnownConstants);
|
||||
if (!C)
|
||||
return nullptr;
|
||||
Operands.push_back(C);
|
||||
}
|
||||
|
||||
auto *Ptr = cast<Constant>(Operands[0]);
|
||||
auto Ops = ArrayRef(Operands.begin() + 1, Operands.end());
|
||||
return ConstantFoldGetElementPtr(I.getSourceElementType(), Ptr,
|
||||
I.isInBounds(), std::nullopt, Ops);
|
||||
}
|
||||
|
||||
Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
|
||||
if (I.getCondition() != LastVisited->first)
|
||||
return nullptr;
|
||||
|
||||
Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()
|
||||
: I.getTrueValue();
|
||||
auto *C = dyn_cast<Constant>(V);
|
||||
if (!C)
|
||||
C = findConstantFor(V, KnownConstants);
|
||||
return C;
|
||||
}
|
||||
|
||||
Constant *InstCostVisitor::visitCastInst(CastInst &I) {
|
||||
return ConstantFoldCastOperand(I.getOpcode(), LastVisited->second,
|
||||
I.getType(), DL);
|
||||
}
|
||||
|
||||
Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
|
||||
bool Swap = I.getOperand(1) == LastVisited->first;
|
||||
Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
|
||||
auto *Other = dyn_cast<Constant>(V);
|
||||
if (!Other)
|
||||
Other = findConstantFor(V, KnownConstants);
|
||||
|
||||
if (!Other)
|
||||
return nullptr;
|
||||
|
||||
Constant *Const = LastVisited->second;
|
||||
return Swap ?
|
||||
ConstantFoldCompareInstOperands(I.getPredicate(), Other, Const, DL)
|
||||
: ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
|
||||
}
|
||||
|
||||
Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
|
||||
return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL);
|
||||
}
|
||||
|
||||
Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
|
||||
bool Swap = I.getOperand(1) == LastVisited->first;
|
||||
Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
|
||||
auto *Other = dyn_cast<Constant>(V);
|
||||
if (!Other)
|
||||
Other = findConstantFor(V, KnownConstants);
|
||||
|
||||
if (!Other)
|
||||
return nullptr;
|
||||
|
||||
Constant *Const = LastVisited->second;
|
||||
return dyn_cast_or_null<Constant>(Swap ?
|
||||
simplifyBinOp(I.getOpcode(), Other, Const, SimplifyQuery(DL))
|
||||
: simplifyBinOp(I.getOpcode(), Const, Other, SimplifyQuery(DL)));
|
||||
}
|
||||
|
||||
Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
|
||||
CallInst *Call) {
|
||||
Value *StoreValue = nullptr;
|
||||
@@ -637,6 +417,10 @@ CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) {
|
||||
CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
|
||||
for (BasicBlock &BB : *F)
|
||||
Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
|
||||
<< F->getName() << " is " << Metrics.NumInsts
|
||||
<< " instructions\n");
|
||||
}
|
||||
return Metrics;
|
||||
}
|
||||
@@ -667,7 +451,6 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
|
||||
if (Args.empty())
|
||||
return false;
|
||||
|
||||
bool HasCheckedEntryFreq = false;
|
||||
for (User *U : F->users()) {
|
||||
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
|
||||
continue;
|
||||
@@ -703,21 +486,6 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
|
||||
if (S.Args.empty())
|
||||
continue;
|
||||
|
||||
// Check the function entry frequency only once. We sink this code here to
|
||||
// postpone running the Block Frequency Analysis until we know for sure
|
||||
// there are Specialization candidates, otherwise we are adding unnecessary
|
||||
// overhead.
|
||||
if (!HasCheckedEntryFreq) {
|
||||
// Reject cold functions (for some definition of 'cold').
|
||||
uint64_t EntryFreq = (GetBFI)(*F).getEntryFreq();
|
||||
if (!ForceSpecialization && EntryFreq < MinEntryFreq)
|
||||
return false;
|
||||
|
||||
HasCheckedEntryFreq = true;
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Entry block frequency for "
|
||||
<< F->getName() << " = " << EntryFreq << "\n");
|
||||
}
|
||||
|
||||
// Check if we have encountered the same specialisation already.
|
||||
if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
|
||||
// Existing specialisation. Add the call to the list to rewrite, unless
|
||||
@@ -732,14 +500,13 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
|
||||
AllSpecs[Index].CallSites.push_back(&CS);
|
||||
} else {
|
||||
// Calculate the specialisation gain.
|
||||
Cost Score = 0;
|
||||
InstCostVisitor Visitor = getInstCostVisitorFor(F);
|
||||
Cost Score = 0 - SpecCost;
|
||||
for (ArgInfo &A : S.Args)
|
||||
Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
|
||||
Score /= SpecCost;
|
||||
Score +=
|
||||
getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F));
|
||||
|
||||
// Discard unprofitable specialisations.
|
||||
if (!ForceSpecialization && Score < MinScore)
|
||||
if (!ForceSpecialization && Score <= 0)
|
||||
continue;
|
||||
|
||||
// Create a new specialisation entry.
|
||||
@@ -823,23 +590,48 @@ Cost FunctionSpecializer::getSpecializationCost(Function *F) {
|
||||
|
||||
// Otherwise, set the specialization cost to be the cost of all the
|
||||
// instructions in the function.
|
||||
return Metrics.NumInsts;
|
||||
return Metrics.NumInsts * InlineConstants::getInstrCost();
|
||||
}
|
||||
|
||||
static Cost getUserBonus(User *U, TargetTransformInfo &TTI,
|
||||
const LoopInfo &LI) {
|
||||
auto *I = dyn_cast_or_null<Instruction>(U);
|
||||
// If not an instruction we do not know how to evaluate.
|
||||
// Keep minimum possible cost for now so that it doesnt affect
|
||||
// specialization.
|
||||
if (!I)
|
||||
return std::numeric_limits<unsigned>::min();
|
||||
|
||||
Cost Bonus =
|
||||
TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency);
|
||||
|
||||
// Increase the cost if it is inside the loop.
|
||||
unsigned LoopDepth = LI.getLoopDepth(I->getParent());
|
||||
Bonus *= std::pow((double)AvgLoopIters, LoopDepth);
|
||||
|
||||
// Traverse recursively if there are more uses.
|
||||
// TODO: Any other instructions to be added here?
|
||||
if (I->mayReadFromMemory() || I->isCast())
|
||||
for (auto *User : I->users())
|
||||
Bonus += getUserBonus(User, TTI, LI);
|
||||
|
||||
return Bonus;
|
||||
}
|
||||
|
||||
/// Compute a bonus for replacing argument \p A with constant \p C.
|
||||
Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
|
||||
InstCostVisitor &Visitor) {
|
||||
const LoopInfo &LI) {
|
||||
Function *F = A->getParent();
|
||||
auto &TTI = (GetTTI)(*F);
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
|
||||
<< C->getNameOrAsOperand() << "\n");
|
||||
|
||||
Cost TotalCost = 0;
|
||||
for (auto *U : A->users())
|
||||
if (auto *UI = dyn_cast<Instruction>(U))
|
||||
if (Solver.isBlockExecutable(UI->getParent()))
|
||||
TotalCost += Visitor.getUserBonus(UI, A, C);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus "
|
||||
<< TotalCost << " for argument " << *A << "\n");
|
||||
for (auto *U : A->users()) {
|
||||
TotalCost += getUserBonus(U, TTI, LI);
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
|
||||
TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
|
||||
}
|
||||
|
||||
// The below heuristic is only concerned with exposing inlining
|
||||
// opportunities via indirect call promotion. If the argument is not a
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "llvm/Transforms/IPO/SCCP.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/PostDominators.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
@@ -107,15 +107,13 @@ static void findReturnsToZap(Function &F,
|
||||
|
||||
static bool runIPSCCP(
|
||||
Module &M, const DataLayout &DL, FunctionAnalysisManager *FAM,
|
||||
std::function<BlockFrequencyInfo &(Function &)> GetBFI,
|
||||
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
|
||||
std::function<TargetTransformInfo &(Function &)> GetTTI,
|
||||
std::function<AssumptionCache &(Function &)> GetAC,
|
||||
function_ref<AnalysisResultsForFn(Function &)> getAnalysis,
|
||||
bool IsFuncSpecEnabled) {
|
||||
SCCPSolver Solver(DL, GetTLI, M.getContext());
|
||||
FunctionSpecializer Specializer(Solver, M, FAM, GetBFI, GetTLI, GetTTI,
|
||||
GetAC);
|
||||
FunctionSpecializer Specializer(Solver, M, FAM, GetTLI, GetTTI, GetAC);
|
||||
|
||||
// Loop over all functions, marking arguments to those with their addresses
|
||||
// taken or that are external as overdefined.
|
||||
@@ -383,23 +381,21 @@ PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
|
||||
auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
|
||||
return FAM.getResult<TargetLibraryAnalysis>(F);
|
||||
};
|
||||
auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
|
||||
return FAM.getResult<BlockFrequencyAnalysis>(F);
|
||||
};
|
||||
auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
|
||||
return FAM.getResult<TargetIRAnalysis>(F);
|
||||
};
|
||||
auto GetAC = [&FAM](Function &F) -> AssumptionCache & {
|
||||
return FAM.getResult<AssumptionAnalysis>(F);
|
||||
};
|
||||
auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn {
|
||||
auto getAnalysis = [&FAM, this](Function &F) -> AnalysisResultsForFn {
|
||||
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
|
||||
return {
|
||||
std::make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
|
||||
&DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F) };
|
||||
&DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F),
|
||||
isFuncSpecEnabled() ? &FAM.getResult<LoopAnalysis>(F) : nullptr };
|
||||
};
|
||||
|
||||
if (!runIPSCCP(M, DL, &FAM, GetBFI, GetTLI, GetTTI, GetAC, getAnalysis,
|
||||
if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, getAnalysis,
|
||||
isFuncSpecEnabled()))
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
|
||||
@@ -664,6 +664,13 @@ public:
|
||||
return A->second.PredInfo->getPredicateInfoFor(I);
|
||||
}
|
||||
|
||||
const LoopInfo &getLoopInfo(Function &F) {
|
||||
auto A = AnalysisResults.find(&F);
|
||||
assert(A != AnalysisResults.end() && A->second.LI &&
|
||||
"Need LoopInfo analysis results for function.");
|
||||
return *A->second.LI;
|
||||
}
|
||||
|
||||
DomTreeUpdater getDTU(Function &F) {
|
||||
auto A = AnalysisResults.find(&F);
|
||||
assert(A != AnalysisResults.end() && "Need analysis results for function.");
|
||||
@@ -1955,6 +1962,10 @@ const PredicateBase *SCCPSolver::getPredicateInfoFor(Instruction *I) {
|
||||
return Visitor->getPredicateInfoFor(I);
|
||||
}
|
||||
|
||||
const LoopInfo &SCCPSolver::getLoopInfo(Function &F) {
|
||||
return Visitor->getLoopInfo(F);
|
||||
}
|
||||
|
||||
DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); }
|
||||
|
||||
void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) {
|
||||
|
||||
@@ -9,83 +9,83 @@
|
||||
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<O1>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O1,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O1,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<Os>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-Os,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-NO-FUNC-SPEC,CHECK-Os,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<Oz>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-Oz,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-NO-FUNC-SPEC,CHECK-Oz,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='lto-pre-link<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-NO-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-peephole='no-op-function' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PEEPHOLE,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PEEPHOLE,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-late-loop-optimizations='no-op-loop' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-LATE,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-LATE,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-loop-optimizer-end='no-op-loop' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-END,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-END,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-scalar-optimizer-late='no-op-function' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-SCALAR-LATE,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-SCALAR-LATE,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-cgscc-optimizer-late='no-op-cgscc' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-CGSCC-LATE,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-CGSCC-LATE,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-vectorizer-start='no-op-function' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-VECTORIZER-START,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-VECTORIZER-START,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-pipeline-start='no-op-module' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-pipeline-early-simplification='no-op-module' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-pipeline-start='no-op-module' \
|
||||
; RUN: -passes='lto-pre-link<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-NO-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-optimizer-early='no-op-module' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-EARLY,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-EARLY,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes-ep-optimizer-last='no-op-module' \
|
||||
; RUN: -passes='default<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-LAST,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-LAST,CHECK-O23SZ
|
||||
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<O3>' -enable-matrix -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MATRIX
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MATRIX
|
||||
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<O3>' -enable-merge-functions -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MERGE-FUNCS
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MERGE-FUNCS
|
||||
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<O3>' -ir-outliner -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-IR-OUTLINER
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-IR-OUTLINER
|
||||
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='default<O3>' -hot-cold-split -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-HOT-COLD-SPLIT
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-HOT-COLD-SPLIT
|
||||
|
||||
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
|
||||
; CHECK-Oz: {{^}}
|
||||
@@ -109,6 +109,7 @@
|
||||
; CHECK-O-NEXT: Running pass: OpenMPOptPass
|
||||
; CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION-NEXT: Running pass: NoOpModulePass
|
||||
; CHECK-O-NEXT: Running pass: IPSCCPPass
|
||||
; CHECK-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis
|
||||
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
; CHECK-O-NEXT: Running pass: PromotePass
|
||||
@@ -163,7 +164,7 @@
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: ReassociatePass
|
||||
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
||||
; CHECK-O-NEXT: Running analysis: LoopAnalysis
|
||||
; CHECK-NO-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis
|
||||
; CHECK-O-NEXT: Running pass: LCSSAPass
|
||||
; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
||||
|
||||
@@ -9,23 +9,23 @@
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-EP
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='lto<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='lto<O2>' -S %s -passes-ep-full-link-time-optimization-early=no-op-module \
|
||||
; RUN: -passes-ep-full-link-time-optimization-last=no-op-module 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-EP
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23,CHECK-O23SZ,CHECK-EP
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='lto<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='lto<Os>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OS,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OS,CHECK-OSZ,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='lto<Oz>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='lto<O3>' -S %s -passes-ep-peephole='no-op-function' 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-EP-Peephole
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23,CHECK-O23SZ,CHECK-EP-Peephole
|
||||
|
||||
; CHECK-EP: Running pass: NoOpModulePass
|
||||
; CHECK-O: Running pass: CrossDSOCFIPass
|
||||
@@ -43,6 +43,7 @@
|
||||
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
|
||||
; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
|
||||
; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
|
||||
; CHECK-O23-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
|
||||
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
|
||||
@@ -93,7 +94,7 @@
|
||||
; CHECK-O23SZ-NEXT: Invalidating analysis: AAManager on foo
|
||||
; CHECK-O23SZ-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
|
||||
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
|
||||
; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
|
||||
; CHECK-O23SZ-NEXT: Running analysis: MemorySSAAnalysis on foo
|
||||
; CHECK-O23SZ-NEXT: Running analysis: AAManager on foo
|
||||
|
||||
@@ -10,28 +10,28 @@
|
||||
; Postlink pipelines:
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='thinlto<O1>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O1,CHECK-POSTLINK-O,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \
|
||||
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-optimizer-early='no-op-module' \
|
||||
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-EARLY
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-EARLY
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-optimizer-last='no-op-module' \
|
||||
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-LAST
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-LAST
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='thinlto<Os>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-Os
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-Os
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='thinlto<Oz>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
|
||||
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2
|
||||
|
||||
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
|
||||
; CHECK-NOEXT: {{^}}
|
||||
@@ -49,6 +49,7 @@
|
||||
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
|
||||
; CHECK-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis
|
||||
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
|
||||
@@ -100,7 +101,7 @@
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: ReassociatePass
|
||||
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
||||
; CHECK-O-NEXT: Running analysis: LoopAnalysis
|
||||
; CHECK-NO-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis
|
||||
; CHECK-O-NEXT: Running pass: LCSSAPass
|
||||
; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
||||
|
||||
@@ -3,22 +3,22 @@
|
||||
; Postlink pipelines:
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='thinlto<O1>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O1,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \
|
||||
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O3,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='thinlto<Os>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-Os,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -passes='thinlto<Oz>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
|
||||
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
|
||||
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
|
||||
; CHECK-NOEXT: {{^}}
|
||||
@@ -34,6 +34,7 @@
|
||||
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
|
||||
; CHECK-O123-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
|
||||
@@ -47,7 +48,7 @@
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
|
||||
; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
|
||||
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
|
||||
|
||||
@@ -3,27 +3,27 @@
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
||||
; RUN: -passes='thinlto<O1>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O1,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
||||
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \
|
||||
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
||||
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O3,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
||||
; RUN: -passes='thinlto<Os>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-Os,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
||||
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
||||
; RUN: -passes='thinlto<Oz>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
|
||||
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
||||
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
||||
|
||||
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
|
||||
; CHECK-NOEXT: {{^}}
|
||||
@@ -43,6 +43,7 @@
|
||||
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
|
||||
; CHECK-O123-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
|
||||
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
||||
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
|
||||
@@ -55,7 +56,7 @@
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
|
||||
; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
|
||||
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo
|
||||
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
|
||||
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -S --passes="default<O3>" -force-specialization < %s | FileCheck %s
|
||||
; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
|
||||
|
||||
define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
|
||||
entry:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=3 -funcspec-min-function-size=10 -S < %s | FileCheck %s
|
||||
|
||||
; CHECK-NOT: foo.{{[0-9]+}}
|
||||
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
|
||||
; Test function specialization wouldn't crash due to constant expression.
|
||||
; Note that this test case shows that function specialization pass would
|
||||
; transform the function even if no specialization happened.
|
||||
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
|
||||
%struct = type { i8, i16, i32, i64, i64}
|
||||
@Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
|
||||
|
||||
@@ -24,6 +26,19 @@ entry:
|
||||
}
|
||||
|
||||
define internal i64 @zoo(i1 %flag) {
|
||||
; CHECK-LABEL: @zoo(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
|
||||
; CHECK: plus:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
|
||||
; CHECK-NEXT: br label [[MERGE:%.*]]
|
||||
; CHECK: minus:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 4))
|
||||
; CHECK-NEXT: br label [[MERGE]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), [[MINUS]] ]
|
||||
; CHECK-NEXT: ret i64 [[TMP2]]
|
||||
;
|
||||
entry:
|
||||
br i1 %flag, label %plus, label %minus
|
||||
|
||||
@@ -45,9 +60,10 @@ merge:
|
||||
|
||||
define i64 @main() {
|
||||
; CHECK-LABEL: @main(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo.4(i1 false)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo.3(i1 true)
|
||||
; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 3) to i64))
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo(i1 false)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo(i1 true)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%1 = call i64 @zoo(i1 0)
|
||||
%2 = call i64 @zoo(i1 1)
|
||||
@@ -55,29 +71,3 @@ define i64 @main() {
|
||||
ret i64 %3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @func2.1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: ret i64 undef
|
||||
|
||||
; CHECK-LABEL: @func2.2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: ret i64 undef
|
||||
|
||||
; CHECK-LABEL: @zoo.3(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[PLUS:%.*]]
|
||||
; CHECK: plus:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
|
||||
; CHECK-NEXT: br label [[MERGE:%.*]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: ret i64 undef
|
||||
|
||||
; CHECK-LABEL: @zoo.4(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[MINUS:%.*]]
|
||||
; CHECK: minus:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4))
|
||||
; CHECK-NEXT: br label [[MERGE:%.*]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: ret i64 undef
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=true -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=true -funcspec-min-function-size=10 -S < %s | FileCheck %s
|
||||
|
||||
; Check that the literal constant parameter could be specialized.
|
||||
; CHECK: @foo.1(
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=5 -funcspec-min-function-size=10 -S < %s | FileCheck %s
|
||||
|
||||
; Check that the loop depth results in a larger specialization bonus.
|
||||
; CHECK: @foo.1(
|
||||
; CHECK: @foo.2(
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
@A = external dso_local constant i32, align 4
|
||||
@B = external dso_local constant i32, align 4
|
||||
@C = external dso_local constant i32, align 4
|
||||
@D = external dso_local constant i32, align 4
|
||||
|
||||
declare i1 @cond_begin()
|
||||
declare i1 @cond_end()
|
||||
declare i1 @getCond()
|
||||
|
||||
define internal i32 @foo(i32 %x, ptr %b, ptr %c) {
|
||||
entry:
|
||||
br label %loop.entry
|
||||
|
||||
loop.entry:
|
||||
br label %loop2.entry
|
||||
|
||||
loop2.entry:
|
||||
br label %loop2.body
|
||||
|
||||
loop2.body:
|
||||
%0 = load i32, ptr %b, align 4
|
||||
%1 = load i32, ptr %c, align 4
|
||||
%add.0 = add nsw i32 %0, %1
|
||||
%add = add nsw i32 %add.0, %x
|
||||
br label %loop2.end
|
||||
|
||||
loop2.end:
|
||||
%cond.end = call i1 @cond_end()
|
||||
br i1 %cond.end, label %loop2.entry, label %loop.end
|
||||
|
||||
loop.end:
|
||||
%cond2.end = call i1 @getCond()
|
||||
br i1 %cond2.end, label %loop.entry, label %return
|
||||
|
||||
return:
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
define dso_local i32 @bar(i32 %x, i32 %y) {
|
||||
entry:
|
||||
%tobool = icmp ne i32 %x, 0
|
||||
br i1 %tobool, label %if.then, label %if.else
|
||||
|
||||
if.then:
|
||||
%call = call i32 @foo(i32 %x, ptr @A, ptr @C)
|
||||
br label %return
|
||||
|
||||
if.else:
|
||||
%call1 = call i32 @foo(i32 %y, ptr @B, ptr @D)
|
||||
br label %return
|
||||
|
||||
return:
|
||||
%retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
|
||||
|
||||
; Checks for callsites that have been annotated with MinSize. We only expect
|
||||
; specialisation for the call that does not have the attribute:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<no-func-spec>" -force-specialization -S < %s | FileCheck %s --check-prefix=NOFSPEC
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<no-func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s --check-prefix=NOFSPEC
|
||||
|
||||
define i64 @main(i64 %x, i1 %flag) {
|
||||
;
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-avg-loop-iters=1 -force-specialization -S < %s | FileCheck %s
|
||||
|
||||
; DISABLED-NOT: @func.1(
|
||||
; DISABLED-NOT: @func.2(
|
||||
|
||||
define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
|
||||
%4 = alloca i32, align 4
|
||||
store i32 %1, ptr %4, align 4
|
||||
%5 = load i32, ptr %4, align 4
|
||||
%6 = icmp slt i32 %5, 1
|
||||
br i1 %6, label %14, label %7
|
||||
|
||||
7: ; preds = %3
|
||||
%8 = load i32, ptr %4, align 4
|
||||
%9 = sext i32 %8 to i64
|
||||
%10 = getelementptr inbounds i32, ptr %0, i64 %9
|
||||
call void %2(ptr %10)
|
||||
%11 = load i32, ptr %4, align 4
|
||||
%12 = add nsw i32 %11, -1
|
||||
%13 = call i32 @func(ptr %0, i32 %12, ptr %2)
|
||||
br label %14
|
||||
|
||||
14: ; preds = %3, %7
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define internal void @increment(ptr nocapture %0) {
|
||||
%2 = load i32, ptr %0, align 4
|
||||
%3 = add nsw i32 %2, 1
|
||||
store i32 %3, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @decrement(ptr nocapture %0) {
|
||||
%2 = load i32, ptr %0, align 4
|
||||
%3 = add nsw i32 %2, -1
|
||||
store i32 %3, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @main(ptr %0, i32 %1) {
|
||||
; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
|
||||
%3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
|
||||
; CHECK: call void @func.1(ptr [[TMP0]], i32 0)
|
||||
%4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
|
||||
; CHECK: ret i32 0
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
; CHECK: @func.1(
|
||||
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
|
||||
; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
|
||||
; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
|
||||
; CHECK: 6:
|
||||
; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
|
||||
; CHECK: call void @decrement(ptr [[TMP9]])
|
||||
; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
||||
; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]])
|
||||
; CHECK: br label [[TMP12:%.*]]
|
||||
; CHECK: 12:
|
||||
; CHECK: ret void
|
||||
;
|
||||
;
|
||||
; CHECK: @func.2(
|
||||
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
|
||||
; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
|
||||
; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
|
||||
; CHECK: 6:
|
||||
; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
|
||||
; CHECK: call void @increment(ptr [[TMP9]])
|
||||
; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
||||
; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
|
||||
; CHECK: br label [[TMP12:%.*]]
|
||||
; CHECK: 12:
|
||||
; CHECK: ret void
|
||||
@@ -1,7 +1,9 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -S < %s | \
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=3 -S < %s | \
|
||||
; RUN: FileCheck %s --check-prefixes=COMMON,DISABLED
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | \
|
||||
; RUN: FileCheck %s --check-prefixes=COMMON,FORCE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=3 -force-specialization -S < %s | \
|
||||
; RUN: FileCheck %s --check-prefixes=COMMON,FORCE
|
||||
|
||||
; Test for specializing a constant global.
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" -force-specialization < %s | FileCheck %s
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
|
||||
define dso_local i32 @p0(i32 noundef %x) {
|
||||
entry:
|
||||
%add = add nsw i32 %x, 1
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization < %s | FileCheck %s
|
||||
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
|
||||
define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
|
||||
entry:
|
||||
%call = tail call i32 %p(i32 noundef %x)
|
||||
|
||||
@@ -6,10 +6,10 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
|
||||
; CHECK: plus:
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 42, ptr @plus, ptr @minus)
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
|
||||
; CHECK-NEXT: br label [[MERGE:%.*]]
|
||||
; CHECK: minus:
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y:%.*]], ptr @minus, ptr @plus)
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
|
||||
; CHECK-NEXT: br label [[MERGE]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
|
||||
@@ -20,7 +20,7 @@ entry:
|
||||
br i1 %flag, label %plus, label %minus
|
||||
|
||||
plus:
|
||||
%cmp0 = call i64 @compute(i64 %x, i64 42, ptr @plus, ptr @minus)
|
||||
%cmp0 = call i64 @compute(i64 %x, i64 %y, ptr @plus, ptr @minus)
|
||||
br label %merge
|
||||
|
||||
minus:
|
||||
@@ -68,9 +68,9 @@ entry:
|
||||
|
||||
; CHECK-LABEL: @compute.2
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 42)
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 42, ptr @plus, ptr @plus)
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
|
||||
|
||||
; CHECK-LABEL: @compute.3
|
||||
; CHECK-NEXT: entry:
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" \
|
||||
; RUN: -funcspec-for-literal-constant=0 \
|
||||
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" \
|
||||
; RUN: -funcspec-for-literal-constant=1 \
|
||||
; RUN: -funcspec-for-literal-constant \
|
||||
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
|
||||
|
||||
define i32 @f0(i32 noundef %x) {
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=2 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS2
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
|
||||
|
||||
; DISABLED-NOT: @func.1(
|
||||
; DISABLED-NOT: @func.2(
|
||||
; DISABLED-NOT: @func.3(
|
||||
|
||||
; Recursive test subject. Spills the i32 argument to a stack slot and, while
; that value is at least 1, invokes the function pointer %2 on the element of
; %0 at that index, then recurses with the value decremented by one.
; Every path returns 0.
define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
|
||||
%4 = alloca i32, align 4
|
||||
store i32 %1, ptr %4, align 4
|
||||
%5 = load i32, ptr %4, align 4
|
||||
; Values below 1 skip the callback and the recursive call.
%6 = icmp slt i32 %5, 1
|
||||
br i1 %6, label %14, label %7
|
||||
|
||||
7: ; preds = %3
|
||||
%8 = load i32, ptr %4, align 4
|
||||
%9 = sext i32 %8 to i64
|
||||
%10 = getelementptr inbounds i32, ptr %0, i64 %9
|
||||
; Indirect call through the function-pointer argument.
call void %2(ptr %10)
|
||||
%11 = load i32, ptr %4, align 4
|
||||
%12 = add nsw i32 %11, -1
|
||||
; Self-recursion forwards the same pointer and the same callback.
%13 = call i32 @func(ptr %0, i32 %12, ptr %2)
|
||||
br label %14
|
||||
|
||||
14: ; preds = %3, %7
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Callback passed to @func by @main: loads the i32 that %0 points to, adds 1
; and stores the result back to the same location.
define internal void @increment(ptr nocapture %0) {
|
||||
%2 = load i32, ptr %0, align 4
|
||||
%3 = add nsw i32 %2, 1
|
||||
store i32 %3, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Callback passed to @func by @main: loads the i32 that %0 points to, adds -1
; and stores the result back to the same location.
define internal void @decrement(ptr nocapture %0) {
|
||||
%2 = load i32, ptr %0, align 4
|
||||
%3 = add nsw i32 %2, -1
|
||||
store i32 %3, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test driver: calls @func twice on the same pointer, first with @increment
; and then with @decrement as the callback. The second call receives the
; first call's return value as its i32 argument, and its result is returned.
define i32 @main(ptr %0, i32 %1) {
|
||||
; COMMON: define i32 @main(
|
||||
; COMMON-NEXT: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
|
||||
; COMMON-NEXT: call void @func.1(ptr [[TMP0]])
|
||||
; COMMON-NEXT: ret i32 0
|
||||
;
|
||||
%3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
|
||||
%4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
; COMMON: define internal void @func.1(
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = alloca i32, align 4
|
||||
; COMMON-NEXT: store i32 0, ptr [[TMP2]], align 4
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
|
||||
; COMMON-NEXT: br i1 [[TMP4]], label [[TMP11:%.*]], label [[TMP5:%.*]]
|
||||
; COMMON: 5:
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; COMMON-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
|
||||
; COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP7]]
|
||||
; COMMON-NEXT: call void @decrement(ptr [[TMP8]])
|
||||
; COMMON-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; COMMON-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
|
||||
; ITERS1-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
|
||||
; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP10]])
|
||||
; COMMON-NEXT: br label [[TMP11:%.*]]
|
||||
; COMMON: 11:
|
||||
; COMMON-NEXT: ret void
|
||||
;
|
||||
; COMMON: define internal void @func.2(
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = alloca i32, align 4
|
||||
; COMMON-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
|
||||
; COMMON-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
|
||||
; COMMON: 6:
|
||||
; COMMON-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; COMMON-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; COMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
|
||||
; COMMON-NEXT: call void @increment(ptr [[TMP9]])
|
||||
; COMMON-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; COMMON-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
||||
; COMMON-NEXT: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
|
||||
; COMMON-NEXT: br label [[TMP12:%.*]]
|
||||
; COMMON: 12:
|
||||
; COMMON-NEXT: ret void
|
||||
;
|
||||
; ITERS2: define internal void @func.3(
|
||||
; ITERS2-NEXT: [[TMP3:%.*]] = alloca i32, align 4
|
||||
; ITERS2-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
|
||||
; ITERS2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; ITERS2-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
|
||||
; ITERS2-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
|
||||
; ITERS2: 6:
|
||||
; ITERS2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; ITERS2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; ITERS2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
|
||||
; ITERS2-NEXT: call void @decrement(ptr [[TMP9]])
|
||||
; ITERS2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; ITERS2-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
||||
; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP11]])
|
||||
; ITERS2-NEXT: br label [[TMP12:%.*]]
|
||||
; ITERS2: 12:
|
||||
; ITERS2-NEXT: ret void
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-min-entry-freq=1 < %s | FileCheck %s
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
|
||||
define dso_local i32 @p0(i32 noundef %x) {
|
||||
entry:
|
||||
%add = add nsw i32 %x, 1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
|
||||
|
||||
define i64 @main(i64 %x, i1 %flag) {
|
||||
entry:
|
||||
|
||||
@@ -1,12 +1,20 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
|
||||
|
||||
; Make sure that we iterate correctly after sorting the specializations:
|
||||
;
|
||||
; Score(@plus, @minus) > Score(42, @minus, @power) > Score(@power, @mul)
|
||||
; FnSpecialization: Specializations for function compute
|
||||
; FnSpecialization: Gain = 608
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = power
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = mul
|
||||
; FnSpecialization: Gain = 982
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = plus
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = minus
|
||||
; FnSpecialization: Gain = 795
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = minus
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = power
|
||||
|
||||
define i64 @main(i64 %x, i64 %y, i1 %flag) {
|
||||
; NONE-LABEL: @main(
|
||||
@@ -108,11 +116,11 @@ merge:
|
||||
;
|
||||
; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
|
||||
; THREE-NEXT: entry:
|
||||
; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 42)
|
||||
; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 42)
|
||||
; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
|
||||
; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
|
||||
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], 42
|
||||
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
|
||||
; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
|
||||
; THREE-NEXT: ret i64 [[TMP5]]
|
||||
; THREE-NEXT: }
|
||||
|
||||
@@ -12,7 +12,6 @@ add_llvm_unittest(IPOTests
|
||||
LowerTypeTests.cpp
|
||||
WholeProgramDevirt.cpp
|
||||
AttributorTest.cpp
|
||||
FunctionSpecializationTest.cpp
|
||||
)
|
||||
|
||||
set_property(TARGET IPOTests PROPERTY FOLDER "Tests/UnitTests/TransformsTests")
|
||||
|
||||
@@ -1,261 +0,0 @@
|
||||
//===- FunctionSpecializationTest.cpp - Cost model unit tests -------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/PostDominators.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/AsmParser/Parser.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/Support/SourceMgr.h"
|
||||
#include "llvm/Transforms/IPO/FunctionSpecialization.h"
|
||||
#include "llvm/Transforms/Utils/SCCPSolver.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include <memory>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class FunctionSpecializationTest : public testing::Test {
|
||||
protected:
|
||||
LLVMContext Ctx;
|
||||
FunctionAnalysisManager FAM;
|
||||
std::unique_ptr<Module> M;
|
||||
std::unique_ptr<SCCPSolver> Solver;
|
||||
|
||||
FunctionSpecializationTest() {
|
||||
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
|
||||
FAM.registerPass([&] { return TargetIRAnalysis(); });
|
||||
FAM.registerPass([&] { return BlockFrequencyAnalysis(); });
|
||||
FAM.registerPass([&] { return BranchProbabilityAnalysis(); });
|
||||
FAM.registerPass([&] { return LoopAnalysis(); });
|
||||
FAM.registerPass([&] { return AssumptionAnalysis(); });
|
||||
FAM.registerPass([&] { return DominatorTreeAnalysis(); });
|
||||
FAM.registerPass([&] { return PostDominatorTreeAnalysis(); });
|
||||
FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
|
||||
}
|
||||
|
||||
Module &parseModule(const char *ModuleString) {
|
||||
SMDiagnostic Err;
|
||||
M = parseAssemblyString(ModuleString, Err, Ctx);
|
||||
EXPECT_TRUE(M);
|
||||
return *M;
|
||||
}
|
||||
|
||||
FunctionSpecializer getSpecializerFor(Function *F) {
|
||||
auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
|
||||
return FAM.getResult<TargetLibraryAnalysis>(F);
|
||||
};
|
||||
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
|
||||
return FAM.getResult<TargetIRAnalysis>(F);
|
||||
};
|
||||
auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
|
||||
return FAM.getResult<BlockFrequencyAnalysis>(F);
|
||||
};
|
||||
auto GetAC = [this](Function &F) -> AssumptionCache & {
|
||||
return FAM.getResult<AssumptionAnalysis>(F);
|
||||
};
|
||||
auto GetAnalysis = [this](Function &F) -> AnalysisResultsForFn {
|
||||
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
|
||||
return { std::make_unique<PredicateInfo>(F, DT,
|
||||
FAM.getResult<AssumptionAnalysis>(F)),
|
||||
&DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F) };
|
||||
};
|
||||
|
||||
Solver = std::make_unique<SCCPSolver>(M->getDataLayout(), GetTLI, Ctx);
|
||||
|
||||
Solver->addAnalysis(*F, GetAnalysis(*F));
|
||||
Solver->markBlockExecutable(&F->front());
|
||||
for (Argument &Arg : F->args())
|
||||
Solver->markOverdefined(&Arg);
|
||||
Solver->solveWhileResolvedUndefsIn(*M);
|
||||
|
||||
return FunctionSpecializer(*Solver, *M, &FAM, GetBFI, GetTLI, GetTTI,
|
||||
GetAC);
|
||||
}
|
||||
|
||||
Cost getInstCost(Instruction &I) {
|
||||
auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
|
||||
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
|
||||
|
||||
uint64_t Weight = FunctionSpecializer::getBlockFreqMultiplier() *
|
||||
BFI.getBlockFreq(I.getParent()).getFrequency() /
|
||||
BFI.getEntryFreq();
|
||||
return Weight *
|
||||
TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Compares the specialization bonus reported for each argument of a function
// built around a switch against a reference sum of per-instruction costs.
TEST_F(FunctionSpecializationTest, SwitchInst) {
  const char *IR = R"(
define void @foo(i32 %a, i32 %b, i32 %i) {
entry:
switch i32 %i, label %default
[ i32 1, label %case1
i32 2, label %case2 ]
case1:
%0 = mul i32 %a, 2
%1 = sub i32 6, 5
br label %bb1
case2:
%2 = and i32 %b, 3
%3 = sdiv i32 8, 2
br label %bb2
bb1:
%4 = add i32 %0, %b
br label %default
bb2:
%5 = or i32 %2, %a
br label %default
default:
ret void
}
)";

  Module &Mod = parseModule(IR);
  Function *F = Mod.getFunction("foo");
  FunctionSpecializer Specializer = getSpecializerFor(F);
  InstCostVisitor Visitor = Specializer.getInstCostVisitorFor(F);

  Constant *One =
      ConstantInt::get(IntegerType::getInt32Ty(Mod.getContext()), 1);

  // Step over the entry block; the remaining blocks are taken in the order
  // they appear in the IR above.
  auto BlockIt = ++F->begin();
  BasicBlock &Case1 = *BlockIt++;
  BasicBlock &Case2 = *BlockIt++;
  BasicBlock &BB1 = *BlockIt++;
  BasicBlock &BB2 = *BlockIt++;

  Instruction &Mul = Case1.front();

  auto Case2It = Case2.begin();
  Instruction &And = *Case2It++;
  Instruction &Sdiv = *Case2It;
  Instruction &BrBB2 = Case2.back();

  Instruction &Add = BB1.front();

  Instruction &Or = BB2.front();
  Instruction &BrDefault = BB2.back();

  // %a := 1 -- the expected bonus is the cost of the mul alone.
  Cost Ref = getInstCost(Mul);
  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
  EXPECT_EQ(Bonus, Ref);

  // %b := 1 -- the expected bonus covers the and, the or and the add.
  Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add);
  Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
  EXPECT_EQ(Bonus, Ref);

  // %i := 1 -- the expected bonus covers the sdiv and the two branches that
  // terminate case2 and bb2.
  Ref = getInstCost(Sdiv) + getInstCost(BrBB2) + getInstCost(BrDefault);
  Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
  EXPECT_EQ(Bonus, Ref);
}
|
||||
|
||||
// Compares the specialization bonus reported for each argument of a function
// guarded by a conditional branch against a reference sum of
// per-instruction costs.
TEST_F(FunctionSpecializationTest, BranchInst) {
  const char *IR = R"(
define void @foo(i32 %a, i32 %b, i1 %cond) {
entry:
br i1 %cond, label %bb0, label %bb2
bb0:
%0 = mul i32 %a, 2
%1 = sub i32 6, 5
br label %bb1
bb1:
%2 = add i32 %0, %b
%3 = sdiv i32 8, 2
br label %bb2
bb2:
ret void
}
)";

  Module &Mod = parseModule(IR);
  Function *F = Mod.getFunction("foo");
  FunctionSpecializer Specializer = getSpecializerFor(F);
  InstCostVisitor Visitor = Specializer.getInstCostVisitorFor(F);

  LLVMContext &Context = Mod.getContext();
  Constant *One = ConstantInt::get(IntegerType::getInt32Ty(Context), 1);
  Constant *False = ConstantInt::getFalse(Context);

  // Step over the entry block to reach bb0 and bb1.
  auto BlockIt = ++F->begin();
  BasicBlock &BB0 = *BlockIt++;
  BasicBlock &BB1 = *BlockIt;

  auto BB0It = BB0.begin();
  Instruction &Mul = *BB0It++;
  Instruction &Sub = *BB0It;
  Instruction &BrBB1 = BB0.back();

  auto BB1It = BB1.begin();
  Instruction &Add = *BB1It++;
  Instruction &Sdiv = *BB1It;
  Instruction &BrBB2 = BB1.back();

  // %a := 1 -- the expected bonus is the cost of the mul alone.
  Cost Ref = getInstCost(Mul);
  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
  EXPECT_EQ(Bonus, Ref);

  // %b := 1 -- the expected bonus is the cost of the add alone.
  Ref = getInstCost(Add);
  Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
  EXPECT_EQ(Bonus, Ref);

  // %cond := false -- the expected bonus covers the sub and the sdiv plus
  // the branches that terminate bb0 and bb1.
  Ref = getInstCost(Sub) + getInstCost(BrBB1) + getInstCost(Sdiv) +
        getInstCost(BrBB2);
  Bonus = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
  EXPECT_EQ(Bonus, Ref);
}
|
||||
|
||||
// Compares the specialization bonus for a straight-line function (icmp,
// zext, select, gep, load) against a reference sum of per-instruction costs.
TEST_F(FunctionSpecializationTest, Misc) {
  const char *IR = R"(
@g = constant [2 x i32] zeroinitializer, align 4

define i32 @foo(i8 %a, i1 %cond, ptr %b) {
%cmp = icmp eq i8 %a, 10
%ext = zext i1 %cmp to i32
%sel = select i1 %cond, i32 %ext, i32 1
%gep = getelementptr i32, ptr %b, i32 %sel
%ld = load i32, ptr %gep
ret i32 %ld
}
)";

  Module &Mod = parseModule(IR);
  Function *F = Mod.getFunction("foo");
  FunctionSpecializer Specializer = getSpecializerFor(F);
  InstCostVisitor Visitor = Specializer.getInstCostVisitorFor(F);

  GlobalVariable *GV = Mod.getGlobalVariable("g");
  LLVMContext &Context = Mod.getContext();
  Constant *One = ConstantInt::get(IntegerType::getInt8Ty(Context), 1);
  Constant *True = ConstantInt::getTrue(Context);

  // The function has a single block; pick up its first five instructions in
  // program order.
  auto InstIt = F->front().begin();
  Instruction &Icmp = *InstIt;
  Instruction &Zext = *++InstIt;
  Instruction &Select = *++InstIt;
  Instruction &Gep = *++InstIt;
  Instruction &Load = *++InstIt;

  // %a := 1 -- the expected bonus covers the icmp and the zext.
  Cost Ref = getInstCost(Icmp) + getInstCost(Zext);
  Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
  EXPECT_EQ(Bonus, Ref);

  // %cond := true -- the expected bonus is the cost of the select alone.
  Ref = getInstCost(Select);
  Bonus = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
  EXPECT_EQ(Bonus, Ref);

  // %b := @g -- the expected bonus covers the gep and the load.
  Ref = getInstCost(Gep) + getInstCost(Load);
  Bonus = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
  EXPECT_EQ(Bonus, Ref);
}
|
||||
Reference in New Issue
Block a user