Revert "[FuncSpec] Replace LoopInfo with BlockFrequencyInfo"

As reported on https://reviews.llvm.org/D150375#4367861 and
following, this change causes PDT invalidation issues. Revert
it and dependent commits.

This reverts commit 0524534d52.
This reverts commit ced90d1ff6.
This reverts commit 9f992cc935.
This reverts commit 1b1232047e.
This commit is contained in:
Nikita Popov
2023-05-30 14:40:18 +02:00
parent f81f32adc9
commit 96a14f388b
29 changed files with 344 additions and 807 deletions

View File

@@ -48,11 +48,10 @@
#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
#define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/SCCPSolver.h"
@@ -70,9 +69,6 @@ using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
// Just a shorter abbreviation to improve indentation.
using Cost = InstructionCost;
// Map of known constants found during the specialization bonus estimation.
using ConstMap = DenseMap<Value *, Constant *>;
// Specialization signature, used to uniquely designate a specialization within
// a function.
struct SpecSig {
@@ -119,39 +115,6 @@ struct Spec {
: F(F), Sig(S), Score(Score) {}
};
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
const DataLayout &DL;
BlockFrequencyInfo &BFI;
TargetTransformInfo &TTI;
SCCPSolver &Solver;
ConstMap KnownConstants;
ConstMap::iterator LastVisited;
public:
InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI,
TargetTransformInfo &TTI, SCCPSolver &Solver)
: DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
Cost getUserBonus(Instruction *User, Value *Use, Constant *C);
private:
friend class InstVisitor<InstCostVisitor, Constant *>;
Cost estimateSwitchInst(SwitchInst &I);
Cost estimateBranchInst(BranchInst &I);
Constant *visitInstruction(Instruction &I) { return nullptr; }
Constant *visitLoadInst(LoadInst &I);
Constant *visitGetElementPtrInst(GetElementPtrInst &I);
Constant *visitSelectInst(SelectInst &I);
Constant *visitCastInst(CastInst &I);
Constant *visitCmpInst(CmpInst &I);
Constant *visitUnaryOperator(UnaryOperator &I);
Constant *visitBinaryOperator(BinaryOperator &I);
};
class FunctionSpecializer {
/// The IPSCCP Solver.
@@ -163,7 +126,6 @@ class FunctionSpecializer {
FunctionAnalysisManager *FAM;
/// Analyses used to help determine if a function should be specialized.
std::function<BlockFrequencyInfo &(Function &)> GetBFI;
std::function<const TargetLibraryInfo &(Function &)> GetTLI;
std::function<TargetTransformInfo &(Function &)> GetTTI;
std::function<AssumptionCache &(Function &)> GetAC;
@@ -175,12 +137,11 @@ class FunctionSpecializer {
public:
FunctionSpecializer(
SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM,
std::function<BlockFrequencyInfo &(Function &)> GetBFI,
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
std::function<TargetTransformInfo &(Function &)> GetTTI,
std::function<AssumptionCache &(Function &)> GetAC)
: Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI),
GetTTI(GetTTI), GetAC(GetAC) {}
: Solver(Solver), M(M), FAM(FAM), GetTLI(GetTLI), GetTTI(GetTTI),
GetAC(GetAC) {}
~FunctionSpecializer();
@@ -188,18 +149,6 @@ public:
bool run();
static unsigned getBlockFreqMultiplier();
InstCostVisitor getInstCostVisitorFor(Function *F) {
auto &BFI = (GetBFI)(*F);
auto &TTI = (GetTTI)(*F);
return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver);
}
/// Compute a bonus for replacing argument \p A with constant \p C.
Cost getSpecializationBonus(Argument *A, Constant *C,
InstCostVisitor &Visitor);
private:
Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
@@ -243,6 +192,9 @@ private:
/// Compute and return the cost of specializing function \p F.
Cost getSpecializationCost(Function *F);
/// Compute a bonus for replacing argument \p A with constant \p C.
Cost getSpecializationBonus(Argument *A, Constant *C, const LoopInfo &LI);
/// Determine if it is possible to specialise the function for constant values
/// of the formal parameter \p A.
bool isArgumentInteresting(Argument *A);

View File

@@ -44,6 +44,7 @@ struct AnalysisResultsForFn {
std::unique_ptr<PredicateInfo> PredInfo;
DominatorTree *DT;
PostDominatorTree *PDT;
LoopInfo *LI;
};
/// Helper struct shared between Function Specialization and SCCP Solver.
@@ -90,6 +91,8 @@ public:
const PredicateBase *getPredicateInfoFor(Instruction *I);
const LoopInfo &getLoopInfo(Function &F);
DomTreeUpdater getDTU(Function &F);
/// trackValueOfGlobalVariable - Clients can use this method to

View File

@@ -48,14 +48,12 @@
#include "llvm/Transforms/IPO/FunctionSpecialization.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -74,22 +72,6 @@ static cl::opt<bool> ForceSpecialization(
"Force function specialization for every call site with a constant "
"argument"));
// Set to 2^3 to model three levels of if-else nest.
static cl::opt<unsigned> BlockFreqMultiplier(
"funcspec-block-freq-multiplier", cl::init(8), cl::Hidden, cl::desc(
"Multiplier to scale block frequency of user instructions during "
"specialization bonus estimation"));
static cl::opt<unsigned> MinEntryFreq(
"funcspec-min-entry-freq", cl::init(450), cl::Hidden, cl::desc(
"Do not specialize functions with entry block frequency lower than "
"this value"));
static cl::opt<unsigned> MinScore(
"funcspec-min-score", cl::init(2), cl::Hidden, cl::desc(
"Do not specialize functions with score lower than this value "
"(the ratio of specialization bonus over specialization cost)"));
static cl::opt<unsigned> MaxClones(
"funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
"The maximum number of clones allowed for a single function "
@@ -100,225 +82,23 @@ static cl::opt<unsigned> MinFunctionSize(
"Don't specialize functions that have less than this number of "
"instructions"));
static cl::opt<unsigned> AvgLoopIters(
"funcspec-avg-loop-iters", cl::init(10), cl::Hidden, cl::desc(
"Average loop iteration count"));
static cl::opt<bool> SpecializeOnAddress(
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
"Enable function specialization on the address of global values"));
// Disabled by default as it can significantly increase compilation times.
//
// https://llvm-compile-time-tracker.com
// https://github.com/nikic/llvm-compile-time-tracker
static cl::opt<bool> SpecializeLiteralConstant(
"funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc(
"funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
"Enable specialization of functions that take a literal constant as an "
"argument"));
unsigned FunctionSpecializer::getBlockFreqMultiplier() {
return BlockFreqMultiplier;
}
// Estimates the instruction cost of all the basic blocks in \p WorkList.
// The successors of such blocks are added to the list as long as they are
// executable and they have a unique predecessor. \p WorkList represents
// the basic blocks of a specialization which become dead once we replace
// instructions that are known to be constants. The aim here is to estimate
// the combination of size and latency savings in comparison to the non
// specialized version of the function.
static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
ConstMap &KnownConstants, SCCPSolver &Solver,
BlockFrequencyInfo &BFI,
TargetTransformInfo &TTI) {
Cost Bonus = 0;
// Accumulate the instruction cost of each basic block weighted by frequency.
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
uint64_t Weight = BlockFreqMultiplier *
BFI.getBlockFreq(BB).getFrequency() /
BFI.getEntryFreq();
if (!Weight)
continue;
for (Instruction &I : *BB) {
// Disregard SSA copies.
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::ssa_copy)
continue;
// If it's a known constant we have already accounted for it.
if (KnownConstants.contains(&I))
continue;
Bonus += Weight *
TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
<< " after user " << I << "\n");
}
// Keep adding dead successors to the list as long as they are
// executable and they have a unique predecessor.
for (BasicBlock *SuccBB : successors(BB))
if (Solver.isBlockExecutable(SuccBB) &&
SuccBB->getUniquePredecessor() == BB)
WorkList.push_back(SuccBB);
}
return Bonus;
}
static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
if (auto It = KnownConstants.find(V); It != KnownConstants.end())
return It->second;
return nullptr;
}
Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
// Cache the iterator before visiting.
LastVisited = KnownConstants.insert({Use, C}).first;
if (auto *I = dyn_cast<SwitchInst>(User))
return estimateSwitchInst(*I);
if (auto *I = dyn_cast<BranchInst>(User))
return estimateBranchInst(*I);
C = visit(*User);
if (!C)
return 0;
KnownConstants.insert({User, C});
uint64_t Weight = BlockFreqMultiplier *
BFI.getBlockFreq(User->getParent()).getFrequency() /
BFI.getEntryFreq();
if (!Weight)
return 0;
Cost Bonus = Weight *
TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency);
LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
<< " for user " << *User << "\n");
for (auto *U : User->users())
if (auto *UI = dyn_cast<Instruction>(U))
if (Solver.isBlockExecutable(UI->getParent()))
Bonus += getUserBonus(UI, User, C);
return Bonus;
}
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
if (I.getCondition() != LastVisited->first)
return 0;
auto *C = cast<ConstantInt>(LastVisited->second);
BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor();
// Initialize the worklist with the dead basic blocks. These are the
// destination labels which are different from the one corresponding
// to \p C. They should be executable and have a unique predecessor.
SmallVector<BasicBlock *> WorkList;
for (const auto &Case : I.cases()) {
BasicBlock *BB = Case.getCaseSuccessor();
if (BB == Succ || !Solver.isBlockExecutable(BB) ||
BB->getUniquePredecessor() != I.getParent())
continue;
WorkList.push_back(BB);
}
return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI);
}
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
if (I.getCondition() != LastVisited->first)
return 0;
BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue());
// Initialize the worklist with the dead successor as long as
// it is executable and has a unique predecessor.
SmallVector<BasicBlock *> WorkList;
if (Solver.isBlockExecutable(Succ) &&
Succ->getUniquePredecessor() == I.getParent())
WorkList.push_back(Succ);
return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI);
}
Constant *InstCostVisitor::visitLoadInst(LoadInst &I) {
if (isa<ConstantPointerNull>(LastVisited->second))
return nullptr;
return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL);
}
Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
SmallVector<Value *, 8> Operands;
Operands.reserve(I.getNumOperands());
for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {
Value *V = I.getOperand(Idx);
auto *C = dyn_cast<Constant>(V);
if (!C)
C = findConstantFor(V, KnownConstants);
if (!C)
return nullptr;
Operands.push_back(C);
}
auto *Ptr = cast<Constant>(Operands[0]);
auto Ops = ArrayRef(Operands.begin() + 1, Operands.end());
return ConstantFoldGetElementPtr(I.getSourceElementType(), Ptr,
I.isInBounds(), std::nullopt, Ops);
}
Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
if (I.getCondition() != LastVisited->first)
return nullptr;
Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()
: I.getTrueValue();
auto *C = dyn_cast<Constant>(V);
if (!C)
C = findConstantFor(V, KnownConstants);
return C;
}
Constant *InstCostVisitor::visitCastInst(CastInst &I) {
return ConstantFoldCastOperand(I.getOpcode(), LastVisited->second,
I.getType(), DL);
}
Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
bool Swap = I.getOperand(1) == LastVisited->first;
Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
auto *Other = dyn_cast<Constant>(V);
if (!Other)
Other = findConstantFor(V, KnownConstants);
if (!Other)
return nullptr;
Constant *Const = LastVisited->second;
return Swap ?
ConstantFoldCompareInstOperands(I.getPredicate(), Other, Const, DL)
: ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
}
Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL);
}
Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
bool Swap = I.getOperand(1) == LastVisited->first;
Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
auto *Other = dyn_cast<Constant>(V);
if (!Other)
Other = findConstantFor(V, KnownConstants);
if (!Other)
return nullptr;
Constant *Const = LastVisited->second;
return dyn_cast_or_null<Constant>(Swap ?
simplifyBinOp(I.getOpcode(), Other, Const, SimplifyQuery(DL))
: simplifyBinOp(I.getOpcode(), Const, Other, SimplifyQuery(DL)));
}
Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
CallInst *Call) {
Value *StoreValue = nullptr;
@@ -637,6 +417,10 @@ CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) {
CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
for (BasicBlock &BB : *F)
Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
<< F->getName() << " is " << Metrics.NumInsts
<< " instructions\n");
}
return Metrics;
}
@@ -667,7 +451,6 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
if (Args.empty())
return false;
bool HasCheckedEntryFreq = false;
for (User *U : F->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
@@ -703,21 +486,6 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
if (S.Args.empty())
continue;
// Check the function entry frequency only once. We sink this code here to
// postpone running the Block Frequency Analysis until we know for sure
// there are Specialization candidates, otherwise we are adding unnecessary
// overhead.
if (!HasCheckedEntryFreq) {
// Reject cold functions (for some definition of 'cold').
uint64_t EntryFreq = (GetBFI)(*F).getEntryFreq();
if (!ForceSpecialization && EntryFreq < MinEntryFreq)
return false;
HasCheckedEntryFreq = true;
LLVM_DEBUG(dbgs() << "FnSpecialization: Entry block frequency for "
<< F->getName() << " = " << EntryFreq << "\n");
}
// Check if we have encountered the same specialisation already.
if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
// Existing specialisation. Add the call to the list to rewrite, unless
@@ -732,14 +500,13 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
AllSpecs[Index].CallSites.push_back(&CS);
} else {
// Calculate the specialisation gain.
Cost Score = 0;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
Cost Score = 0 - SpecCost;
for (ArgInfo &A : S.Args)
Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
Score /= SpecCost;
Score +=
getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F));
// Discard unprofitable specialisations.
if (!ForceSpecialization && Score < MinScore)
if (!ForceSpecialization && Score <= 0)
continue;
// Create a new specialisation entry.
@@ -823,23 +590,48 @@ Cost FunctionSpecializer::getSpecializationCost(Function *F) {
// Otherwise, set the specialization cost to be the cost of all the
// instructions in the function.
return Metrics.NumInsts;
return Metrics.NumInsts * InlineConstants::getInstrCost();
}
static Cost getUserBonus(User *U, TargetTransformInfo &TTI,
const LoopInfo &LI) {
auto *I = dyn_cast_or_null<Instruction>(U);
// If not an instruction we do not know how to evaluate.
// Keep minimum possible cost for now so that it doesnt affect
// specialization.
if (!I)
return std::numeric_limits<unsigned>::min();
Cost Bonus =
TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency);
// Increase the cost if it is inside the loop.
unsigned LoopDepth = LI.getLoopDepth(I->getParent());
Bonus *= std::pow((double)AvgLoopIters, LoopDepth);
// Traverse recursively if there are more uses.
// TODO: Any other instructions to be added here?
if (I->mayReadFromMemory() || I->isCast())
for (auto *User : I->users())
Bonus += getUserBonus(User, TTI, LI);
return Bonus;
}
/// Compute a bonus for replacing argument \p A with constant \p C.
Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
InstCostVisitor &Visitor) {
const LoopInfo &LI) {
Function *F = A->getParent();
auto &TTI = (GetTTI)(*F);
LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
<< C->getNameOrAsOperand() << "\n");
Cost TotalCost = 0;
for (auto *U : A->users())
if (auto *UI = dyn_cast<Instruction>(U))
if (Solver.isBlockExecutable(UI->getParent()))
TotalCost += Visitor.getUserBonus(UI, A, C);
LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus "
<< TotalCost << " for argument " << *A << "\n");
for (auto *U : A->users()) {
TotalCost += getUserBonus(U, TTI, LI);
LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
}
// The below heuristic is only concerned with exposing inlining
// opportunities via indirect call promotion. If the argument is not a

View File

@@ -13,7 +13,7 @@
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -107,15 +107,13 @@ static void findReturnsToZap(Function &F,
static bool runIPSCCP(
Module &M, const DataLayout &DL, FunctionAnalysisManager *FAM,
std::function<BlockFrequencyInfo &(Function &)> GetBFI,
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
std::function<TargetTransformInfo &(Function &)> GetTTI,
std::function<AssumptionCache &(Function &)> GetAC,
function_ref<AnalysisResultsForFn(Function &)> getAnalysis,
bool IsFuncSpecEnabled) {
SCCPSolver Solver(DL, GetTLI, M.getContext());
FunctionSpecializer Specializer(Solver, M, FAM, GetBFI, GetTLI, GetTTI,
GetAC);
FunctionSpecializer Specializer(Solver, M, FAM, GetTLI, GetTTI, GetAC);
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
@@ -383,23 +381,21 @@ PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
auto GetAC = [&FAM](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn {
auto getAnalysis = [&FAM, this](Function &F) -> AnalysisResultsForFn {
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
return {
std::make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
&DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F) };
&DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F),
isFuncSpecEnabled() ? &FAM.getResult<LoopAnalysis>(F) : nullptr };
};
if (!runIPSCCP(M, DL, &FAM, GetBFI, GetTLI, GetTTI, GetAC, getAnalysis,
if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, getAnalysis,
isFuncSpecEnabled()))
return PreservedAnalyses::all();

View File

@@ -664,6 +664,13 @@ public:
return A->second.PredInfo->getPredicateInfoFor(I);
}
const LoopInfo &getLoopInfo(Function &F) {
auto A = AnalysisResults.find(&F);
assert(A != AnalysisResults.end() && A->second.LI &&
"Need LoopInfo analysis results for function.");
return *A->second.LI;
}
DomTreeUpdater getDTU(Function &F) {
auto A = AnalysisResults.find(&F);
assert(A != AnalysisResults.end() && "Need analysis results for function.");
@@ -1955,6 +1962,10 @@ const PredicateBase *SCCPSolver::getPredicateInfoFor(Instruction *I) {
return Visitor->getPredicateInfoFor(I);
}
const LoopInfo &SCCPSolver::getLoopInfo(Function &F) {
return Visitor->getLoopInfo(F);
}
DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); }
void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) {

View File

@@ -9,83 +9,83 @@
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<O1>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O1,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O1,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<Os>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-Os,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-NO-FUNC-SPEC,CHECK-Os,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<Oz>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-Oz,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-NO-FUNC-SPEC,CHECK-Oz,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='lto-pre-link<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-NO-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-peephole='no-op-function' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PEEPHOLE,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PEEPHOLE,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-late-loop-optimizations='no-op-loop' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-LATE,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-LATE,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-loop-optimizer-end='no-op-loop' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-END,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-END,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-scalar-optimizer-late='no-op-function' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-SCALAR-LATE,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-SCALAR-LATE,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-cgscc-optimizer-late='no-op-cgscc' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-CGSCC-LATE,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-CGSCC-LATE,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-vectorizer-start='no-op-function' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-VECTORIZER-START,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-VECTORIZER-START,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-pipeline-start='no-op-module' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-pipeline-early-simplification='no-op-module' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-pipeline-start='no-op-module' \
; RUN: -passes='lto-pre-link<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-NO-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-optimizer-early='no-op-module' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-EARLY,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-EARLY,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes-ep-optimizer-last='no-op-module' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-LAST,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-LAST,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<O3>' -enable-matrix -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MATRIX
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MATRIX
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<O3>' -enable-merge-functions -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MERGE-FUNCS
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MERGE-FUNCS
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<O3>' -ir-outliner -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-IR-OUTLINER
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-IR-OUTLINER
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='default<O3>' -hot-cold-split -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-HOT-COLD-SPLIT
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-HOT-COLD-SPLIT
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
; CHECK-Oz: {{^}}
@@ -109,6 +109,7 @@
; CHECK-O-NEXT: Running pass: OpenMPOptPass
; CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION-NEXT: Running pass: NoOpModulePass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: PromotePass
@@ -163,7 +164,7 @@
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: ReassociatePass
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
; CHECK-O-NEXT: Running analysis: LoopAnalysis
; CHECK-NO-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis
; CHECK-O-NEXT: Running pass: LCSSAPass
; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy

View File

@@ -9,23 +9,23 @@
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-EP
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='lto<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='lto<O2>' -S %s -passes-ep-full-link-time-optimization-early=no-op-module \
; RUN: -passes-ep-full-link-time-optimization-last=no-op-module 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-EP
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23,CHECK-O23SZ,CHECK-EP
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='lto<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='lto<Os>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OS,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OS,CHECK-OSZ,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='lto<Oz>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='lto<O3>' -S %s -passes-ep-peephole='no-op-function' 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-EP-Peephole
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23,CHECK-O23SZ,CHECK-EP-Peephole
; CHECK-EP: Running pass: NoOpModulePass
; CHECK-O: Running pass: CrossDSOCFIPass
@@ -43,6 +43,7 @@
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
; CHECK-O23-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
@@ -93,7 +94,7 @@
; CHECK-O23SZ-NEXT: Invalidating analysis: AAManager on foo
; CHECK-O23SZ-NEXT: Running pass: OpenMPOptCGSCCPass on (foo)
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
; CHECK-O23SZ-NEXT: Running analysis: MemorySSAAnalysis on foo
; CHECK-O23SZ-NEXT: Running analysis: AAManager on foo

View File

@@ -10,28 +10,28 @@
; Postlink pipelines:
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='thinlto<O1>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O1,CHECK-POSTLINK-O,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-optimizer-early='no-op-module' \
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-EARLY
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-EARLY
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-optimizer-last='no-op-module' \
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-LAST
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-LAST
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='thinlto<Os>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-Os
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-Os
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='thinlto<Oz>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
; CHECK-NOEXT: {{^}}
@@ -49,6 +49,7 @@
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
@@ -100,7 +101,7 @@
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: ReassociatePass
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
; CHECK-O-NEXT: Running analysis: LoopAnalysis
; CHECK-NO-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis
; CHECK-O-NEXT: Running pass: LCSSAPass
; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy

View File

@@ -3,22 +3,22 @@
; Postlink pipelines:
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='thinlto<O1>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O1,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O3,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='thinlto<Os>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-Os,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -passes='thinlto<Oz>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
; CHECK-NOEXT: {{^}}
@@ -34,6 +34,7 @@
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O123-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
@@ -47,7 +48,7 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass

View File

@@ -3,27 +3,27 @@
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
; RUN: -passes='thinlto<O1>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O1,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O3,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
; RUN: -passes='thinlto<Os>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-Os,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
; RUN: -passes='thinlto<Oz>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ,%llvmcheckext
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
; CHECK-NOEXT: {{^}}
@@ -43,6 +43,7 @@
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O123-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
@@ -55,7 +56,7 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass

View File

@@ -1,4 +1,4 @@
; RUN: opt -S --passes="default<O3>" -force-specialization < %s | FileCheck %s
; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
entry:

View File

@@ -1,4 +1,4 @@
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=3 -funcspec-min-function-size=10 -S < %s | FileCheck %s
; CHECK-NOT: foo.{{[0-9]+}}

View File

@@ -1,9 +1,11 @@
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Test function specialization wouldn't crash due to constant expression.
; Note that this test case shows that function specialization pass would
; transform the function even if no specialization happened.
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
%struct = type { i8, i16, i32, i64, i64}
@Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
@@ -24,6 +26,19 @@ entry:
}
define internal i64 @zoo(i1 %flag) {
; CHECK-LABEL: @zoo(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
; CHECK: plus:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: minus:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 4))
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), [[MINUS]] ]
; CHECK-NEXT: ret i64 [[TMP2]]
;
entry:
br i1 %flag, label %plus, label %minus
@@ -45,9 +60,10 @@ merge:
define i64 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo.4(i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo.3(i1 true)
; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 3) to i64))
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo(i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo(i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i64 [[TMP3]]
;
%1 = call i64 @zoo(i1 0)
%2 = call i64 @zoo(i1 1)
@@ -55,29 +71,3 @@ define i64 @main() {
ret i64 %3
}
; CHECK-LABEL: @func2.1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i64 undef
; CHECK-LABEL: @func2.2(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i64 undef
; CHECK-LABEL: @zoo.3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[PLUS:%.*]]
; CHECK: plus:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: merge:
; CHECK-NEXT: ret i64 undef
; CHECK-LABEL: @zoo.4(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[MINUS:%.*]]
; CHECK: minus:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4))
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: merge:
; CHECK-NEXT: ret i64 undef

View File

@@ -1,4 +1,4 @@
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=true -force-specialization -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=true -funcspec-min-function-size=10 -S < %s | FileCheck %s
; Check that the literal constant parameter could be specialized.
; CHECK: @foo.1(

View File

@@ -0,0 +1,63 @@
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=5 -funcspec-min-function-size=10 -S < %s | FileCheck %s
; Check that the loop depth results in a larger specialization bonus.
; CHECK: @foo.1(
; CHECK: @foo.2(
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@A = external dso_local constant i32, align 4
@B = external dso_local constant i32, align 4
@C = external dso_local constant i32, align 4
@D = external dso_local constant i32, align 4
declare i1 @cond_begin()
declare i1 @cond_end()
declare i1 @getCond()
define internal i32 @foo(i32 %x, ptr %b, ptr %c) {
entry:
br label %loop.entry
loop.entry:
br label %loop2.entry
loop2.entry:
br label %loop2.body
loop2.body:
%0 = load i32, ptr %b, align 4
%1 = load i32, ptr %c, align 4
%add.0 = add nsw i32 %0, %1
%add = add nsw i32 %add.0, %x
br label %loop2.end
loop2.end:
%cond.end = call i1 @cond_end()
br i1 %cond.end, label %loop2.entry, label %loop.end
loop.end:
%cond2.end = call i1 @getCond()
br i1 %cond2.end, label %loop.entry, label %return
return:
ret i32 %add
}
define dso_local i32 @bar(i32 %x, i32 %y) {
entry:
%tobool = icmp ne i32 %x, 0
br i1 %tobool, label %if.then, label %if.else
if.then:
%call = call i32 @foo(i32 %x, ptr @A, ptr @C)
br label %return
if.else:
%call1 = call i32 @foo(i32 %y, ptr @B, ptr @D)
br label %return
return:
%retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
ret i32 %retval.0
}

View File

@@ -1,4 +1,4 @@
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
; Checks for callsites that have been annotated with MinSize. We only expect
; specialisation for the call that does not have the attribute:

View File

@@ -1,5 +1,5 @@
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<no-func-spec>" -force-specialization -S < %s | FileCheck %s --check-prefix=NOFSPEC
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<no-func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s --check-prefix=NOFSPEC
define i64 @main(i64 %x, i1 %flag) {
;

View File

@@ -0,0 +1,89 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-avg-loop-iters=1 -force-specialization -S < %s | FileCheck %s
; DISABLED-NOT: @func.1(
; DISABLED-NOT: @func.2(
define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
%4 = alloca i32, align 4
store i32 %1, ptr %4, align 4
%5 = load i32, ptr %4, align 4
%6 = icmp slt i32 %5, 1
br i1 %6, label %14, label %7
7: ; preds = %3
%8 = load i32, ptr %4, align 4
%9 = sext i32 %8 to i64
%10 = getelementptr inbounds i32, ptr %0, i64 %9
call void %2(ptr %10)
%11 = load i32, ptr %4, align 4
%12 = add nsw i32 %11, -1
%13 = call i32 @func(ptr %0, i32 %12, ptr %2)
br label %14
14: ; preds = %3, %7
ret i32 0
}
define internal void @increment(ptr nocapture %0) {
%2 = load i32, ptr %0, align 4
%3 = add nsw i32 %2, 1
store i32 %3, ptr %0, align 4
ret void
}
define internal void @decrement(ptr nocapture %0) {
%2 = load i32, ptr %0, align 4
%3 = add nsw i32 %2, -1
store i32 %3, ptr %0, align 4
ret void
}
define i32 @main(ptr %0, i32 %1) {
; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
%3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
; CHECK: call void @func.1(ptr [[TMP0]], i32 0)
%4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
; CHECK: ret i32 0
ret i32 %4
}
; CHECK: @func.1(
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
; CHECK: 6:
; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
; CHECK: call void @decrement(ptr [[TMP9]])
; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]])
; CHECK: br label [[TMP12:%.*]]
; CHECK: 12:
; CHECK: ret void
;
;
; CHECK: @func.2(
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
; CHECK: 6:
; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
; CHECK: call void @increment(ptr [[TMP9]])
; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
; CHECK: br label [[TMP12:%.*]]
; CHECK: 12:
; CHECK: ret void

View File

@@ -1,7 +1,9 @@
; RUN: opt -passes="ipsccp<func-spec>" -S < %s | \
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=3 -S < %s | \
; RUN: FileCheck %s --check-prefixes=COMMON,DISABLED
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | \
; RUN: FileCheck %s --check-prefixes=COMMON,FORCE
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=3 -force-specialization -S < %s | \
; RUN: FileCheck %s --check-prefixes=COMMON,FORCE
; Test for specializing a constant global.

View File

@@ -1,4 +1,4 @@
; RUN: opt -S --passes="ipsccp<func-spec>" -force-specialization < %s | FileCheck %s
; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
define dso_local i32 @p0(i32 noundef %x) {
entry:
%add = add nsw i32 %x, 1

View File

@@ -1,5 +1,4 @@
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization < %s | FileCheck %s
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
entry:
%call = tail call i32 %p(i32 noundef %x)

View File

@@ -6,10 +6,10 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
; CHECK: plus:
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 42, ptr @plus, ptr @minus)
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: minus:
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y:%.*]], ptr @minus, ptr @plus)
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
@@ -20,7 +20,7 @@ entry:
br i1 %flag, label %plus, label %minus
plus:
%cmp0 = call i64 @compute(i64 %x, i64 42, ptr @plus, ptr @minus)
%cmp0 = call i64 @compute(i64 %x, i64 %y, ptr @plus, ptr @minus)
br label %merge
minus:
@@ -68,9 +68,9 @@ entry:
; CHECK-LABEL: @compute.2
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 42)
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 42, ptr @plus, ptr @plus)
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
; CHECK-LABEL: @compute.3
; CHECK-NEXT: entry:

View File

@@ -1,8 +1,7 @@
; RUN: opt -S --passes="ipsccp<func-spec>" \
; RUN: -funcspec-for-literal-constant=0 \
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
; RUN: opt -S --passes="ipsccp<func-spec>" \
; RUN: -funcspec-for-literal-constant=1 \
; RUN: -funcspec-for-literal-constant \
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
define i32 @f0(i32 noundef %x) {

View File

@@ -1,110 +0,0 @@
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=2 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS2
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
; DISABLED-NOT: @func.1(
; DISABLED-NOT: @func.2(
; DISABLED-NOT: @func.3(
define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
%4 = alloca i32, align 4
store i32 %1, ptr %4, align 4
%5 = load i32, ptr %4, align 4
%6 = icmp slt i32 %5, 1
br i1 %6, label %14, label %7
7: ; preds = %3
%8 = load i32, ptr %4, align 4
%9 = sext i32 %8 to i64
%10 = getelementptr inbounds i32, ptr %0, i64 %9
call void %2(ptr %10)
%11 = load i32, ptr %4, align 4
%12 = add nsw i32 %11, -1
%13 = call i32 @func(ptr %0, i32 %12, ptr %2)
br label %14
14: ; preds = %3, %7
ret i32 0
}
define internal void @increment(ptr nocapture %0) {
%2 = load i32, ptr %0, align 4
%3 = add nsw i32 %2, 1
store i32 %3, ptr %0, align 4
ret void
}
define internal void @decrement(ptr nocapture %0) {
%2 = load i32, ptr %0, align 4
%3 = add nsw i32 %2, -1
store i32 %3, ptr %0, align 4
ret void
}
define i32 @main(ptr %0, i32 %1) {
; COMMON: define i32 @main(
; COMMON-NEXT: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
; COMMON-NEXT: call void @func.1(ptr [[TMP0]])
; COMMON-NEXT: ret i32 0
;
%3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
%4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
ret i32 %4
}
; COMMON: define internal void @func.1(
; COMMON-NEXT: [[TMP2:%.*]] = alloca i32, align 4
; COMMON-NEXT: store i32 0, ptr [[TMP2]], align 4
; COMMON-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; COMMON-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
; COMMON-NEXT: br i1 [[TMP4]], label [[TMP11:%.*]], label [[TMP5:%.*]]
; COMMON: 5:
; COMMON-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
; COMMON-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
; COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP7]]
; COMMON-NEXT: call void @decrement(ptr [[TMP8]])
; COMMON-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
; COMMON-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
; ITERS1-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP10]])
; COMMON-NEXT: br label [[TMP11:%.*]]
; COMMON: 11:
; COMMON-NEXT: ret void
;
; COMMON: define internal void @func.2(
; COMMON-NEXT: [[TMP3:%.*]] = alloca i32, align 4
; COMMON-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
; COMMON-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; COMMON-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
; COMMON-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
; COMMON: 6:
; COMMON-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
; COMMON-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
; COMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
; COMMON-NEXT: call void @increment(ptr [[TMP9]])
; COMMON-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
; COMMON-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
; COMMON-NEXT: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
; COMMON-NEXT: br label [[TMP12:%.*]]
; COMMON: 12:
; COMMON-NEXT: ret void
;
; ITERS2: define internal void @func.3(
; ITERS2-NEXT: [[TMP3:%.*]] = alloca i32, align 4
; ITERS2-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
; ITERS2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; ITERS2-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
; ITERS2-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
; ITERS2: 6:
; ITERS2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
; ITERS2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
; ITERS2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
; ITERS2-NEXT: call void @decrement(ptr [[TMP9]])
; ITERS2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
; ITERS2-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP11]])
; ITERS2-NEXT: br label [[TMP12:%.*]]
; ITERS2: 12:
; ITERS2-NEXT: ret void

View File

@@ -1,4 +1,4 @@
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-min-entry-freq=1 < %s | FileCheck %s
; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
define dso_local i32 @p0(i32 noundef %x) {
entry:
%add = add nsw i32 %x, 1

View File

@@ -1,4 +1,4 @@
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
define i64 @main(i64 %x, i1 %flag) {
entry:

View File

@@ -1,12 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
; Make sure that we iterate correctly after sorting the specializations:
;
; Score(@plus, @minus) > Score(42, @minus, @power) > Score(@power, @mul)
; FnSpecialization: Specializations for function compute
; FnSpecialization: Gain = 608
; FnSpecialization: FormalArg = binop1, ActualArg = power
; FnSpecialization: FormalArg = binop2, ActualArg = mul
; FnSpecialization: Gain = 982
; FnSpecialization: FormalArg = binop1, ActualArg = plus
; FnSpecialization: FormalArg = binop2, ActualArg = minus
; FnSpecialization: Gain = 795
; FnSpecialization: FormalArg = binop1, ActualArg = minus
; FnSpecialization: FormalArg = binop2, ActualArg = power
define i64 @main(i64 %x, i64 %y, i1 %flag) {
; NONE-LABEL: @main(
@@ -108,11 +116,11 @@ merge:
;
; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
; THREE-NEXT: entry:
; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 42)
; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 42)
; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], 42
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
; THREE-NEXT: ret i64 [[TMP5]]
; THREE-NEXT: }

View File

@@ -12,7 +12,6 @@ add_llvm_unittest(IPOTests
LowerTypeTests.cpp
WholeProgramDevirt.cpp
AttributorTest.cpp
FunctionSpecializationTest.cpp
)
set_property(TARGET IPOTests PROPERTY FOLDER "Tests/UnitTests/TransformsTests")

View File

@@ -1,261 +0,0 @@
//===- FunctionSpecializationTest.cpp - Cost model unit tests -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Transforms/IPO/FunctionSpecialization.h"
#include "llvm/Transforms/Utils/SCCPSolver.h"
#include "gtest/gtest.h"
#include <memory>
namespace llvm {
class FunctionSpecializationTest : public testing::Test {
protected:
LLVMContext Ctx;
FunctionAnalysisManager FAM;
std::unique_ptr<Module> M;
std::unique_ptr<SCCPSolver> Solver;
FunctionSpecializationTest() {
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
FAM.registerPass([&] { return TargetIRAnalysis(); });
FAM.registerPass([&] { return BlockFrequencyAnalysis(); });
FAM.registerPass([&] { return BranchProbabilityAnalysis(); });
FAM.registerPass([&] { return LoopAnalysis(); });
FAM.registerPass([&] { return AssumptionAnalysis(); });
FAM.registerPass([&] { return DominatorTreeAnalysis(); });
FAM.registerPass([&] { return PostDominatorTreeAnalysis(); });
FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
}
Module &parseModule(const char *ModuleString) {
SMDiagnostic Err;
M = parseAssemblyString(ModuleString, Err, Ctx);
EXPECT_TRUE(M);
return *M;
}
FunctionSpecializer getSpecializerFor(Function *F) {
auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
auto GetAC = [this](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
auto GetAnalysis = [this](Function &F) -> AnalysisResultsForFn {
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
return { std::make_unique<PredicateInfo>(F, DT,
FAM.getResult<AssumptionAnalysis>(F)),
&DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F) };
};
Solver = std::make_unique<SCCPSolver>(M->getDataLayout(), GetTLI, Ctx);
Solver->addAnalysis(*F, GetAnalysis(*F));
Solver->markBlockExecutable(&F->front());
for (Argument &Arg : F->args())
Solver->markOverdefined(&Arg);
Solver->solveWhileResolvedUndefsIn(*M);
return FunctionSpecializer(*Solver, *M, &FAM, GetBFI, GetTLI, GetTTI,
GetAC);
}
Cost getInstCost(Instruction &I) {
auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
uint64_t Weight = FunctionSpecializer::getBlockFreqMultiplier() *
BFI.getBlockFreq(I.getParent()).getFrequency() /
BFI.getEntryFreq();
return Weight *
TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
}
};
} // namespace llvm
using namespace llvm;
TEST_F(FunctionSpecializationTest, SwitchInst) {
const char *ModuleString = R"(
define void @foo(i32 %a, i32 %b, i32 %i) {
entry:
switch i32 %i, label %default
[ i32 1, label %case1
i32 2, label %case2 ]
case1:
%0 = mul i32 %a, 2
%1 = sub i32 6, 5
br label %bb1
case2:
%2 = and i32 %b, 3
%3 = sdiv i32 8, 2
br label %bb2
bb1:
%4 = add i32 %0, %b
br label %default
bb2:
%5 = or i32 %2, %a
br label %default
default:
ret void
}
)";
Module &M = parseModule(ModuleString);
Function *F = M.getFunction("foo");
FunctionSpecializer Specializer = getSpecializerFor(F);
InstCostVisitor Visitor = Specializer.getInstCostVisitorFor(F);
Constant *One = ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1);
auto FuncIter = F->begin();
BasicBlock &Case1 = *++FuncIter;
BasicBlock &Case2 = *++FuncIter;
BasicBlock &BB1 = *++FuncIter;
BasicBlock &BB2 = *++FuncIter;
Instruction &Mul = Case1.front();
Instruction &And = Case2.front();
Instruction &Sdiv = *++Case2.begin();
Instruction &BrBB2 = Case2.back();
Instruction &Add = BB1.front();
Instruction &Or = BB2.front();
Instruction &BrDefault = BB2.back();
// mul
Cost Ref = getInstCost(Mul);
Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
EXPECT_EQ(Bonus, Ref);
// and + or + add
Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add);
Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
EXPECT_EQ(Bonus, Ref);
// sdiv + br + br
Ref = getInstCost(Sdiv) + getInstCost(BrBB2) + getInstCost(BrDefault);
Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
EXPECT_EQ(Bonus, Ref);
}
TEST_F(FunctionSpecializationTest, BranchInst) {
const char *ModuleString = R"(
define void @foo(i32 %a, i32 %b, i1 %cond) {
entry:
br i1 %cond, label %bb0, label %bb2
bb0:
%0 = mul i32 %a, 2
%1 = sub i32 6, 5
br label %bb1
bb1:
%2 = add i32 %0, %b
%3 = sdiv i32 8, 2
br label %bb2
bb2:
ret void
}
)";
Module &M = parseModule(ModuleString);
Function *F = M.getFunction("foo");
FunctionSpecializer Specializer = getSpecializerFor(F);
InstCostVisitor Visitor = Specializer.getInstCostVisitorFor(F);
Constant *One = ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1);
Constant *False = ConstantInt::getFalse(M.getContext());
auto FuncIter = F->begin();
BasicBlock &BB0 = *++FuncIter;
BasicBlock &BB1 = *++FuncIter;
Instruction &Mul = BB0.front();
Instruction &Sub = *++BB0.begin();
Instruction &BrBB1 = BB0.back();
Instruction &Add = BB1.front();
Instruction &Sdiv = *++BB1.begin();
Instruction &BrBB2 = BB1.back();
// mul
Cost Ref = getInstCost(Mul);
Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
EXPECT_EQ(Bonus, Ref);
// add
Ref = getInstCost(Add);
Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
EXPECT_EQ(Bonus, Ref);
// sub + br + sdiv + br
Ref = getInstCost(Sub) + getInstCost(BrBB1) + getInstCost(Sdiv) +
getInstCost(BrBB2);
Bonus = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
EXPECT_EQ(Bonus, Ref);
}
TEST_F(FunctionSpecializationTest, Misc) {
const char *ModuleString = R"(
@g = constant [2 x i32] zeroinitializer, align 4
define i32 @foo(i8 %a, i1 %cond, ptr %b) {
%cmp = icmp eq i8 %a, 10
%ext = zext i1 %cmp to i32
%sel = select i1 %cond, i32 %ext, i32 1
%gep = getelementptr i32, ptr %b, i32 %sel
%ld = load i32, ptr %gep
ret i32 %ld
}
)";
Module &M = parseModule(ModuleString);
Function *F = M.getFunction("foo");
FunctionSpecializer Specializer = getSpecializerFor(F);
InstCostVisitor Visitor = Specializer.getInstCostVisitorFor(F);
GlobalVariable *GV = M.getGlobalVariable("g");
Constant *One = ConstantInt::get(IntegerType::getInt8Ty(M.getContext()), 1);
Constant *True = ConstantInt::getTrue(M.getContext());
auto BlockIter = F->front().begin();
Instruction &Icmp = *BlockIter++;
Instruction &Zext = *BlockIter++;
Instruction &Select = *BlockIter++;
Instruction &Gep = *BlockIter++;
Instruction &Load = *BlockIter++;
// icmp + zext
Cost Ref = getInstCost(Icmp) + getInstCost(Zext);
Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
EXPECT_EQ(Bonus, Ref);
// select
Ref = getInstCost(Select);
Bonus = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
EXPECT_EQ(Bonus, Ref);
// gep + load
Ref = getInstCost(Gep) + getInstCost(Load);
Bonus = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
EXPECT_EQ(Bonus, Ref);
}