mirror of
https://github.com/intel/llvm.git
synced 2026-01-25 10:55:58 +08:00
Split IndirectBr critical edges before PGO gen/use passes.
Summary: The PGO gen/use passes currently fail with an assert failure if there's a critical edge whose source is an IndirectBr instruction and that edge needs to be instrumented. To avoid this in certain cases, split IndirectBr critical edges in the PGO gen/use passes. This works for blocks with single indirectbr predecessors, but not for those with multiple indirectbr predecessors (splitting an IndirectBr critical edge isn't always possible.) Reviewers: davidxl, xur Reviewed By: davidxl Subscribers: efriedma, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D40699 llvm-svn: 320511
This commit is contained in:
@@ -25,6 +25,8 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class BlockFrequencyInfo;
|
||||
class BranchProbabilityInfo;
|
||||
class DominatorTree;
|
||||
class Function;
|
||||
class Instruction;
|
||||
@@ -301,7 +303,10 @@ Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
|
||||
// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
|
||||
// create the following structure:
|
||||
// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
|
||||
bool SplitIndirectBrCriticalEdges(Function &F);
|
||||
// If BPI and BFI aren't non-null, BPI/BFI will be updated accordingly.
|
||||
bool SplitIndirectBrCriticalEdges(Function &F,
|
||||
BranchProbabilityInfo *BPI = nullptr,
|
||||
BlockFrequencyInfo *BFI = nullptr);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
|
||||
@@ -716,6 +716,9 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
|
||||
static void instrumentOneFunc(
|
||||
Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
|
||||
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
|
||||
// Split indirectbr critical edges here before computing the MST rather than
|
||||
// later in getInstrBB() to avoid invalidating it.
|
||||
SplitIndirectBrCriticalEdges(F, BPI, BFI);
|
||||
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
|
||||
BFI);
|
||||
unsigned NumCounters = FuncInfo.getNumCounters();
|
||||
@@ -1463,6 +1466,9 @@ static bool annotateAllFunctions(
|
||||
continue;
|
||||
auto *BPI = LookupBPI(F);
|
||||
auto *BFI = LookupBFI(F);
|
||||
// Split indirectbr critical edges here before computing the MST rather than
|
||||
// later in getInstrBB() to avoid invalidating it.
|
||||
SplitIndirectBrCriticalEdges(F, BPI, BFI);
|
||||
PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
|
||||
if (!Func.readCounters(PGOReader.get()))
|
||||
continue;
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
@@ -331,7 +333,9 @@ findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
|
||||
return IBB;
|
||||
}
|
||||
|
||||
bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
|
||||
bool llvm::SplitIndirectBrCriticalEdges(Function &F,
|
||||
BranchProbabilityInfo *BPI,
|
||||
BlockFrequencyInfo *BFI) {
|
||||
// Check whether the function has any indirectbrs, and collect which blocks
|
||||
// they may jump to. Since most functions don't have indirect branches,
|
||||
// this lowers the common case's overhead to O(Blocks) instead of O(Edges).
|
||||
@@ -348,6 +352,7 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
|
||||
if (Targets.empty())
|
||||
return false;
|
||||
|
||||
bool ShouldUpdateAnalysis = BPI && BFI;
|
||||
bool Changed = false;
|
||||
for (BasicBlock *Target : Targets) {
|
||||
SmallVector<BasicBlock *, 16> OtherPreds;
|
||||
@@ -363,6 +368,14 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
|
||||
continue;
|
||||
|
||||
BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
|
||||
if (ShouldUpdateAnalysis) {
|
||||
// Copy the BFI/BPI from Target to BodyBlock.
|
||||
for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
|
||||
I < E; ++I)
|
||||
BPI->setEdgeProbability(BodyBlock, I,
|
||||
BPI->getEdgeProbability(Target, I));
|
||||
BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
|
||||
}
|
||||
// It's possible Target was its own successor through an indirectbr.
|
||||
// In this case, the indirectbr now comes from BodyBlock.
|
||||
if (IBRPred == Target)
|
||||
@@ -374,13 +387,22 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
|
||||
ValueToValueMapTy VMap;
|
||||
BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
|
||||
|
||||
BlockFrequency BlockFreqForDirectSucc;
|
||||
for (BasicBlock *Pred : OtherPreds) {
|
||||
// If the target is a loop to itself, then the terminator of the split
|
||||
// block needs to be updated.
|
||||
if (Pred == Target)
|
||||
BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
|
||||
else
|
||||
Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
|
||||
// block (BodyBlock) needs to be updated.
|
||||
BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
|
||||
Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
|
||||
if (ShouldUpdateAnalysis)
|
||||
BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
|
||||
BPI->getEdgeProbability(Src, DirectSucc);
|
||||
}
|
||||
if (ShouldUpdateAnalysis) {
|
||||
BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
|
||||
BlockFrequency NewBlockFreqForTarget =
|
||||
BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
|
||||
BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
|
||||
BPI->eraseBlock(Target);
|
||||
}
|
||||
|
||||
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
:ir
|
||||
foo
|
||||
# Func Hash:
|
||||
40197883220
|
||||
47485104005
|
||||
# Num Counters:
|
||||
4
|
||||
# Counter Values:
|
||||
202
|
||||
88
|
||||
139
|
||||
20
|
||||
5
|
||||
|
||||
63
|
||||
|
||||
@@ -37,12 +37,14 @@ return:
|
||||
; BRANCHPROB: Printing analysis 'Branch Probability Analysis' for function 'foo':
|
||||
; BRANCHPROB:---- Branch Probabilities ----
|
||||
; BRANCHPROB: edge entry -> if.then probability is 0x37c32b17 / 0x80000000 = 43.56%
|
||||
; BRANCHPROB: edge entry -> return probability is 0x483cd4e9 / 0x80000000 = 56.44%
|
||||
; BRANCHPROB: edge entry -> return.clone probability is 0x483cd4e9 / 0x80000000 = 56.44%
|
||||
; BRANCHPROB: edge if.then -> return probability is 0x5ba2e8ba / 0x80000000 = 71.59%
|
||||
; BRANCHPROB: edge if.then -> label2 probability is 0x1d1745d1 / 0x80000000 = 22.73%
|
||||
; BRANCHPROB: edge if.then -> label3 probability is 0x0745d174 / 0x80000000 = 5.68%
|
||||
; BRANCHPROB: edge label2 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
|
||||
; BRANCHPROB: edge label3 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
|
||||
; BRANCHPROB: edge label2 -> return.clone probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
|
||||
; BRANCHPROB: edge label3 -> return.clone probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
|
||||
; BRANCHPROB: edge return -> .split probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
|
||||
; BRANCHPROB: edge return.clone -> .split probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone uwtable
|
||||
define i32 @bar(i32 %v) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%mul = shl nsw i32 %v, 1
|
||||
ret i32 %mul
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly uwtable
|
||||
define i32 @foo(i8* nocapture readonly %p) #1 {
|
||||
entry:
|
||||
%targets = alloca [256 x i8*], align 16
|
||||
%arrayidx1 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 93
|
||||
store i8* blockaddress(@foo, %if.end), i8** %arrayidx1, align 8
|
||||
br label %for.cond2
|
||||
|
||||
for.cond2: ; preds = %if.end, %for.cond2, %entry
|
||||
; CHECK: for.cond2: ; preds = %.split1
|
||||
%p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %for.cond2 ]
|
||||
%incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1
|
||||
%0 = load i8, i8* %p.addr.0, align 1
|
||||
%cond = icmp eq i8 %0, 93
|
||||
br i1 %cond, label %if.end.preheader, label %for.cond2
|
||||
|
||||
if.end.preheader: ; preds = %for.cond2
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.end.preheader, %if.end
|
||||
; CHECK: if.end: ; preds = %.split1
|
||||
%p.addr.1 = phi i8* [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %if.end.preheader ]
|
||||
%incdec.ptr5 = getelementptr inbounds i8, i8* %p.addr.1, i64 1
|
||||
%1 = load i8, i8* %p.addr.1, align 1
|
||||
%idxprom6 = zext i8 %1 to i64
|
||||
%arrayidx7 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 %idxprom6
|
||||
%2 = load i8*, i8** %arrayidx7, align 8
|
||||
indirectbr i8* %2, [label %for.cond2, label %if.end]
|
||||
; CHECK: indirectbr i8* %2, [label %for.cond2, label %if.end]
|
||||
}
|
||||
Reference in New Issue
Block a user