Mirror of https://github.com/intel/llvm.git (synced 2026-01-26 03:56:16 +08:00)
[LegacyDivergenceAnalysis] Add NewPM support

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D142161

Committed by: Anshil Gandhi
parent 778a582e8e
commit c52f9485b0
@@ -16,6 +16,9 @@
 #define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H

 #include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include <memory>

@@ -28,19 +31,8 @@ class TargetTransformInfo;
 class Use;
 class Value;

-class LegacyDivergenceAnalysis : public FunctionPass {
+class LegacyDivergenceAnalysisImpl {
 public:
-  static char ID;
-
-  LegacyDivergenceAnalysis();
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-
-  bool runOnFunction(Function &F) override;
-
-  // Print all divergent branches in the function.
-  void print(raw_ostream &OS, const Module *) const override;
-
   // Returns true if V is divergent at its definition.
   bool isDivergent(const Value *V) const;
@@ -57,11 +49,18 @@ public:
   // Keep the analysis results uptodate by removing an erased value.
   void removeValue(const Value *V) { DivergentValues.erase(V); }

-private:
+  // Print all divergent branches in the function.
+  void print(raw_ostream &OS, const Module *) const;
+
   // Whether analysis should be performed by GPUDivergenceAnalysis.
   bool shouldUseGPUDivergenceAnalysis(const Function &F,
-                                      const TargetTransformInfo &TTI) const;
+                                      const TargetTransformInfo &TTI,
+                                      const LoopInfo &LI);
+
+  void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+           PostDominatorTree &PDT, const LoopInfo &LI);

+protected:
   // (optional) handle to new DivergenceAnalysis
   std::unique_ptr<DivergenceInfo> gpuDA;
@@ -71,6 +70,34 @@ private:
   // Stores divergent uses of possibly uniform values.
   DenseSet<const Use *> DivergentUses;
 };
-} // End llvm namespace
+
+class LegacyDivergenceAnalysis : public FunctionPass,
+                                 public LegacyDivergenceAnalysisImpl {
+public:
+  static char ID;
+
+  LegacyDivergenceAnalysis();
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnFunction(Function &F) override;
+};
+
+class LegacyDivergenceAnalysisPass
+    : public PassInfoMixin<LegacyDivergenceAnalysisPass>,
+      public LegacyDivergenceAnalysisImpl {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+  // (optional) handle to new DivergenceAnalysis
+  std::unique_ptr<DivergenceInfo> gpuDA;
+
+  // Stores all divergent values.
+  DenseSet<const Value *> DivergentValues;
+
+  // Stores divergent uses of possibly uniform values.
+  DenseSet<const Use *> DivergentUses;
+};
+
+} // end namespace llvm

 #endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
@@ -299,47 +299,25 @@ FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
   return new LegacyDivergenceAnalysis();
 }

-void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
-  AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
-  AU.addRequiredTransitive<LoopInfoWrapperPass>();
-  AU.setPreservesAll();
-}
-
-bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
-    const Function &F, const TargetTransformInfo &TTI) const {
+bool LegacyDivergenceAnalysisImpl::shouldUseGPUDivergenceAnalysis(
+    const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI) {
   if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis()))
     return false;

   // GPUDivergenceAnalysis requires a reducible CFG.
-  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   using RPOTraversal = ReversePostOrderTraversal<const Function *>;
   RPOTraversal FuncRPOT(&F);
   return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
                                  const LoopInfo>(FuncRPOT, LI);
 }

-bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
-  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
-  if (TTIWP == nullptr)
-    return false;
-
-  TargetTransformInfo &TTI = TTIWP->getTTI(F);
-  // Fast path: if the target does not have branch divergence, we do not mark
-  // any branch as divergent.
-  if (!TTI.hasBranchDivergence())
-    return false;
-
-  DivergentValues.clear();
-  DivergentUses.clear();
-  gpuDA = nullptr;
-
-  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
-
-  if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
+void LegacyDivergenceAnalysisImpl::run(Function &F,
+                                       llvm::TargetTransformInfo &TTI,
+                                       llvm::DominatorTree &DT,
+                                       llvm::PostDominatorTree &PDT,
+                                       const llvm::LoopInfo &LI) {
+  if (shouldUseGPUDivergenceAnalysis(F, TTI, LI)) {
     // run the new GPU divergence analysis
-    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
                                              /* KnownReducible  = */ true);

@@ -349,29 +327,24 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
     DP.populateWithSourcesOfDivergence();
     DP.propagate();
   }
-
-  LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
-                    << ":\n";
-             print(dbgs(), F.getParent()));
-
-  return false;
 }

-bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
+bool LegacyDivergenceAnalysisImpl::isDivergent(const Value *V) const {
   if (gpuDA) {
     return gpuDA->isDivergent(*V);
   }
   return DivergentValues.count(V);
 }

-bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const {
+bool LegacyDivergenceAnalysisImpl::isDivergentUse(const Use *U) const {
   if (gpuDA) {
     return gpuDA->isDivergentUse(*U);
   }
   return DivergentValues.count(U->get()) || DivergentUses.count(U);
 }

-void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+void LegacyDivergenceAnalysisImpl::print(raw_ostream &OS,
+                                         const Module *) const {
   if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
     return;

@@ -407,3 +380,56 @@ void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
   }
   OS << "\n";
 }
+
+void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+  AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
+  AU.addRequiredTransitive<LoopInfoWrapperPass>();
+  AU.setPreservesAll();
+}
+
+bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
+  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+  if (TTIWP == nullptr)
+    return false;
+
+  TargetTransformInfo &TTI = TTIWP->getTTI(F);
+  // Fast path: if the target does not have branch divergence, we do not mark
+  // any branch as divergent.
+  if (!TTI.hasBranchDivergence())
+    return false;
+
+  DivergentValues.clear();
+  DivergentUses.clear();
+  gpuDA = nullptr;
+
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
+  LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
+                    << ":\n";
+             LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
+
+  return false;
+}
+
+PreservedAnalyses
+LegacyDivergenceAnalysisPass::run(Function &F, FunctionAnalysisManager &AM) {
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  if (!TTI.hasBranchDivergence())
+    return PreservedAnalyses::all();
+
+  DivergentValues.clear();
+  DivergentUses.clear();
+  gpuDA = nullptr;
+
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+  LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
+  LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
+                    << ":\n";
+             LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
+  return PreservedAnalyses::all();
+}
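
For context, here is a minimal sketch (not part of this commit) of how the new-PM LegacyDivergenceAnalysisPass added above can be driven from C++. The pass-manager setup is the standard PassBuilder boilerplate; the helper name analyzeDivergence is hypothetical, and a real GPU target would construct the PassBuilder with a TargetMachine so that TargetIRAnalysis reports branch divergence.

#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// Hypothetical driver: runs the divergence pass over every defined function
// in a module through the new pass manager.
static void analyzeDivergence(Module &M) {
  // Standard new-PM analysis-manager setup. Pass a TargetMachine to the
  // PassBuilder constructor for a target with branch divergence (e.g. AMDGPU);
  // with the default TTI, hasBranchDivergence() is false and the pass
  // returns early without computing anything.
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  FunctionPassManager FPM;
  FPM.addPass(LegacyDivergenceAnalysisPass());
  for (Function &F : M)
    if (!F.isDeclaration())
      FPM.run(F, FAM);
}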
@@ -46,6 +46,7 @@
 #include "llvm/Analysis/InstCount.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/Lint.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopCacheAnalysis.h"

@@ -308,6 +308,7 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
 FUNCTION_PASS("lint", LintPass())
 FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
 FUNCTION_PASS("instnamer", InstructionNamerPass())
+FUNCTION_PASS("legacy-divergence-analysis", LegacyDivergenceAnalysisPass())
 FUNCTION_PASS("loweratomic", LowerAtomicPass())
 FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
 FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())

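With this registry entry, the pass becomes reachable through the new pass manager as -passes=legacy-divergence-analysis in opt, which is what the new RUN line in the updated test below exercises.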
@@ -1,8 +1,19 @@
+; RUN: opt -mtriple amdgcn-amdhsa -mcpu=gfx90a -passes=legacy-divergence-analysis < %s -S 2>&1 | FileCheck -check-prefix=OPT %s
 ; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

 declare i32 @llvm.amdgcn.workitem.id.x()
 declare i32 @llvm.amdgcn.readfirstlane(i32)

+; OPT-LABEL: define amdgpu_kernel void @readfirstlane_uniform(
+; OPT-NEXT:  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+; OPT-NEXT:  %scalar = tail call i32 @llvm.amdgcn.readfirstlane(i32 %tid)
+; OPT-NEXT:  %idx = zext i32 %scalar to i64
+; OPT-NEXT:  %gep0 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %idx
+; OPT-NEXT:  %val = load float, ptr addrspace(1) %gep0, align 4
+; OPT-NEXT:  %gep1 = getelementptr inbounds float, ptr addrspace(1) %1, i64 10
+; OPT-NEXT:  store float %val, ptr addrspace(1) %gep1, align 4
+; OPT-NEXT:  ret void
+;
 ; GCN-LABEL: readfirstlane_uniform
 ; GCN: s_load_dwordx4 s[[[IN_ADDR:[0-9]+]]:3], s[4:5], 0x0
 ; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0