[LegacyDivergenceAnalysis] Add NewPM support

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D142161
This commit was authored by Anshil Gandhi on 2023-01-20 15:20:39 -07:00
and committed by Anshil Gandhi.
parent 778a582e8e
commit c52f9485b0
5 changed files with 120 additions and 54 deletions

View File

@@ -16,6 +16,9 @@
#define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <memory>
@@ -28,19 +31,8 @@ class TargetTransformInfo;
class Use;
class Value;
class LegacyDivergenceAnalysis : public FunctionPass {
class LegacyDivergenceAnalysisImpl {
public:
static char ID;
LegacyDivergenceAnalysis();
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
// Print all divergent branches in the function.
void print(raw_ostream &OS, const Module *) const override;
// Returns true if V is divergent at its definition.
bool isDivergent(const Value *V) const;
@@ -57,11 +49,18 @@ public:
// Keep the analysis results uptodate by removing an erased value.
void removeValue(const Value *V) { DivergentValues.erase(V); }
private:
// Print all divergent branches in the function.
void print(raw_ostream &OS, const Module *) const;
// Whether analysis should be performed by GPUDivergenceAnalysis.
bool shouldUseGPUDivergenceAnalysis(const Function &F,
const TargetTransformInfo &TTI) const;
const TargetTransformInfo &TTI,
const LoopInfo &LI);
void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
PostDominatorTree &PDT, const LoopInfo &LI);
protected:
// (optional) handle to new DivergenceAnalysis
std::unique_ptr<DivergenceInfo> gpuDA;
@@ -71,6 +70,34 @@ private:
// Stores divergent uses of possibly uniform values.
DenseSet<const Use *> DivergentUses;
};
} // End llvm namespace
class LegacyDivergenceAnalysis : public FunctionPass,
public LegacyDivergenceAnalysisImpl {
public:
static char ID;
LegacyDivergenceAnalysis();
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
};
class LegacyDivergenceAnalysisPass
: public PassInfoMixin<LegacyDivergenceAnalysisPass>,
public LegacyDivergenceAnalysisImpl {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
private:
// (optional) handle to new DivergenceAnalysis
std::unique_ptr<DivergenceInfo> gpuDA;
// Stores all divergent values.
DenseSet<const Value *> DivergentValues;
// Stores divergent uses of possibly uniform values.
DenseSet<const Use *> DivergentUses;
};
} // end namespace llvm
#endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H

View File

@@ -299,47 +299,25 @@ FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
return new LegacyDivergenceAnalysis();
}
// Declare the analyses this legacy pass consumes. addRequiredTransitive is
// used so that passes querying this analysis can also rely on DT/PDT/LI
// staying available. The pass only reads the IR, so it preserves everything.
void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
const Function &F, const TargetTransformInfo &TTI) const {
bool LegacyDivergenceAnalysisImpl::shouldUseGPUDivergenceAnalysis(
const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI) {
if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis()))
return false;
// GPUDivergenceAnalysis requires a reducible CFG.
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
using RPOTraversal = ReversePostOrderTraversal<const Function *>;
RPOTraversal FuncRPOT(&F);
return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
const LoopInfo>(FuncRPOT, LI);
}
bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
if (TTIWP == nullptr)
return false;
TargetTransformInfo &TTI = TTIWP->getTTI(F);
// Fast path: if the target does not have branch divergence, we do not mark
// any branch as divergent.
if (!TTI.hasBranchDivergence())
return false;
DivergentValues.clear();
DivergentUses.clear();
gpuDA = nullptr;
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
void LegacyDivergenceAnalysisImpl::run(Function &F,
llvm::TargetTransformInfo &TTI,
llvm::DominatorTree &DT,
llvm::PostDominatorTree &PDT,
const llvm::LoopInfo &LI) {
if (shouldUseGPUDivergenceAnalysis(F, TTI, LI)) {
// run the new GPU divergence analysis
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
/* KnownReducible = */ true);
@@ -349,29 +327,24 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
DP.populateWithSourcesOfDivergence();
DP.propagate();
}
LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
<< ":\n";
print(dbgs(), F.getParent()));
return false;
}
bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
bool LegacyDivergenceAnalysisImpl::isDivergent(const Value *V) const {
if (gpuDA) {
return gpuDA->isDivergent(*V);
}
return DivergentValues.count(V);
}
bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const {
bool LegacyDivergenceAnalysisImpl::isDivergentUse(const Use *U) const {
if (gpuDA) {
return gpuDA->isDivergentUse(*U);
}
return DivergentValues.count(U->get()) || DivergentUses.count(U);
}
void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
void LegacyDivergenceAnalysisImpl::print(raw_ostream &OS,
const Module *) const {
if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
return;
@@ -407,3 +380,56 @@ void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
}
OS << "\n";
}
// Declare the analyses the legacy-PM wrapper depends on (dominators,
// post-dominators, loop info). Declared transitively required so downstream
// users of this analysis may query them too; nothing is invalidated.
void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
// Legacy pass-manager driver: fetch TTI/DT/PDT/LI from the pass manager and
// delegate the actual divergence computation to the shared
// LegacyDivergenceAnalysisImpl. Always returns false (the IR is not modified).
bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
// TTI is only queried if available; without it we cannot tell whether the
// target exhibits branch divergence at all.
auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
if (TTIWP == nullptr)
return false;
TargetTransformInfo &TTI = TTIWP->getTTI(F);
// Fast path: if the target does not have branch divergence, we do not mark
// any branch as divergent.
if (!TTI.hasBranchDivergence())
return false;
// Clear state possibly left over from analyzing a previous function.
DivergentValues.clear();
DivergentUses.clear();
gpuDA = nullptr;
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
// Shared implementation does the real work; see LegacyDivergenceAnalysisImpl.
LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
<< ":\n";
LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
// Analysis-only pass: no IR changes.
return false;
}
// New pass-manager entry point: same computation as the legacy wrapper above,
// but the required analyses (TTI/DT/PDT/LI) are obtained from the
// FunctionAnalysisManager. All analyses are preserved.
PreservedAnalyses
LegacyDivergenceAnalysisPass::run(Function &F, FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
// Fast path: no branch divergence on this target means nothing to compute.
if (!TTI.hasBranchDivergence())
return PreservedAnalyses::all();
// Clear state possibly left over from analyzing a previous function.
DivergentValues.clear();
DivergentUses.clear();
gpuDA = nullptr;
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
// Shared implementation does the real work; see LegacyDivergenceAnalysisImpl.
LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
<< ":\n";
LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
return PreservedAnalyses::all();
}

View File

@@ -46,6 +46,7 @@
#include "llvm/Analysis/InstCount.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"

View File

@@ -308,6 +308,7 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
FUNCTION_PASS("lint", LintPass())
FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
FUNCTION_PASS("instnamer", InstructionNamerPass())
FUNCTION_PASS("legacy-divergence-analysis", LegacyDivergenceAnalysisPass())
FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())

View File

@@ -1,8 +1,19 @@
; RUN: opt -mtriple amdgcn-amdhsa -mcpu=gfx90a -passes=legacy-divergence-analysis < %s -S 2>&1 | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.readfirstlane(i32)
; OPT-LABEL: define amdgpu_kernel void @readfirstlane_uniform(
; OPT-NEXT: %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: %scalar = tail call i32 @llvm.amdgcn.readfirstlane(i32 %tid)
; OPT-NEXT: %idx = zext i32 %scalar to i64
; OPT-NEXT: %gep0 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %idx
; OPT-NEXT: %val = load float, ptr addrspace(1) %gep0, align 4
; OPT-NEXT: %gep1 = getelementptr inbounds float, ptr addrspace(1) %1, i64 10
; OPT-NEXT: store float %val, ptr addrspace(1) %gep1, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: readfirstlane_uniform
; GCN: s_load_dwordx4 s[[[IN_ADDR:[0-9]+]]:3], s[4:5], 0x0
; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0