mirror of
https://github.com/intel/llvm.git
synced 2026-01-19 17:45:07 +08:00
[ScopHelper] Provide support for recognising collective invariant loads
Summary: This patch aims to provide support for detecting load patterns which are collectively invariant but right now `isHoistableLoad()` is checking each load instruction individually which cannot detect the load pattern as a whole. Patch by: Sahil Girish Yerawar Reviewers: bollu, philip.pfaffe, Meinersbur Reviewed By: philip.pfaffe, Meinersbur Differential Revision: https://reviews.llvm.org/D48026 llvm-svn: 335949
This commit is contained in:
@@ -391,10 +391,12 @@ llvm::Value *getConditionFromTerminator(llvm::TerminatorInst *TI);
|
||||
/// @param LI The loop info.
|
||||
/// @param SE The scalar evolution analysis.
|
||||
/// @param DT The dominator tree of the function.
|
||||
/// @param KnownInvariantLoads The invariant load set.
|
||||
///
|
||||
/// @return True if @p LInst can be hoisted in @p R.
|
||||
bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI,
|
||||
llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT);
|
||||
llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT,
|
||||
const InvariantLoadsSetTy &KnownInvariantLoads);
|
||||
|
||||
/// Return true iff @p V is an intrinsic that we ignore during code
|
||||
/// generation.
|
||||
|
||||
@@ -483,8 +483,7 @@ bool ScopDetection::onlyValidRequiredInvariantLoads(
|
||||
// very function (and its children).
|
||||
if (Context.RequiredILS.count(Load))
|
||||
continue;
|
||||
|
||||
if (!isHoistableLoad(Load, CurRegion, LI, SE, DT))
|
||||
if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
|
||||
return false;
|
||||
|
||||
for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
|
||||
@@ -938,7 +937,7 @@ bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
|
||||
auto *V = dyn_cast<Value>(Unknown->getValue());
|
||||
if (auto *Load = dyn_cast<LoadInst>(V)) {
|
||||
if (Context.CurRegion.contains(Load) &&
|
||||
isHoistableLoad(Load, CurRegion, LI, SE, DT))
|
||||
isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
|
||||
Context.RequiredILS.insert(Load);
|
||||
continue;
|
||||
}
|
||||
@@ -1148,18 +1147,38 @@ bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
|
||||
// inside the scop. Hence, we can only create a run-time check if we are
|
||||
// sure the base pointer is not an instruction defined inside the scop.
|
||||
// However, we can ignore loads that will be hoisted.
|
||||
for (const auto &Ptr : AS) {
|
||||
Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
|
||||
if (Inst && Context.CurRegion.contains(Inst)) {
|
||||
auto *Load = dyn_cast<LoadInst>(Inst);
|
||||
if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)) {
|
||||
Context.RequiredILS.insert(Load);
|
||||
continue;
|
||||
}
|
||||
|
||||
CanBuildRunTimeCheck = false;
|
||||
break;
|
||||
InvariantLoadsSetTy VariantLS, InvariantLS;
|
||||
// In order to detect loads which are dependent on other invariant loads
|
||||
// as invariant, we use fixed-point iteration method here i.e we iterate
|
||||
// over the alias set for arbitrary number of times until it is safe to
|
||||
// assume that all the invariant loads have been detected
|
||||
while (1) {
|
||||
const unsigned int VariantSize = VariantLS.size(),
|
||||
InvariantSize = InvariantLS.size();
|
||||
|
||||
for (const auto &Ptr : AS) {
|
||||
Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
|
||||
if (Inst && Context.CurRegion.contains(Inst)) {
|
||||
auto *Load = dyn_cast<LoadInst>(Inst);
|
||||
if (Load && InvariantLS.count(Load))
|
||||
continue;
|
||||
if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
|
||||
InvariantLS)) {
|
||||
if (VariantLS.count(Load))
|
||||
VariantLS.remove(Load);
|
||||
Context.RequiredILS.insert(Load);
|
||||
InvariantLS.insert(Load);
|
||||
} else {
|
||||
CanBuildRunTimeCheck = false;
|
||||
VariantLS.insert(Load);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (InvariantSize == InvariantLS.size() &&
|
||||
VariantSize == VariantLS.size())
|
||||
break;
|
||||
}
|
||||
|
||||
if (CanBuildRunTimeCheck)
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "polly/Support/SCEVValidator.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/RegionInfo.h"
|
||||
#include "llvm/Analysis/RegionInfoImpl.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
@@ -453,10 +454,40 @@ Value *polly::getConditionFromTerminator(TerminatorInst *TI) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static bool hasVariantIndex(GetElementPtrInst *Gep, Loop *L, Region &R,
|
||||
ScalarEvolution &SE) {
|
||||
for (const Use &Val : llvm::drop_begin(Gep->operands(), 1)) {
|
||||
const SCEV *PtrSCEV = SE.getSCEVAtScope(Val, L);
|
||||
Loop *OuterLoop = R.outermostLoopInRegion(L);
|
||||
if (!SE.isLoopInvariant(PtrSCEV, OuterLoop))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI,
|
||||
ScalarEvolution &SE, const DominatorTree &DT) {
|
||||
ScalarEvolution &SE, const DominatorTree &DT,
|
||||
const InvariantLoadsSetTy &KnownInvariantLoads) {
|
||||
Loop *L = LI.getLoopFor(LInst->getParent());
|
||||
auto *Ptr = LInst->getPointerOperand();
|
||||
|
||||
// A LoadInst is hoistable if the address it is loading from is also
|
||||
// invariant; in this case: another invariant load (whether that address
|
||||
// is also not written to has to be checked separately)
|
||||
// TODO: This only checks for a LoadInst->GetElementPtrInst->LoadInst
|
||||
// pattern generated by the Chapel frontend, but generally this applies
|
||||
// for any chain of instruction that does not also depend on any
|
||||
// induction variable
|
||||
if (auto *GepInst = dyn_cast<GetElementPtrInst>(Ptr)) {
|
||||
if (!hasVariantIndex(GepInst, L, R, SE)) {
|
||||
if (auto *DecidingLoad =
|
||||
dyn_cast<LoadInst>(GepInst->getPointerOperand())) {
|
||||
if (KnownInvariantLoads.count(DecidingLoad))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L);
|
||||
while (L && R.contains(L)) {
|
||||
if (!SE.isLoopInvariant(PtrSCEV, L))
|
||||
|
||||
56
polly/test/ScopDetect/collective_invariant_loads.ll
Normal file
56
polly/test/ScopDetect/collective_invariant_loads.ll
Normal file
@@ -0,0 +1,56 @@
|
||||
; RUN: opt %loadPolly -polly-scops -polly-invariant-load-hoisting -analyze < %s | FileCheck %s
|
||||
|
||||
;CHECK: Function: test_init_chpl
|
||||
;CHECK-NEXT: Region: %bb1---%bb16
|
||||
;CHECK-NEXT: Max Loop Depth: 2
|
||||
;CHECK-NEXT: Invariant Accesses: {
|
||||
;CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
;CHECK-NEXT: [tmp5] -> { Stmt_bb2[i0, i1] -> MemRef_arg[1] };
|
||||
;CHECK-NEXT: Execution Context: [tmp5] -> { : }
|
||||
;CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
;CHECK-NEXT: [tmp5] -> { Stmt_bb2[i0, i1] -> MemRef_tmp3[9] };
|
||||
;CHECK-NEXT: Execution Context: [tmp5] -> { : }
|
||||
;CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
;CHECK-NEXT: [tmp5] -> { Stmt_bb2[i0, i1] -> MemRef_tmp3[2] };
|
||||
;CHECK-NEXT: Execution Context: [tmp5] -> { : }
|
||||
;CHECK-NEXT: }
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
%array_ty = type { i64, %array_ptr*, i8 }
|
||||
%array_ptr = type { [2 x i64], [2 x i64], [2 x i64], i64, i64, double*, double*, i8 }
|
||||
|
||||
; Function Attrs: noinline
|
||||
define weak dso_local void @test_init_chpl(%array_ty* nonnull %arg) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb14, %bb
|
||||
%.0 = phi i64 [ 0, %bb ], [ %tmp15, %bb14 ]
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb2, %bb1
|
||||
%.01 = phi i64 [ 0, %bb1 ], [ %tmp13, %bb2 ]
|
||||
%tmp = getelementptr inbounds %array_ty, %array_ty* %arg, i64 0, i32 1
|
||||
%tmp3 = load %array_ptr*, %array_ptr** %tmp, align 8
|
||||
%tmp4 = getelementptr inbounds %array_ptr, %array_ptr* %tmp3, i64 0, i32 1, i64 0
|
||||
%tmp5 = load i64, i64* %tmp4, align 8
|
||||
%tmp6 = mul nsw i64 %tmp5, %.0
|
||||
%tmp7 = add nsw i64 %tmp6, %.01
|
||||
%tmp8 = getelementptr inbounds %array_ptr, %array_ptr* %tmp3, i64 0, i32 6
|
||||
%tmp9 = load double*, double** %tmp8, align 8
|
||||
%tmp10 = getelementptr inbounds double, double* %tmp9, i64 %tmp7
|
||||
store double 13.0, double* %tmp10, align 8
|
||||
%tmp13 = add nuw nsw i64 %.01, 1
|
||||
%exitcond = icmp ne i64 %tmp13, 1000
|
||||
br i1 %exitcond, label %bb2, label %bb14
|
||||
|
||||
bb14: ; preds = %bb2
|
||||
%tmp15 = add nuw nsw i64 %.0, 1
|
||||
%exitcond8 = icmp ne i64 %tmp15, 1000
|
||||
br i1 %exitcond8, label %bb1, label %bb16
|
||||
|
||||
bb16: ; preds = %bb14
|
||||
ret void
|
||||
}
|
||||
Reference in New Issue
Block a user