From bf9473b2d8dfeaebcfa25370d16a0f753afa57f8 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <doerfert@cs.uni-saarland.de>
Date: Tue, 10 May 2016 14:42:30 +0000
Subject: [PATCH] Weaken profitability constraints during ScopDetection

  Regions with one affine loop can be profitable if the loop is
  distributable. To this end we will allow them to be treated as
  profitable if they contain at least two non-trivial basic blocks.

llvm-svn: 269064
---
 polly/include/polly/ScopDetection.h           | 11 +++++++++
 polly/lib/Analysis/ScopDetection.cpp          | 24 +++++++++++++++++++
 polly/test/ScopDetect/only-one-affine-loop.ll |  9 +++----
 .../NonAffine/non_affine_loop_used_later.ll   |  4 +++-
 4 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h
index c029df40fa66..0431af0150de 100644
--- a/polly/include/polly/ScopDetection.h
+++ b/polly/include/polly/ScopDetection.h
@@ -318,6 +318,17 @@ private:
   bool hasSufficientCompute(DetectionContext &Context,
                             int NumAffineLoops) const;
 
+  /// @brief Check if the unique affine loop might be amenable to distribution.
+  ///
+  /// This function checks if the number of non-trivial blocks, i.e., blocks
+  /// containing a store, in the unique affine loop in Context.CurRegion is at
+  /// least two, thus if the loop might be amenable to distribution.
+  ///
+  /// @param Context  The context of scop detection.
+  ///
+  /// @return True only if the affine loop might be amenable to distribution.
+  bool hasPossiblyDistributableLoop(DetectionContext &Context) const;
+
   /// @brief Check if a region is profitable to optimize.
   ///
   /// Regions that are unlikely to expose interesting optimization opportunities
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index 4ee92fcfa93c..34f439bae0bb 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -1269,6 +1269,26 @@ bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
   return InstCount >= ProfitabilityMinPerLoopInstructions;
 }
 
+bool ScopDetection::hasPossiblyDistributableLoop(
+    DetectionContext &Context) const {
+  // Only the first loop that is inside the region and not boxed is inspected.
+  for (auto *BB : Context.CurRegion.blocks()) {
+    auto *L = LI->getLoopFor(BB);
+    if (!Context.CurRegion.contains(L) || Context.BoxedLoopsSet.count(L))
+      continue;
+    // Count the blocks of that loop which contain at least one store.
+    unsigned NumBlocksWithStore = 0;
+    for (auto *LoopBB : L->blocks()) {
+      bool HasStore = false;
+      for (auto &Inst : *LoopBB)
+        HasStore |= isa<StoreInst>(&Inst);
+      NumBlocksWithStore += HasStore;
+    }
+    return NumBlocksWithStore > 1;
+  }
+  return false;
+}
+
 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
   Region &CurRegion = Context.CurRegion;
 
@@ -1288,6 +1308,10 @@ bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
   if (NumAffineLoops >= 2)
     return true;
 
+  // A loop with multiple non-trivial blocks might be amenable to distribution.
+  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
+    return true;
+
   // Scops that contain a loop with a non-trivial amount of computation per
   // loop-iteration are interesting as we may be able to parallelize such
   // loops. Individual loops that have only a small amount of computation
diff --git a/polly/test/ScopDetect/only-one-affine-loop.ll b/polly/test/ScopDetect/only-one-affine-loop.ll
index a57255ec002b..0cfad0dbc5dc 100644
--- a/polly/test/ScopDetect/only-one-affine-loop.ll
+++ b/polly/test/ScopDetect/only-one-affine-loop.ll
@@ -1,14 +1,11 @@
 ; RUN: opt %loadPolly -polly-detect -polly-process-unprofitable=false -analyze \
 ; RUN:     -polly-allow-nonaffine-loops < %s | FileCheck %s
 ;
-; RUN: opt %loadPolly -polly-detect -analyze \
-; RUN:     -polly-allow-nonaffine-loops < %s | FileCheck %s --check-prefix=UNPROFIT
-;
 ; Even if we allow non-affine loops we can only model the outermost loop, all
-; other loops are boxed in non-affine regions
+; other loops are boxed in non-affine regions. However, the inner loops can be
+; distributed as black-boxes, thus we will recognize the outer loop as profitable.
 ;
-; CHECK-NOT: Valid
-; UNPROFIT:  Valid Region for Scop: for.cond => for.end.51
+; CHECK:  Valid Region for Scop: for.cond => for.end.51
 ;
 ;    void f(int *A) {
 ;      for (int i = 0; i < 100; i++) {
diff --git a/polly/test/ScopInfo/NonAffine/non_affine_loop_used_later.ll b/polly/test/ScopInfo/NonAffine/non_affine_loop_used_later.ll
index f1f521e747fb..78bea729b3f6 100644
--- a/polly/test/ScopInfo/NonAffine/non_affine_loop_used_later.ll
+++ b/polly/test/ScopInfo/NonAffine/non_affine_loop_used_later.ll
@@ -86,7 +86,9 @@
 ; CHECK-NEXT:             [N] -> { Stmt_bb23[i0] -> MemRef_j_0__phi[] };
 ; CHECK-NEXT: }
 ;
-; PROFIT-NOT: Statements
+; As we might be able to distribute the outer loop we consider the region profitable for now.
+;
+; PROFIT: Statements
 ;
 ;    void f(int *A, int N, int M) {
 ;      int i = 0, j = 0;