diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h index c029df40fa66..0431af0150de 100644 --- a/polly/include/polly/ScopDetection.h +++ b/polly/include/polly/ScopDetection.h @@ -318,6 +318,17 @@ private: bool hasSufficientCompute(DetectionContext &Context, int NumAffineLoops) const; + /// @brief Check if the unique affine loop might be amenable to distribution. + /// + /// This function checks if at least two blocks of the unique affine loop in + /// Context.CurRegion contain a store, thus if the loop might be amenable to + /// distribution. + /// + /// @param Context The context of scop detection. + /// + /// @return True only if the affine loop might be amenable to distribution. + bool hasPossiblyDistributableLoop(DetectionContext &Context) const; + /// @brief Check if a region is profitable to optimize. /// /// Regions that are unlikely to expose interesting optimization opportunities diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index 4ee92fcfa93c..34f439bae0bb 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -1269,6 +1269,26 @@ bool ScopDetection::hasSufficientCompute(DetectionContext &Context, return InstCount >= ProfitabilityMinPerLoopInstructions; } +bool ScopDetection::hasPossiblyDistributableLoop( + DetectionContext &Context) const { + for (auto *BB : Context.CurRegion.blocks()) { + auto *L = LI->getLoopFor(BB); + if (!L || !Context.CurRegion.contains(L)) + continue; + if (Context.BoxedLoopsSet.count(L)) + continue; + unsigned StmtsWithStoresInLoops = 0; + for (auto *LBB : L->blocks()) { + bool MemStore = false; + for (auto &I : *LBB) + MemStore |= isa<StoreInst>(&I); + StmtsWithStoresInLoops += MemStore; + } + return (StmtsWithStoresInLoops > 1); + } + return false; +} + bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -1288,6 +1308,10 @@ bool 
ScopDetection::isProfitableRegion(DetectionContext &Context) const { if (NumAffineLoops >= 2) return true; + // A loop with multiple non-trivial blocks might be amenable to distribution. + if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) + return true; + // Scops that contain a loop with a non-trivial amount of computation per // loop-iteration are interesting as we may be able to parallelize such // loops. Individual loops that have only a small amount of computation diff --git a/polly/test/ScopDetect/only-one-affine-loop.ll b/polly/test/ScopDetect/only-one-affine-loop.ll index a57255ec002b..0cfad0dbc5dc 100644 --- a/polly/test/ScopDetect/only-one-affine-loop.ll +++ b/polly/test/ScopDetect/only-one-affine-loop.ll @@ -1,14 +1,11 @@ ; RUN: opt %loadPolly -polly-detect -polly-process-unprofitable=false -analyze \ ; RUN: -polly-allow-nonaffine-loops < %s | FileCheck %s ; -; RUN: opt %loadPolly -polly-detect -analyze \ -; RUN: -polly-allow-nonaffine-loops < %s | FileCheck %s --check-prefix=UNPROFIT -; ; Even if we allow non-affine loops we can only model the outermost loop, all -; other loops are boxed in non-affine regions +; other loops are boxed in non-affine regions. However, the inner loops can be +; distributed as black-boxes, thus we will recognize the outer loop as profitable. 
; -; CHECK-NOT: Valid -; UNPROFIT: Valid Region for Scop: for.cond => for.end.51 +; CHECK: Valid Region for Scop: for.cond => for.end.51 ; ; void f(int *A) { ; for (int i = 0; i < 100; i++) { diff --git a/polly/test/ScopInfo/NonAffine/non_affine_loop_used_later.ll b/polly/test/ScopInfo/NonAffine/non_affine_loop_used_later.ll index f1f521e747fb..78bea729b3f6 100644 --- a/polly/test/ScopInfo/NonAffine/non_affine_loop_used_later.ll +++ b/polly/test/ScopInfo/NonAffine/non_affine_loop_used_later.ll @@ -86,7 +86,9 @@ ; CHECK-NEXT: [N] -> { Stmt_bb23[i0] -> MemRef_j_0__phi[] }; ; CHECK-NEXT: } ; -; PROFIT-NOT: Statements +; As we might be able to distribute the outer loop we consider the region profitable for now. +; +; PROFIT: Statements ; ; void f(int *A, int N, int M) { ; int i = 0, j = 0;