diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp
index 2d8b393cc039..062cdfbcfe3b 100644
--- a/polly/lib/CodeGen/CodeGeneration.cpp
+++ b/polly/lib/CodeGen/CodeGeneration.cpp
@@ -235,15 +235,6 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
   NodeBuilder.allocateNewArrays(StartExitBlocks);
   Annotator.buildAliasScopes(S);
 
-  // The code below annotates the "llvm.loop.vectorize.enable" to false
-  // for the code flow taken when RTCs fail. Because we don't want the
-  // Loop Vectorizer to come in later and vectorize the original fall back
-  // loop when Polly is enabled.
-  for (Loop *L : LI.getLoopsInPreorder()) {
-    if (S.contains(L))
-      addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0);
-  }
-
   if (PerfMonitoring) {
     PerfMonitor P(S, EnteringBB->getParent()->getParent());
     P.initialize();
@@ -285,6 +276,21 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
 
     Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
 
+    auto *CI = dyn_cast<ConstantInt>(RTC);
+    // Mark the loops of the original SCoP, i.e. the fallback code executed
+    // when the RTC fails, with "llvm.loop.vectorize.enable" = false. We do
+    // not want the Loop Vectorizer to come in later and vectorize (and
+    // thereby version) the fallback loops when Polly is enabled. Do not do
+    // this when the RTC is the constant false (due to a code generation
+    // failure): we are then left with only one version of the loop, which
+    // must stay eligible for vectorization.
+    if (!(CI && CI->isZero())) {
+      for (Loop *L : LI.getLoopsInPreorder()) {
+        if (S.contains(L))
+          addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0);
+      }
+    }
+
     // Explicitly set the insert point to the end of the block to avoid that a
     // split at the builder's current
     // insert position would move the malloc calls to the wrong BasicBlock.
diff --git a/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll b/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll
new file mode 100644
index 000000000000..317d30649ab1
--- /dev/null
+++ b/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll
@@ -0,0 +1,28 @@
+; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s
+; RUN: opt %loadNPMPolly -S -passes=polly-codegen < %s | FileCheck %s
+
+; Verify that no loop is annotated with "llvm.loop.vectorize.enable" = 0 (i.e. vectorization is not disabled) when Polly's RTC is false.
+
+; CHECK: attributes {{.*}} = { "polly-optimized" }
+; CHECK-NOT: {{.*}} = !{!"llvm.loop.vectorize.enable", i32 0}
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-android10000"
+
+define void @ham(i64 %arg) {
+bb:
+  br label %bb1
+
+bb1: ; preds = %bb3, %bb
+  %phi = phi ptr [ %getelementptr4, %bb3 ], [ null, %bb ]
+  br label %bb2
+
+bb2: ; preds = %bb2, %bb1
+  %getelementptr = getelementptr i8, ptr %phi, i64 1
+  store i8 0, ptr %getelementptr, align 1
+  br i1 false, label %bb2, label %bb3
+
+bb3: ; preds = %bb2
+  %getelementptr4 = getelementptr i8, ptr %phi, i64 %arg
+  br label %bb1
+}