[Polly] Retain vectorization for fallback loop when RTC is unsatisfiable (#165525)

When Polly generates a false runtime condition (RTC), the associated
Polly-generated loop is never executed and is eventually eliminated. As
a result, the fallback loop becomes the default execution path, so
disabling vectorization for this fallback loop would be
counterproductive. This patch ensures that vectorization is disabled
only when the RTC is not false (i.e., there was no codegen failure).
This commit is contained in:
Karthika Devi C
2025-11-10 13:48:15 +05:30
committed by GitHub
parent f6138015ef
commit 6408703de5
2 changed files with 43 additions and 9 deletions

View File

@@ -235,15 +235,6 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
NodeBuilder.allocateNewArrays(StartExitBlocks);
Annotator.buildAliasScopes(S);
// The code below annotates the "llvm.loop.vectorize.enable" to false
// for the code flow taken when RTCs fail. Because we don't want the
// Loop Vectorizer to come in later and vectorize the original fall back
// loop when Polly is enabled.
for (Loop *L : LI.getLoopsInPreorder()) {
if (S.contains(L))
addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0);
}
if (PerfMonitoring) {
PerfMonitor P(S, EnteringBB->getParent()->getParent());
P.initialize();
@@ -285,6 +276,21 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
auto *CI = dyn_cast<ConstantInt>(RTC);
// The code below sets "llvm.loop.vectorize.enable" to false for the code
// path taken when the RTCs fail, because we don't want the Loop Vectorizer
// to come in later and vectorize the original fallback loop when Polly is
// enabled. This avoids loop versioning of the fallback loop by the Loop
// Vectorizer. Don't do this when Polly's RTC value is false (due to code
// generation failure), as we are then left with only one version of the
// loop.
if (!(CI && CI->isZero())) {
for (Loop *L : LI.getLoopsInPreorder()) {
if (S.contains(L))
addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0);
}
}
// Explicitly set the insert point to the end of the block to avoid that a
// split at the builder's current
// insert position would move the malloc calls to the wrong BasicBlock.

View File

@@ -0,0 +1,28 @@
; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s
; RUN: opt %loadNPMPolly -S -passes=polly-codegen < %s | FileCheck %s
; Verify that vectorization is not disabled when Polly's RTC is false.
; CHECK: attributes {{.*}} = { "polly-optimized" }
; CHECK-NOT: {{.*}} = !{!"llvm.loop.vectorize.enable", i32 0}
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-android10000"
; @ham: a two-level loop nest with no exit (bb3 always branches back to bb1).
; The outer loop (bb1 -> bb2 -> bb3) carries a pointer that starts at null and
; advances by %arg bytes per iteration; the inner block bb2 stores a zero byte
; at offset 1 from that pointer.
; NOTE(review): presumably this shape is what drives Polly's RTC to false, as
; the header comment of this test states -- confirm against the codegen output.
define void @ham(i64 %arg) {
bb:
br label %bb1
bb1: ; preds = %bb3, %bb
; Outer-loop pointer: null on entry, otherwise the value advanced in bb3.
%phi = phi ptr [ %getelementptr4, %bb3 ], [ null, %bb ]
br label %bb2
bb2: ; preds = %bb2, %bb1
; Store a zero byte one byte past the current outer-loop pointer.
%getelementptr = getelementptr i8, ptr %phi, i64 1
store i8 0, ptr %getelementptr, align 1
; The back-edge condition is the constant false, so control always leaves
; bb2 for bb3 after a single pass.
br i1 false, label %bb2, label %bb3
bb3: ; preds = %bb2
; Advance the outer-loop pointer by %arg bytes and loop back unconditionally.
%getelementptr4 = getelementptr i8, ptr %phi, i64 %arg
br label %bb1
}