[InstCombine] Fold icmp pred X + K, Y -> icmp pred2 X, Y if both X and Y are divisible by K (#147130)

This patch generalizes `icmp ule X +nuw 1, Y -> icmp ult X, Y`-like
optimizations to handle the case that the added RHS constant is a common
power-of-2 divisor of both X and Y. We can further generalize this
pattern to handle non-power-of-2 divisors as well.
Alive2: https://alive2.llvm.org/ce/z/QgpeM_

Compile-time improvement (Stage2-O3 -0.09%):
https://llvm-compile-time-tracker.com/compare.php?from=0ba59587fa98849ed5107fee4134e810e84b69a3&to=f80e5fe0bb2e63c05401bde7cd42899ea270909b&stat=instructions:u

The original case is from the comparison of expanded GEP offsets:
https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2530/files#r2183005292
This commit is contained in:
Yingwei Zheng
2025-07-05 23:42:53 +08:00
committed by GitHub
parent 0bc6d60b9b
commit c9d9c3e349
2 changed files with 164 additions and 54 deletions

View File

@@ -5120,6 +5120,18 @@ static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
return nullptr;
}
/// Return true if X is known to be a multiple of C.
/// TODO: Handle non-power-of-2 factors.
static bool isMultipleOf(Value *X, const APInt &C, const SimplifyQuery &Q) {
  // Everything is a multiple of one.
  if (C.isOne())
    return true;
  // For a power-of-2 divisor, X is a multiple of C exactly when the low
  // log2(C) bits of X are zero; the mask C - 1 selects those bits.
  // Non-power-of-2 divisors are not handled yet.
  return C.isPowerOf2() && MaskedValueIsZero(X, C - 1, Q);
}
/// Try to fold icmp (binop), X or icmp X, (binop).
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -5278,66 +5290,62 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, Y, Z);
}
// icmp slt (A + -1), Op1 -> icmp sle A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
match(B, m_AllOnes()))
return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
if (ICmpInst::isRelational(Pred)) {
// Return if both X and Y is divisible by Z/-Z.
// TODO: Generalize to check if (X - Y) is divisible by Z/-Z.
auto ShareCommonDivisor = [&Q](Value *X, Value *Y, Value *Z,
bool IsNegative) -> bool {
const APInt *OffsetC;
if (!match(Z, m_APInt(OffsetC)))
return false;
// icmp sge (A + -1), Op1 -> icmp sgt A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
match(B, m_AllOnes()))
return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
// Fast path for Z == 1/-1.
if (IsNegative ? OffsetC->isAllOnes() : OffsetC->isOne())
return true;
// icmp sle (A + 1), Op1 -> icmp slt A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
APInt C = *OffsetC;
if (IsNegative)
C.negate();
// Note: -INT_MIN is also negative.
if (!C.isStrictlyPositive())
return false;
// icmp sgt (A + 1), Op1 -> icmp sge A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
return isMultipleOf(X, C, Q) && isMultipleOf(Y, C, Q);
};
// icmp sgt Op0, (C + -1) -> icmp sge Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
match(D, m_AllOnes()))
return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
// TODO: The subtraction-related identities shown below also hold, but
// canonicalization from (X -nuw 1) to (X + -1) means that the combinations
// wouldn't happen even if they were implemented.
//
// icmp ult (A - 1), Op1 -> icmp ule A, Op1
// icmp uge (A - 1), Op1 -> icmp ugt A, Op1
// icmp ugt Op0, (C - 1) -> icmp uge Op0, C
// icmp ule Op0, (C - 1) -> icmp ult Op0, C
// icmp sle Op0, (C + -1) -> icmp slt Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
match(D, m_AllOnes()))
return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
// icmp slt (A + -1), Op1 -> icmp sle A, Op1
// icmp sge (A + -1), Op1 -> icmp sgt A, Op1
// icmp sle (A + 1), Op1 -> icmp slt A, Op1
// icmp sgt (A + 1), Op1 -> icmp sge A, Op1
// icmp ule (A + 1), Op1 -> icmp ult A, Op1
// icmp ugt (A + 1), Op1 -> icmp uge A, Op1
if (A && NoOp0WrapProblem &&
ShareCommonDivisor(A, Op1, B,
ICmpInst::isLT(Pred) || ICmpInst::isGE(Pred)))
return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), A,
Op1);
// icmp sge Op0, (C + 1) -> icmp sgt Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
// icmp slt Op0, (C + 1) -> icmp sle Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
// TODO: The subtraction-related identities shown below also hold, but
// canonicalization from (X -nuw 1) to (X + -1) means that the combinations
// wouldn't happen even if they were implemented.
//
// icmp ult (A - 1), Op1 -> icmp ule A, Op1
// icmp uge (A - 1), Op1 -> icmp ugt A, Op1
// icmp ugt Op0, (C - 1) -> icmp uge Op0, C
// icmp ule Op0, (C - 1) -> icmp ult Op0, C
// icmp ule (A + 1), Op1 -> icmp ult A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
// icmp ugt (A + 1), Op0 -> icmp uge A, Op1
if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
// icmp uge Op0, (C + 1) -> icmp ugt Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
// icmp ult Op0, (C + 1) -> icmp ule Op0, C
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
// icmp sgt Op0, (C + -1) -> icmp sge Op0, C
// icmp sle Op0, (C + -1) -> icmp slt Op0, C
// icmp sge Op0, (C + 1) -> icmp sgt Op0, C
// icmp slt Op0, (C + 1) -> icmp sle Op0, C
// icmp uge Op0, (C + 1) -> icmp ugt Op0, C
// icmp ult Op0, (C + 1) -> icmp ule Op0, C
if (C && NoOp1WrapProblem &&
ShareCommonDivisor(Op0, C, D,
ICmpInst::isGT(Pred) || ICmpInst::isLE(Pred)))
return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), Op0,
C);
}
// if C1 has greater magnitude than C2:
// icmp (A + C1), (C + C2) -> icmp (A + C3), C

View File

@@ -1900,6 +1900,108 @@ define i1 @icmp_add1_sle(i32 %x, i32 %y) {
ret i1 %cmp
}
; Both %shlx and %shly are shl-by-4, i.e. multiples of 16, so the -16 offset
; folds away: icmp slt (X + -16), Y -> icmp sle X, Y.
define i1 @icmp_slt_offset_with_common_divisor(i64 %x, i64 %y) {
; CHECK-LABEL: @icmp_slt_offset_with_common_divisor(
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[SHLX]], [[SHLY]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%shlx = shl i64 %x, 4
%shly = shl i64 %y, 4
%shlx_offset = add nsw i64 %shlx, -16
%cmp = icmp slt i64 %shlx_offset, %shly
ret i1 %cmp
}
; The offset -8 is a proper divisor of both operands (each is a multiple of
; 16, hence also of 8), so the fold still applies: slt -> sle.
define i1 @icmp_slt_offset_with_smaller_common_divisor(i64 %x, i64 %y) {
; CHECK-LABEL: @icmp_slt_offset_with_smaller_common_divisor(
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[SHLX]], [[SHLY]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%shlx = shl i64 %x, 4
%shly = shl i64 %y, 4
%shlx_offset = add nsw i64 %shlx, -8
%cmp = icmp slt i64 %shlx_offset, %shly
ret i1 %cmp
}
; Positive offset with a non-strict predicate: since the +16 offset divides
; both multiples-of-16 operands, icmp sle (X + 16), Y -> icmp slt X, Y.
define i1 @icmp_sle_offset_with_common_divisor(i64 %x, i64 %y) {
; CHECK-LABEL: @icmp_sle_offset_with_common_divisor(
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX]], [[SHLY]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%shlx = shl i64 %x, 4
%shly = shl i64 %y, 4
%shlx_offset = add nsw i64 %shlx, 16
%cmp = icmp sle i64 %shlx_offset, %shly
ret i1 %cmp
}
; Unsigned variant: with nuw on the add and a +16 offset dividing both
; operands, icmp ule (X + 16), Y -> icmp ult X, Y.
define i1 @icmp_ule_offset_with_common_divisor(i64 %x, i64 %y) {
; CHECK-LABEL: @icmp_ule_offset_with_common_divisor(
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[SHLX]], [[SHLY]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%shlx = shl i64 %x, 4
%shly = shl i64 %y, 4
%shlx_offset = add nuw i64 %shlx, 16
%cmp = icmp ule i64 %shlx_offset, %shly
ret i1 %cmp
}
; TODO: Handle non-power-of-2 divisors
; Both operands and the offset are multiples of 7, but isMultipleOf only
; reasons about power-of-2 factors, so the CHECK lines show no fold yet.
define i1 @icmp_ule_offset_with_common_non_pow2_divisor(i64 %x, i64 %y) {
; CHECK-LABEL: @icmp_ule_offset_with_common_non_pow2_divisor(
; CHECK-NEXT: [[MULX:%.*]] = mul nuw i64 [[X:%.*]], 7
; CHECK-NEXT: [[MULY:%.*]] = mul nuw i64 [[Y:%.*]], 7
; CHECK-NEXT: [[MULX_OFFSET:%.*]] = add nuw i64 [[MULX]], 7
; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MULX_OFFSET]], [[MULY]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%mulx = mul nuw i64 %x, 7
%muly = mul nuw i64 %y, 7
%mulx_offset = add nuw i64 %mulx, 7
%cmp = icmp ule i64 %mulx_offset, %muly
ret i1 %cmp
}
; Negative test: the operands are only known multiples of 16, which 32 does
; not divide, so no common divisor is established and the icmp is unchanged.
define i1 @neg_icmp_slt_offset_without_common_divisor(i64 %x, i64 %y) {
; CHECK-LABEL: @neg_icmp_slt_offset_without_common_divisor(
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
; CHECK-NEXT: [[SHLX_OFFSET:%.*]] = add nsw i64 [[SHLX]], -32
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX_OFFSET]], [[SHLY]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%shlx = shl i64 %x, 4
%shly = shl i64 %y, 4
%shlx_offset = add nsw i64 %shlx, -32
%cmp = icmp slt i64 %shlx_offset, %shly
ret i1 %cmp
}
; Negative test: a strict slt requires a negative offset for the fold
; (cf. icmp slt (A + -1), Op1 -> icmp sle A, Op1); the +16 offset has the
; wrong sign, so the icmp is left unchanged.
define i1 @neg_icmp_slt_offset_with_wrong_sign(i64 %x, i64 %y) {
; CHECK-LABEL: @neg_icmp_slt_offset_with_wrong_sign(
; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
; CHECK-NEXT: [[SHLX_OFFSET:%.*]] = add nsw i64 [[SHLX]], 16
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX_OFFSET]], [[SHLY]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%shlx = shl i64 %x, 4
%shly = shl i64 %y, 4
%shlx_offset = add nsw i64 %shlx, 16
%cmp = icmp slt i64 %shlx_offset, %shly
ret i1 %cmp
}
define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
; CHECK-LABEL: @icmp_add20_sge_add57(
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[Y:%.*]], 37