mirror of
https://github.com/intel/llvm.git
synced 2026-01-27 06:06:34 +08:00
[ConstantRange] Estimate tighter lower (upper) bounds for masked binary and (or) (#120352)
Fixes #118108. Co-author: Yingwei Zheng (@dtcxzyw)
This commit is contained in:
committed by
GitHub
parent
1d0f40ba05
commit
2feffecb88
@@ -35,7 +35,7 @@ extern "C" {
|
||||
//
|
||||
fpm_t test_init() { return __arm_fpm_init(); }
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef i64 @test_src1_1(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, -6) i64 @test_src1_1(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 -8
|
||||
@@ -44,7 +44,7 @@ fpm_t test_src1_1() {
|
||||
return __arm_set_fpm_src1_format(INIT_ONES, __ARM_FPM_E5M2);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef i64 @test_src1_2(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, -6) i64 @test_src1_2(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 1
|
||||
@@ -53,7 +53,7 @@ fpm_t test_src1_2() {
|
||||
return __arm_set_fpm_src1_format(INIT_ZERO, __ARM_FPM_E4M3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef i64 @test_src2_1(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, -48) i64 @test_src2_1(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 -57
|
||||
@@ -62,7 +62,7 @@ fpm_t test_src2_1() {
|
||||
return __arm_set_fpm_src2_format(INIT_ONES, __ARM_FPM_E5M2);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef i64 @test_src2_2(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, -48) i64 @test_src2_2(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 8
|
||||
@@ -71,7 +71,7 @@ fpm_t test_src2_2() {
|
||||
return __arm_set_fpm_src2_format(INIT_ZERO, __ARM_FPM_E4M3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef i64 @test_dst1_1(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, -384) i64 @test_dst1_1(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 -449
|
||||
@@ -80,7 +80,7 @@ fpm_t test_dst1_1() {
|
||||
return __arm_set_fpm_dst_format(INIT_ONES, __ARM_FPM_E5M2);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef i64 @test_dst2_2(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, -384) i64 @test_dst2_2(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 64
|
||||
@@ -139,21 +139,21 @@ fpm_t test_lscale() { return __arm_set_fpm_lscale(INIT_ZERO, 127); }
|
||||
//
|
||||
fpm_t test_lscale2() { return __arm_set_fpm_lscale2(INIT_ZERO, 63); }
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_1(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 2147483648
|
||||
//
|
||||
fpm_t test_nscale_1() { return __arm_set_fpm_nscale(INIT_ZERO, -128); }
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_2(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 2130706432
|
||||
//
|
||||
fpm_t test_nscale_2() { return __arm_set_fpm_nscale(INIT_ZERO, 127); }
|
||||
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3(
|
||||
// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_3(
|
||||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: ret i64 4278190080
|
||||
|
||||
@@ -1520,15 +1520,72 @@ ConstantRange ConstantRange::binaryNot() const {
|
||||
return ConstantRange(APInt::getAllOnes(getBitWidth())).sub(*this);
|
||||
}
|
||||
|
||||
/// Estimate the 'bit-masked AND' operation's lower bound.
|
||||
///
|
||||
/// E.g., given two ranges as follows (single quotes are separators and
|
||||
/// have no meaning here),
|
||||
///
|
||||
/// LHS = [10'00101'1, ; LLo
|
||||
/// 10'10000'0] ; LHi
|
||||
/// RHS = [10'11111'0, ; RLo
|
||||
/// 10'11111'1] ; RHi
|
||||
///
|
||||
/// we know that the higher 2 bits of the result is always 10; and we also
|
||||
/// notice that RHS[1:6] are always 1, so the result[1:6] cannot be less than
|
||||
/// LHS[1:6] (i.e., 00101). Thus, the lower bound is 10'00101'0.
|
||||
///
|
||||
/// The algorithm is as follows,
|
||||
/// 1. we first calculate a mask to find the higher common bits by
|
||||
/// Mask = ~((LLo ^ LHi) | (RLo ^ RHi) | (LLo ^ RLo));
|
||||
/// Mask = clear all non-leading-ones bits in Mask;
|
||||
/// in the example, the Mask is set to 11'00000'0;
|
||||
/// 2. calculate a new mask by setting all common leading bits to 1 in RHS, and
|
||||
/// keeping the longest leading ones (i.e., 11'11111'0 in the example);
|
||||
/// 3. return (LLo & new mask) as the lower bound;
|
||||
/// 4. repeat the step 2 and 3 with LHS and RHS swapped, and update the lower
|
||||
/// bound with the larger one.
|
||||
static APInt estimateBitMaskedAndLowerBound(const ConstantRange &LHS,
|
||||
const ConstantRange &RHS) {
|
||||
auto BitWidth = LHS.getBitWidth();
|
||||
// If either is full set or unsigned wrapped, then the range must contain '0'
|
||||
// which leads the lower bound to 0.
|
||||
if ((LHS.isFullSet() || RHS.isFullSet()) ||
|
||||
(LHS.isWrappedSet() || RHS.isWrappedSet()))
|
||||
return APInt::getZero(BitWidth);
|
||||
|
||||
auto LLo = LHS.getLower();
|
||||
auto LHi = LHS.getUpper() - 1;
|
||||
auto RLo = RHS.getLower();
|
||||
auto RHi = RHS.getUpper() - 1;
|
||||
|
||||
// Calculate the mask for the higher common bits.
|
||||
auto Mask = ~((LLo ^ LHi) | (RLo ^ RHi) | (LLo ^ RLo));
|
||||
unsigned LeadingOnes = Mask.countLeadingOnes();
|
||||
Mask.clearLowBits(BitWidth - LeadingOnes);
|
||||
|
||||
auto estimateBound = [BitWidth, &Mask](APInt ALo, const APInt &BLo,
|
||||
const APInt &BHi) {
|
||||
unsigned LeadingOnes = ((BLo & BHi) | Mask).countLeadingOnes();
|
||||
unsigned StartBit = BitWidth - LeadingOnes;
|
||||
ALo.clearLowBits(StartBit);
|
||||
return ALo;
|
||||
};
|
||||
|
||||
auto LowerBoundByLHS = estimateBound(LLo, RLo, RHi);
|
||||
auto LowerBoundByRHS = estimateBound(RLo, LLo, LHi);
|
||||
|
||||
return APIntOps::umax(LowerBoundByLHS, LowerBoundByRHS);
|
||||
}
|
||||
|
||||
ConstantRange ConstantRange::binaryAnd(const ConstantRange &Other) const {
|
||||
if (isEmptySet() || Other.isEmptySet())
|
||||
return getEmpty();
|
||||
|
||||
ConstantRange KnownBitsRange =
|
||||
fromKnownBits(toKnownBits() & Other.toKnownBits(), false);
|
||||
ConstantRange UMinUMaxRange =
|
||||
getNonEmpty(APInt::getZero(getBitWidth()),
|
||||
APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()) + 1);
|
||||
auto LowerBound = estimateBitMaskedAndLowerBound(*this, Other);
|
||||
ConstantRange UMinUMaxRange = getNonEmpty(
|
||||
LowerBound, APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()) + 1);
|
||||
return KnownBitsRange.intersectWith(UMinUMaxRange);
|
||||
}
|
||||
|
||||
@@ -1538,10 +1595,17 @@ ConstantRange ConstantRange::binaryOr(const ConstantRange &Other) const {
|
||||
|
||||
ConstantRange KnownBitsRange =
|
||||
fromKnownBits(toKnownBits() | Other.toKnownBits(), false);
|
||||
|
||||
// ~a & ~b >= x
|
||||
// <=> ~(~a & ~b) <= ~x
|
||||
// <=> a | b <= ~x
|
||||
// <=> a | b < ~x + 1 = -x
|
||||
// thus, UpperBound(a | b) == -LowerBound(~a & ~b)
|
||||
auto UpperBound =
|
||||
-estimateBitMaskedAndLowerBound(binaryNot(), Other.binaryNot());
|
||||
// Upper wrapped range.
|
||||
ConstantRange UMaxUMinRange =
|
||||
getNonEmpty(APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()),
|
||||
APInt::getZero(getBitWidth()));
|
||||
ConstantRange UMaxUMinRange = getNonEmpty(
|
||||
APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()), UpperBound);
|
||||
return KnownBitsRange.intersectWith(UMaxUMinRange);
|
||||
}
|
||||
|
||||
|
||||
88
llvm/test/Transforms/SCCP/range-and-or-bit-masked.ll
Normal file
88
llvm/test/Transforms/SCCP/range-and-or-bit-masked.ll
Normal file
@@ -0,0 +1,88 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -passes=ipsccp %s | FileCheck %s
|
||||
|
||||
declare void @use(i1)
|
||||
|
||||
define i1 @test1(i64 %x) {
|
||||
; CHECK-LABEL: @test1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[COND:%.*]] = icmp ugt i64 [[X:%.*]], 65535
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[COND]])
|
||||
; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X]], -65521
|
||||
; CHECK-NEXT: ret i1 false
|
||||
;
|
||||
entry:
|
||||
%cond = icmp ugt i64 %x, 65535
|
||||
call void @llvm.assume(i1 %cond)
|
||||
%mask = and i64 %x, -65521
|
||||
%cmp = icmp eq i64 %mask, 0
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
define void @test.and(i64 %x, i64 %y) {
|
||||
; CHECK-LABEL: @test.and(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp uge i64 [[X:%.*]], 138
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp ule i64 [[X]], 161
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[C0]])
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[C1]])
|
||||
; CHECK-NEXT: [[C2:%.*]] = icmp uge i64 [[Y:%.*]], 186
|
||||
; CHECK-NEXT: [[C3:%.*]] = icmp ule i64 [[Y]], 188
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[C2]])
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
|
||||
; CHECK-NEXT: [[AND:%.*]] = and i64 [[X]], [[Y]]
|
||||
; CHECK-NEXT: call void @use(i1 false)
|
||||
; CHECK-NEXT: [[R1:%.*]] = icmp ult i64 [[AND]], 137
|
||||
; CHECK-NEXT: call void @use(i1 [[R1]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%c0 = icmp uge i64 %x, 138 ; 0b10001010
|
||||
%c1 = icmp ule i64 %x, 161 ; 0b10100000
|
||||
call void @llvm.assume(i1 %c0)
|
||||
call void @llvm.assume(i1 %c1)
|
||||
%c2 = icmp uge i64 %y, 186 ; 0b10111010
|
||||
%c3 = icmp ule i64 %y, 188 ; 0b10111110
|
||||
call void @llvm.assume(i1 %c2)
|
||||
call void @llvm.assume(i1 %c3)
|
||||
%and = and i64 %x, %y
|
||||
%r0 = icmp ult i64 %and, 136 ; 0b10001000
|
||||
call void @use(i1 %r0) ; false
|
||||
%r1 = icmp ult i64 %and, 137
|
||||
call void @use(i1 %r1) ; unknown
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test.or(i64 %x, i64 %y) {
|
||||
; CHECK-LABEL: @test.or(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp ule i64 [[X:%.*]], 117
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp uge i64 [[X]], 95
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[C0]])
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[C1]])
|
||||
; CHECK-NEXT: [[C2:%.*]] = icmp ule i64 [[Y:%.*]], 69
|
||||
; CHECK-NEXT: [[C3:%.*]] = icmp uge i64 [[Y]], 67
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[C2]])
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i64 [[X]], [[Y]]
|
||||
; CHECK-NEXT: call void @use(i1 false)
|
||||
; CHECK-NEXT: [[R1:%.*]] = icmp ugt i64 [[OR]], 118
|
||||
; CHECK-NEXT: call void @use(i1 [[R1]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%c0 = icmp ule i64 %x, 117 ; 0b01110101
|
||||
%c1 = icmp uge i64 %x, 95 ; 0b01011111
|
||||
call void @llvm.assume(i1 %c0)
|
||||
call void @llvm.assume(i1 %c1)
|
||||
%c2 = icmp ule i64 %y, 69 ; 0b01000101
|
||||
%c3 = icmp uge i64 %y, 67 ; 0b01000011
|
||||
call void @llvm.assume(i1 %c2)
|
||||
call void @llvm.assume(i1 %c3)
|
||||
%or = or i64 %x, %y
|
||||
%r0 = icmp ugt i64 %or, 119 ; 0b01110111
|
||||
call void @use(i1 %r0) ; false
|
||||
%r1 = icmp ugt i64 %or, 118
|
||||
call void @use(i1 %r1) ; unknown
|
||||
ret void
|
||||
}
|
||||
@@ -2720,6 +2720,37 @@ TEST_F(ConstantRangeTest, binaryAnd) {
|
||||
EXPECT_EQ(R16_32.binaryAnd(R0_99), R0_32);
|
||||
EXPECT_EQ(R0_99.binaryAnd(R16_32), R0_32);
|
||||
|
||||
// 'And' with leading bits are masked (with common leading bits stripped)
|
||||
ConstantRange RMaskedL(APInt(8, 0b10'00101'1), APInt(8, 0b10'10000'0 + 1));
|
||||
ConstantRange RMaskedR(APInt(8, 0b10'11111'0), APInt(8, 0b10'11111'1 + 1));
|
||||
EXPECT_EQ(RMaskedL.binaryAnd(RMaskedR).getLower(), APInt(8, 0b10'00101'0));
|
||||
EXPECT_EQ(RMaskedR.binaryAnd(RMaskedL).getLower(), APInt(8, 0b10'00101'0));
|
||||
|
||||
ConstantRange RMaskedL1(APInt(8, 0b00'011'010), APInt(8, 0b00'100'100 + 1));
|
||||
ConstantRange RMaskedR1(APInt(8, 0b00'111'010), APInt(8, 0b00'111'110 + 1));
|
||||
EXPECT_EQ(RMaskedL1.binaryAnd(RMaskedR1).getLower(), APInt(8, 0b00'011'000));
|
||||
EXPECT_EQ(RMaskedR1.binaryAnd(RMaskedL1).getLower(), APInt(8, 0b00'011'000));
|
||||
|
||||
ConstantRange RMaskedL2(APInt(8, 0b0000'0111u), APInt(8, 0b0000'1101u + 1u));
|
||||
ConstantRange RMaskedR2(APInt(8, 0xff), APInt(8, 0));
|
||||
EXPECT_EQ(RMaskedL2.binaryAnd(RMaskedR2), RMaskedL2);
|
||||
EXPECT_EQ(RMaskedR2.binaryAnd(RMaskedL2), RMaskedL2);
|
||||
|
||||
ConstantRange RMaskedL3(APInt(4, 0b0011u), APInt(4, 0));
|
||||
ConstantRange RMaskedR3(APInt(4, 0b1011u), APInt(4, 0));
|
||||
APInt Zero_4(4, 0);
|
||||
EXPECT_EQ(RMaskedL3.binaryAnd(RMaskedR3).getLower().uge(Zero_4), true);
|
||||
EXPECT_EQ(RMaskedR3.binaryAnd(RMaskedL3).getLower().uge(Zero_4), true);
|
||||
|
||||
// wrapped set
|
||||
APInt NegSeven(4, 9); // Also -7
|
||||
ConstantRange RMaskedL4(NegSeven, APInt(4, 1));
|
||||
ConstantRange RMaskedR4(NegSeven, APInt(4, 0));
|
||||
EXPECT_EQ(RMaskedL4.binaryAnd(RMaskedR4).contains(Zero_4), true);
|
||||
EXPECT_EQ(RMaskedR4.binaryAnd(RMaskedL4).contains(Zero_4), true);
|
||||
EXPECT_EQ(RMaskedL4.binaryAnd(RMaskedR4).contains(NegSeven), true);
|
||||
EXPECT_EQ(RMaskedR4.binaryAnd(RMaskedL4).contains(NegSeven), true);
|
||||
|
||||
TestBinaryOpExhaustive(
|
||||
[](const ConstantRange &CR1, const ConstantRange &CR2) {
|
||||
return CR1.binaryAnd(CR2);
|
||||
|
||||
Reference in New Issue
Block a user