[VectorCombine] foldExtractedCmps - disable fold on non-commutative binops

The fold needs to be adjusted to correctly track the LHS/RHS operands, which will take some refactoring. For now, just disable the fold for non-commutative binops.

Fixes #114901
This commit is contained in:
Simon Pilgrim
2024-11-05 11:42:20 +00:00
parent a88be11eef
commit 05e838f428
2 changed files with 14 additions and 8 deletions

View File

@@ -1032,9 +1032,15 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
/// a vector into vector operations followed by extract. Note: The SLP pass
/// may miss this pattern because of implementation problems.
bool VectorCombine::foldExtractedCmps(Instruction &I) {
auto *BI = dyn_cast<BinaryOperator>(&I);
// We are looking for a scalar binop of booleans.
// binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
if (!BI || !I.getType()->isIntegerTy(1))
return false;
// TODO: Support non-commutative binary ops.
if (!BI->isCommutative())
return false;
// The compare predicates should match, and each compare should have a
@@ -1113,8 +1119,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
VCmp, Shuf);
Value *VecLogic = Builder.CreateBinOp(BI->getOpcode(), VCmp, Shuf);
Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
replaceValue(I, *NewExt);
++NumVecCmpBO;

View File

@@ -2,7 +2,7 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=AVX
; FIXME: PR114901 - ensure that the ASHR node doesn't commute the operands.
; PR114901 - ensure that the ASHR node doesn't commute the operands.
define i1 @PR114901(<4 x i32> %a) {
; SSE-LABEL: define i1 @PR114901(
; SSE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -15,10 +15,11 @@ define i1 @PR114901(<4 x i32> %a) {
;
; AVX-LABEL: define i1 @PR114901(
; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; AVX-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
; AVX-NEXT: [[TMP2:%.*]] = ashr <4 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; AVX-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[A]], i32 1
; AVX-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[A]], i32 3
; AVX-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[E1]], -8
; AVX-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[E3]], 42
; AVX-NEXT: [[R:%.*]] = ashr i1 [[CMP3]], [[CMP1]]
; AVX-NEXT: ret i1 [[R]]
;
%e1 = extractelement <4 x i32> %a, i32 1