mirror of
https://github.com/intel/llvm.git
synced 2026-01-24 00:20:25 +08:00
X86: Fold masked-merge when and-not is not available
Differential Revision: https://reviews.llvm.org/D112754
This commit is contained in:
@@ -46577,6 +46577,59 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
|
||||
return Ret;
|
||||
}
|
||||
|
||||
/// Try to rewrite one operand ordering of the masked merge
/// `(And0_L & And0_R) | (And1_L & And1_R)`.
///
/// The match succeeds only when \p And0_L is a single-use bitwise NOT of some
/// value `M` and `M` is also one of the two operands of the second AND. The
/// expression then has the shape `(~M & X) | (M & Y)` with `X == And0_R`, and
/// it is rebuilt as `((X ^ Y) & M) ^ X`, which needs no inverted mask.
///
/// \param And0_L  Candidate for the inverted mask `~M`.
/// \param And0_R  Value selected where the mask bits are clear (`X`).
/// \param And1_L  First operand of the other AND (either `M` or `Y`).
/// \param And1_R  Second operand of the other AND (either `Y` or `M`).
/// \param DL      Debug location attached to every node created here.
/// \param DAG     DAG in which the replacement nodes are created.
/// \returns the replacement value, or an empty SDValue if the operands do not
///          form a masked merge in this ordering.
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
                                   SDValue And1_L, SDValue And1_R,
                                   const SDLoc &DL, SelectionDAG &DAG) {
  // The NOT must have no other user, otherwise it stays alive after the
  // rewrite and nothing is saved.
  if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
    return SDValue();
  SDValue NotOp = And0_L->getOperand(0);

  // AND is commutative: normalise so the un-inverted mask is And1_L.
  if (NotOp == And1_R)
    std::swap(And1_R, And1_L);
  if (NotOp != And1_L)
    return SDValue();

  // (~NotOp & And0_R) | (NotOp & And1_R)
  // --> ((And0_R ^ And1_R) & NotOp) ^ And0_R
  //
  // Query the type through the SDValue so that its result number is honoured.
  EVT VT = NotOp.getValueType();
  // And0_R is used twice in the rewritten expression; freeze it so both uses
  // observe the same value even if it is undef/poison.
  SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, DL, VT, And0_R);
  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
  return DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
}
|
||||
|
||||
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
|
||||
/// equivalent `((x ^ y) & m) ^ y)` pattern.
|
||||
/// This is typically a better representation for targets without a fused
|
||||
/// "and-not" operation. This function is intended to be called from a
|
||||
/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
|
||||
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
|
||||
// Note that masked-merge variants using XOR or ADD expressions are
|
||||
// normalized to OR by InstCombine so we only check for OR.
|
||||
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
|
||||
return SDValue();
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(Node);
|
||||
SDValue N00 = N0->getOperand(0);
|
||||
SDValue N01 = N0->getOperand(1);
|
||||
SDValue N10 = N1->getOperand(0);
|
||||
SDValue N11 = N1->getOperand(1);
|
||||
if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
|
||||
return Result;
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget &Subtarget) {
|
||||
@@ -46670,6 +46723,11 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
|
||||
return Res;
|
||||
}
|
||||
|
||||
// We should fold "masked merge" patterns when `andn` is not available.
|
||||
if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
|
||||
if (SDValue R = foldMaskedMerge(N, DAG))
|
||||
return R;
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
||||
@@ -8,11 +8,10 @@
|
||||
define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; NOBMI-LABEL: masked_merge0:
|
||||
; NOBMI: # %bb.0:
|
||||
; NOBMI-NEXT: movl %edi, %eax
|
||||
; NOBMI-NEXT: andl %edi, %esi
|
||||
; NOBMI-NEXT: notl %eax
|
||||
; NOBMI-NEXT: andl %edx, %eax
|
||||
; NOBMI-NEXT: orl %esi, %eax
|
||||
; NOBMI-NEXT: movl %esi, %eax
|
||||
; NOBMI-NEXT: xorl %edx, %eax
|
||||
; NOBMI-NEXT: andl %edi, %eax
|
||||
; NOBMI-NEXT: xorl %edx, %eax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: masked_merge0:
|
||||
@@ -54,14 +53,22 @@ define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
|
||||
}
|
||||
|
||||
define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
|
||||
; CHECK-LABEL: masked_merge2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: notb %al
|
||||
; CHECK-NEXT: andb %sil, %al
|
||||
; CHECK-NEXT: andb %dil, %sil
|
||||
; CHECK-NEXT: orb %sil, %al
|
||||
; CHECK-NEXT: retq
|
||||
; NOBMI-LABEL: masked_merge2:
|
||||
; NOBMI: # %bb.0:
|
||||
; NOBMI-NEXT: movl %esi, %eax
|
||||
; NOBMI-NEXT: xorb %sil, %al
|
||||
; NOBMI-NEXT: andb %dil, %al
|
||||
; NOBMI-NEXT: xorb %sil, %al
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: masked_merge2:
|
||||
; BMI: # %bb.0:
|
||||
; BMI-NEXT: movl %edi, %eax
|
||||
; BMI-NEXT: notb %al
|
||||
; BMI-NEXT: andb %sil, %al
|
||||
; BMI-NEXT: andb %dil, %sil
|
||||
; BMI-NEXT: orb %sil, %al
|
||||
; BMI-NEXT: retq
|
||||
%not = xor i8 %a0, -1
|
||||
%and0 = and i8 %not, %a1
|
||||
%and1 = and i8 %a1, %a0
|
||||
@@ -72,13 +79,12 @@ define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
|
||||
define i64 @masked_merge3(i64 %a0, i64 %a1, i64 %a2) {
|
||||
; NOBMI-LABEL: masked_merge3:
|
||||
; NOBMI: # %bb.0:
|
||||
; NOBMI-NEXT: movq %rdi, %rax
|
||||
; NOBMI-NEXT: notq %rsi
|
||||
; NOBMI-NEXT: movq %rsi, %rax
|
||||
; NOBMI-NEXT: notq %rdx
|
||||
; NOBMI-NEXT: andq %rdi, %rsi
|
||||
; NOBMI-NEXT: xorq %rdx, %rax
|
||||
; NOBMI-NEXT: notq %rax
|
||||
; NOBMI-NEXT: andq %rdx, %rax
|
||||
; NOBMI-NEXT: orq %rsi, %rax
|
||||
; NOBMI-NEXT: andq %rdi, %rax
|
||||
; NOBMI-NEXT: xorq %rdx, %rax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: masked_merge3:
|
||||
|
||||
@@ -137,12 +137,11 @@ define i64 @or_shift1_and1_64(i64 %x, i64 %y) {
|
||||
define i32 @or_and_and_rhs_neg_i32(i32 %x, i32 %y, i32 %z) {
|
||||
; NOBMI-LABEL: or_and_and_rhs_neg_i32:
|
||||
; NOBMI: # %bb.0: # %entry
|
||||
; NOBMI-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; NOBMI-NEXT: # kill: def $edx killed $edx def $rdx
|
||||
; NOBMI-NEXT: xorl %edi, %edx
|
||||
; NOBMI-NEXT: andl %esi, %edx
|
||||
; NOBMI-NEXT: notl %esi
|
||||
; NOBMI-NEXT: andl %edi, %esi
|
||||
; NOBMI-NEXT: orl %edx, %esi
|
||||
; NOBMI-NEXT: leal 1(%rsi), %eax
|
||||
; NOBMI-NEXT: xorl %edi, %edx
|
||||
; NOBMI-NEXT: leal 1(%rdx), %eax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: or_and_and_rhs_neg_i32:
|
||||
@@ -164,12 +163,11 @@ entry:
|
||||
define i32 @or_and_and_lhs_neg_i32(i32 %x, i32 %y, i32 %z) {
|
||||
; NOBMI-LABEL: or_and_and_lhs_neg_i32:
|
||||
; NOBMI: # %bb.0: # %entry
|
||||
; NOBMI-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; NOBMI-NEXT: # kill: def $edx killed $edx def $rdx
|
||||
; NOBMI-NEXT: xorl %edi, %edx
|
||||
; NOBMI-NEXT: andl %esi, %edx
|
||||
; NOBMI-NEXT: notl %esi
|
||||
; NOBMI-NEXT: andl %edi, %esi
|
||||
; NOBMI-NEXT: orl %edx, %esi
|
||||
; NOBMI-NEXT: leal 1(%rsi), %eax
|
||||
; NOBMI-NEXT: xorl %edi, %edx
|
||||
; NOBMI-NEXT: leal 1(%rdx), %eax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: or_and_and_lhs_neg_i32:
|
||||
@@ -191,12 +189,11 @@ entry:
|
||||
define i32 @or_and_rhs_neg_and_i32(i32 %x, i32 %y, i32 %z) {
|
||||
; NOBMI-LABEL: or_and_rhs_neg_and_i32:
|
||||
; NOBMI: # %bb.0: # %entry
|
||||
; NOBMI-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; NOBMI-NEXT: xorl %edx, %edi
|
||||
; NOBMI-NEXT: andl %esi, %edi
|
||||
; NOBMI-NEXT: notl %esi
|
||||
; NOBMI-NEXT: andl %edx, %esi
|
||||
; NOBMI-NEXT: orl %edi, %esi
|
||||
; NOBMI-NEXT: leal 1(%rsi), %eax
|
||||
; NOBMI-NEXT: xorl %edx, %edi
|
||||
; NOBMI-NEXT: leal 1(%rdi), %eax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: or_and_rhs_neg_and_i32:
|
||||
@@ -218,12 +215,11 @@ entry:
|
||||
define i32 @or_and_lhs_neg_and_i32(i32 %x, i32 %y, i32 %z) {
|
||||
; NOBMI-LABEL: or_and_lhs_neg_and_i32:
|
||||
; NOBMI: # %bb.0: # %entry
|
||||
; NOBMI-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; NOBMI-NEXT: xorl %edx, %edi
|
||||
; NOBMI-NEXT: andl %esi, %edi
|
||||
; NOBMI-NEXT: notl %esi
|
||||
; NOBMI-NEXT: andl %edx, %esi
|
||||
; NOBMI-NEXT: orl %edi, %esi
|
||||
; NOBMI-NEXT: leal 1(%rsi), %eax
|
||||
; NOBMI-NEXT: xorl %edx, %edi
|
||||
; NOBMI-NEXT: leal 1(%rdi), %eax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: or_and_lhs_neg_and_i32:
|
||||
@@ -245,11 +241,10 @@ entry:
|
||||
define i64 @or_and_and_rhs_neg_i64(i64 %x, i64 %y, i64 %z) {
|
||||
; NOBMI-LABEL: or_and_and_rhs_neg_i64:
|
||||
; NOBMI: # %bb.0: # %entry
|
||||
; NOBMI-NEXT: xorq %rdi, %rdx
|
||||
; NOBMI-NEXT: andq %rsi, %rdx
|
||||
; NOBMI-NEXT: notq %rsi
|
||||
; NOBMI-NEXT: andq %rdi, %rsi
|
||||
; NOBMI-NEXT: orq %rdx, %rsi
|
||||
; NOBMI-NEXT: leaq 1(%rsi), %rax
|
||||
; NOBMI-NEXT: xorq %rdi, %rdx
|
||||
; NOBMI-NEXT: leaq 1(%rdx), %rax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: or_and_and_rhs_neg_i64:
|
||||
@@ -271,11 +266,10 @@ entry:
|
||||
define i64 @or_and_and_lhs_neg_i64(i64 %x, i64 %y, i64 %z) {
|
||||
; NOBMI-LABEL: or_and_and_lhs_neg_i64:
|
||||
; NOBMI: # %bb.0: # %entry
|
||||
; NOBMI-NEXT: xorq %rdi, %rdx
|
||||
; NOBMI-NEXT: andq %rsi, %rdx
|
||||
; NOBMI-NEXT: notq %rsi
|
||||
; NOBMI-NEXT: andq %rdi, %rsi
|
||||
; NOBMI-NEXT: orq %rdx, %rsi
|
||||
; NOBMI-NEXT: leaq 1(%rsi), %rax
|
||||
; NOBMI-NEXT: xorq %rdi, %rdx
|
||||
; NOBMI-NEXT: leaq 1(%rdx), %rax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: or_and_and_lhs_neg_i64:
|
||||
@@ -297,11 +291,10 @@ entry:
|
||||
define i64 @or_and_rhs_neg_and_i64(i64 %x, i64 %y, i64 %z) {
|
||||
; NOBMI-LABEL: or_and_rhs_neg_and_i64:
|
||||
; NOBMI: # %bb.0: # %entry
|
||||
; NOBMI-NEXT: xorq %rdx, %rdi
|
||||
; NOBMI-NEXT: andq %rsi, %rdi
|
||||
; NOBMI-NEXT: notq %rsi
|
||||
; NOBMI-NEXT: andq %rdx, %rsi
|
||||
; NOBMI-NEXT: orq %rdi, %rsi
|
||||
; NOBMI-NEXT: leaq 1(%rsi), %rax
|
||||
; NOBMI-NEXT: xorq %rdx, %rdi
|
||||
; NOBMI-NEXT: leaq 1(%rdi), %rax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: or_and_rhs_neg_and_i64:
|
||||
@@ -323,11 +316,10 @@ entry:
|
||||
define i64 @or_and_lhs_neg_and_i64(i64 %x, i64 %y, i64 %z) {
|
||||
; NOBMI-LABEL: or_and_lhs_neg_and_i64:
|
||||
; NOBMI: # %bb.0: # %entry
|
||||
; NOBMI-NEXT: xorq %rdx, %rdi
|
||||
; NOBMI-NEXT: andq %rsi, %rdi
|
||||
; NOBMI-NEXT: notq %rsi
|
||||
; NOBMI-NEXT: andq %rdx, %rsi
|
||||
; NOBMI-NEXT: orq %rdi, %rsi
|
||||
; NOBMI-NEXT: leaq 1(%rsi), %rax
|
||||
; NOBMI-NEXT: xorq %rdx, %rdi
|
||||
; NOBMI-NEXT: leaq 1(%rdi), %rax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: or_and_lhs_neg_and_i64:
|
||||
|
||||
@@ -58,11 +58,10 @@ define i16 @out16(i16 %x, i16 %y, i16 %mask) {
|
||||
define i32 @out32(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-NOBMI-LABEL: out32:
|
||||
; CHECK-NOBMI: # %bb.0:
|
||||
; CHECK-NOBMI-NEXT: movl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %edi
|
||||
; CHECK-NOBMI-NEXT: notl %eax
|
||||
; CHECK-NOBMI-NEXT: andl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: orl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: movl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: retq
|
||||
;
|
||||
; CHECK-BMI-LABEL: out32:
|
||||
@@ -81,11 +80,10 @@ define i32 @out32(i32 %x, i32 %y, i32 %mask) {
|
||||
define i64 @out64(i64 %x, i64 %y, i64 %mask) {
|
||||
; CHECK-NOBMI-LABEL: out64:
|
||||
; CHECK-NOBMI: # %bb.0:
|
||||
; CHECK-NOBMI-NEXT: movq %rdx, %rax
|
||||
; CHECK-NOBMI-NEXT: andq %rdx, %rdi
|
||||
; CHECK-NOBMI-NEXT: notq %rax
|
||||
; CHECK-NOBMI-NEXT: andq %rsi, %rax
|
||||
; CHECK-NOBMI-NEXT: orq %rdi, %rax
|
||||
; CHECK-NOBMI-NEXT: movq %rdi, %rax
|
||||
; CHECK-NOBMI-NEXT: xorq %rsi, %rax
|
||||
; CHECK-NOBMI-NEXT: andq %rdx, %rax
|
||||
; CHECK-NOBMI-NEXT: xorq %rsi, %rax
|
||||
; CHECK-NOBMI-NEXT: retq
|
||||
;
|
||||
; CHECK-BMI-LABEL: out64:
|
||||
@@ -628,11 +626,10 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
|
||||
define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-NOBMI-LABEL: out_constant_varx_42:
|
||||
; CHECK-NOBMI: # %bb.0:
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %edi
|
||||
; CHECK-NOBMI-NEXT: movl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: notl %eax
|
||||
; CHECK-NOBMI-NEXT: andl $42, %eax
|
||||
; CHECK-NOBMI-NEXT: orl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: movl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl $42, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl $42, %eax
|
||||
; CHECK-NOBMI-NEXT: retq
|
||||
;
|
||||
; CHECK-BMI-LABEL: out_constant_varx_42:
|
||||
@@ -673,11 +670,10 @@ define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
|
||||
define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-NOBMI-LABEL: out_constant_varx_42_invmask:
|
||||
; CHECK-NOBMI: # %bb.0:
|
||||
; CHECK-NOBMI-NEXT: movl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: notl %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: andl $42, %edx
|
||||
; CHECK-NOBMI-NEXT: orl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: movl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl $42, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: retq
|
||||
;
|
||||
; CHECK-BMI-LABEL: out_constant_varx_42_invmask:
|
||||
@@ -801,11 +797,10 @@ define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
|
||||
define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-NOBMI-LABEL: out_constant_42_vary:
|
||||
; CHECK-NOBMI: # %bb.0:
|
||||
; CHECK-NOBMI-NEXT: movl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: notl %eax
|
||||
; CHECK-NOBMI-NEXT: andl $42, %edx
|
||||
; CHECK-NOBMI-NEXT: andl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: orl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: movl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl $42, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: retq
|
||||
;
|
||||
; CHECK-BMI-LABEL: out_constant_42_vary:
|
||||
@@ -844,11 +839,10 @@ define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
|
||||
define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-NOBMI-LABEL: out_constant_42_vary_invmask:
|
||||
; CHECK-NOBMI: # %bb.0:
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %esi
|
||||
; CHECK-NOBMI-NEXT: movl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: notl %eax
|
||||
; CHECK-NOBMI-NEXT: andl $42, %eax
|
||||
; CHECK-NOBMI-NEXT: orl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: movl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl $42, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl $42, %eax
|
||||
; CHECK-NOBMI-NEXT: retq
|
||||
;
|
||||
; CHECK-BMI-LABEL: out_constant_42_vary_invmask:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user