[RISCV] Add support for fixed vector mask logic operations.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D96741
Author: Craig Topper
Date:   2021-02-16 09:26:22 -08:00
Parent: 064ada4ec6
Commit: 07ca13fe07
4 changed files with 241 additions and 11 deletions


@@ -531,6 +531,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Operations below are different between mask vectors and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
continue;
}
@@ -1209,11 +1212,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::MUL:
return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
case ISD::AND:
return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL);
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
RISCVISD::AND_VL);
case ISD::OR:
return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL);
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
RISCVISD::OR_VL);
case ISD::XOR:
return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL);
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
RISCVISD::XOR_VL);
case ISD::SDIV:
return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
case ISD::SREM:
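
Taken together with the setOperationAction changes above, these case updates give AND/OR/XOR on fixed vectors a two-way dispatch: i1-element (mask) vectors go to the VL mask opcodes, everything else keeps the ordinary VL vector opcodes. A minimal standalone sketch of that routing in plain C++, not the LLVM API (SimpleVT and pickTargetOpcode are illustrative names only):

#include <cassert>
#include <string>

enum class ISDOpc { AND, OR, XOR };

// Stand-in for MVT: just enough to ask "is the element type i1?".
struct SimpleVT { bool ElementIsI1; };

// Models lowerFixedLengthVectorLogicOpToRVV's choice between the
// mask-register form (vmand/vmor/vmxor) and the whole-register form.
std::string pickTargetOpcode(ISDOpc Opc, SimpleVT VT) {
  const bool Mask = VT.ElementIsI1;
  switch (Opc) {
  case ISDOpc::AND: return Mask ? "VMAND_VL" : "AND_VL";
  case ISDOpc::OR:  return Mask ? "VMOR_VL"  : "OR_VL";
  case ISDOpc::XOR: return Mask ? "VMXOR_VL" : "XOR_VL";
  }
  return "";
}

int main() {
  assert(pickTargetOpcode(ISDOpc::AND, {true})  == "VMAND_VL"); // v8i1  -> vmand.mm
  assert(pickTargetOpcode(ISDOpc::AND, {false}) == "AND_VL");   // v8i8  -> vand.vv
  assert(pickTargetOpcode(ISDOpc::XOR, {true})  == "VMXOR_VL"); // v32i1 -> vmxor.mm
  return 0;
}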
@@ -2231,8 +2237,19 @@ RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() == MVT::i1)
return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
}
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOpc) const {
unsigned NewOpc,
bool HasMask) const {
MVT VT = Op.getSimpleValueType();
assert(useRVVForFixedLengthVectorVT(VT) &&
"Only expected to lower fixed length vector operation!");
@@ -2258,7 +2275,8 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
Ops.push_back(Mask);
if (HasMask)
Ops.push_back(Mask);
Ops.push_back(VL);
SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
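
The HasMask flag exists because the generic path appends both a default all-ones mask and a VL operand, while the mask-register nodes (VMAND_VL and friends) take no mask operand of their own. A toy sketch of that operand-list construction, using made-up string operands rather than SDValues:

#include <cassert>
#include <string>
#include <vector>

// Models the tail of lowerToScalableOp: (operands..., Mask, VL) for ordinary
// vector ops, (operands..., VL) when HasMask is false (the mask logic ops).
std::vector<std::string> buildOperands(std::vector<std::string> Ops, bool HasMask) {
  if (HasMask)
    Ops.push_back("Mask"); // default all-ones mask from getDefaultVLOps
  Ops.push_back("VL");     // the VL operand is always appended
  return Ops;
}

int main() {
  assert(buildOperands({"Lhs", "Rhs"}, /*HasMask=*/true).size() == 4);  // e.g. AND_VL
  assert(buildOperands({"Lhs", "Rhs"}, /*HasMask=*/false).size() == 3); // e.g. VMAND_VL
  return 0;
}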


@@ -407,8 +407,11 @@ private:
SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOpc) const;
SDValue lowerFixedLengthVectorLogicOpToRVV(SDValue Op, SelectionDAG &DAG,
unsigned MaskOpc,
unsigned VecOpc) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc,
bool HasMask = true) const;
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,


@@ -126,6 +126,9 @@ def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
(riscv_vmxor_vl node:$rs, true_mask, node:$vl)>;
// Ignore the vl operand.
def SplatFPOp : PatFrag<(ops node:$op),
(riscv_vfmv_v_f_vl node:$op, srcvalue)>;
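
The new riscv_vmnot_vl fragment leans on the fact that, on a bit mask, NOT is exactly XOR with the all-ones mask that riscv_vmset_vl/true_mask produces. A quick standalone check of that identity over an 8-bit mask (plain C++, nothing RVV-specific):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t AllOnes = 0xFF; // what vmset.m / true_mask yields per element
  for (unsigned v = 0; v < 256; ++v) {
    const uint8_t Mask = static_cast<uint8_t>(v);
    // xor with all-ones flips every mask bit, i.e. it is NOT.
    assert(static_cast<uint8_t>(Mask ^ AllOnes) == static_cast<uint8_t>(~Mask));
  }
  return 0;
}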
@@ -558,15 +561,41 @@ foreach mti = AllMasks in {
(!cast<Instruction>("PseudoVMXOR_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
// FIXME: Add remaining mask instructions.
def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmor_vl VR:$rs1, VR:$rs2,
def : Pat<(mti.Mask (riscv_vmand_vl (riscv_vmnot_vl VR:$rs1,
(XLenVT (VLOp GPR:$vl))),
VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMANDNOT_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
def : Pat<(mti.Mask (riscv_vmor_vl (riscv_vmnot_vl VR:$rs1,
(XLenVT (VLOp GPR:$vl))),
true_mask, (XLenVT (VLOp GPR:$vl)))),
VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMORNOT_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
// XOR is associative so we need 2 patterns for VMXNOR.
def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmnot_vl VR:$rs1,
(XLenVT (VLOp GPR:$vl))),
VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmand_vl VR:$rs1, VR:$rs2,
(XLenVT (VLOp GPR:$vl))),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmor_vl VR:$rs1, VR:$rs2,
(XLenVT (VLOp GPR:$vl))),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMNOR_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmxor_vl VR:$rs1, VR:$rs2,
(XLenVT (VLOp GPR:$vl))),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
// Match the not idiom to the vmnot.m pseudo.
def : Pat<(mti.Mask (riscv_vmxor_vl VR:$rs, true_mask, (XLenVT (VLOp GPR:$vl)))),
def : Pat<(mti.Mask (riscv_vmnot_vl VR:$rs, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
VR:$rs, VR:$rs, GPR:$vl, mti.SEW)>;
}
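
The patterns added above all reduce to small bit-mask identities; in particular, VMXNOR needs two patterns because xor(not a, b) and not(xor a, b) are the same value, so either DAG shape should select vmxnor.mm. A standalone exhaustive check over 8-bit masks (plain C++; the NAND/NOR lines are De Morgan sanity checks):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned y = 0; y < 256; ++y) {
      const uint8_t A = static_cast<uint8_t>(x), B = static_cast<uint8_t>(y);
      // The two VMXNOR patterns describe the same value in two DAG shapes.
      assert(static_cast<uint8_t>(~A ^ B) == static_cast<uint8_t>(~(A ^ B)));
      // NAND / NOR, matched from not(and ...) / not(or ...), obey De Morgan.
      assert(static_cast<uint8_t>(~(A & B)) == static_cast<uint8_t>(~A | ~B));
      assert(static_cast<uint8_t>(~(A | B)) == static_cast<uint8_t>(~A & ~B));
    }
  }
  return 0;
}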


@@ -0,0 +1,180 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK
define void @and_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
; CHECK-LABEL: and_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 8
; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmand.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <8 x i1>, <8 x i1>* %x
%b = load <8 x i1>, <8 x i1>* %y
%c = and <8 x i1> %a, %b
store <8 x i1> %c, <8 x i1>* %x
ret void
}
define void @or_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
; CHECK-LABEL: or_v16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmor.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <16 x i1>, <16 x i1>* %x
%b = load <16 x i1>, <16 x i1>* %y
%c = or <16 x i1> %a, %b
store <16 x i1> %c, <16 x i1>* %x
ret void
}
define void @xor_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
; CHECK-LABEL: xor_v32i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmxor.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <32 x i1>, <32 x i1>* %x
%b = load <32 x i1>, <32 x i1>* %y
%c = xor <32 x i1> %a, %b
store <32 x i1> %c, <32 x i1>* %x
ret void
}
define void @not_v64i1(<64 x i1>* %x, <64 x i1>* %y) {
; CHECK-LABEL: not_v64i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 64
; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%b = load <64 x i1>, <64 x i1>* %y
%c = xor <64 x i1> %a, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
store <64 x i1> %c, <64 x i1>* %x
ret void
}
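
The not_v64i1 case above shows how the xor-with-all-ones idiom is selected: there is no standalone mask NOT encoding, and vmnot.m is an alias for vmnand.mm vd, vs, vs, which is why the CHECK lines expect vmnand.mm v25, v25, v25. The underlying identity, checked exhaustively on an 8-bit mask in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    const uint8_t A = static_cast<uint8_t>(v);
    // nand(a, a) == not(a), so vmnand with both sources equal implements NOT.
    assert(static_cast<uint8_t>(~(A & A)) == static_cast<uint8_t>(~A));
  }
  return 0;
}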
define void @andnot_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
; CHECK-LABEL: andnot_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 8
; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmandnot.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <8 x i1>, <8 x i1>* %x
%b = load <8 x i1>, <8 x i1>* %y
%c = xor <8 x i1> %a, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%d = and <8 x i1> %b, %c
store <8 x i1> %d, <8 x i1>* %x
ret void
}
define void @ornot_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
; CHECK-LABEL: ornot_v16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmornot.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <16 x i1>, <16 x i1>* %x
%b = load <16 x i1>, <16 x i1>* %y
%c = xor <16 x i1> %a, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%d = or <16 x i1> %b, %c
store <16 x i1> %d, <16 x i1>* %x
ret void
}
define void @xornot_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
; CHECK-LABEL: xornot_v32i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmxnor.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <32 x i1>, <32 x i1>* %x
%b = load <32 x i1>, <32 x i1>* %y
%c = xor <32 x i1> %a, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%d = xor <32 x i1> %b, %c
store <32 x i1> %d, <32 x i1>* %x
ret void
}
define void @nand_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
; CHECK-LABEL: nand_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 8
; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmnand.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <8 x i1>, <8 x i1>* %x
%b = load <8 x i1>, <8 x i1>* %y
%c = and <8 x i1> %a, %b
%d = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
store <8 x i1> %d, <8 x i1>* %x
ret void
}
define void @nor_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
; CHECK-LABEL: nor_v16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmnor.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <16 x i1>, <16 x i1>* %x
%b = load <16 x i1>, <16 x i1>* %y
%c = or <16 x i1> %a, %b
%d = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
store <16 x i1> %d, <16 x i1>* %x
ret void
}
define void @xnor_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
; CHECK-LABEL: xnor_v32i1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vle1.v v26, (a1)
; CHECK-NEXT: vmxnor.mm v25, v25, v26
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = load <32 x i1>, <32 x i1>* %x
%b = load <32 x i1>, <32 x i1>* %y
%c = xor <32 x i1> %a, %b
%d = xor <32 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
store <32 x i1> %d, <32 x i1>* %x
ret void
}