diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index adde745a5a91..eef6ae677ac8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3753,11 +3753,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   if (!Info)
     return false;
 
-  // When Mask is not a true mask, this transformation is illegal for some
-  // operations whose results are affected by mask, like viota.m.
-  if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
-    return false;
-
   // If True has a merge operand then it needs to be the same as vmerge's False,
   // since False will be used for the result's merge operand.
   if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
@@ -3835,6 +3830,16 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   if (!VL)
     return false;
 
+  // Some operations produce different elementwise results depending on the
+  // active elements, like viota.m or vredsum. This transformation is illegal
+  // for these if we change the active elements (i.e. mask or VL).
+  if (Info->ActiveElementsAffectResult) {
+    if (Mask && !usesAllOnesMask(Mask, Glue))
+      return false;
+    if (TrueVL != VL)
+      return false;
+  }
+
   // If we end up changing the VL or mask of True, then we need to make sure it
   // doesn't raise any observable fp exceptions, since changing the active
   // elements will affect how fflags is set.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index f0c0953a3e56..025cc36d19eb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -381,7 +381,7 @@ struct RISCVMaskedPseudoInfo {
   uint16_t MaskedPseudo;
   uint16_t UnmaskedPseudo;
   uint8_t MaskOpIdx;
-  uint8_t MaskAffectsResult : 1;
+  uint8_t ActiveElementsAffectResult : 1;
 };
 #define GET_RISCVMaskedPseudosTable_DECL
 #include "RISCVGenSearchableTables.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index d72390b7c14b..b860273d639e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -561,17 +561,17 @@ def RISCVVIntrinsicsTable : GenericTable {
 // unmasked variant. For all but compares, both the masked and
 // unmasked variant have a passthru and policy operand. For compares,
 // neither has a policy op, and only the masked version has a passthru.
-class RISCVMaskedPseudo<bits<4> MaskIdx, bit MaskAffectsRes=false> {
+class RISCVMaskedPseudo<bits<4> MaskIdx, bit ActiveAffectsRes=false> {
   Pseudo MaskedPseudo = !cast<Pseudo>(NAME);
   Pseudo UnmaskedPseudo = !cast<Pseudo>(!subst("_MASK", "", NAME));
   bits<4> MaskOpIdx = MaskIdx;
-  bit MaskAffectsResult = MaskAffectsRes;
+  bit ActiveElementsAffectResult = ActiveAffectsRes;
 }
 
 def RISCVMaskedPseudosTable : GenericTable {
   let FilterClass = "RISCVMaskedPseudo";
   let CppTypeName = "RISCVMaskedPseudoInfo";
-  let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx", "MaskAffectsResult"];
+  let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx", "ActiveElementsAffectResult"];
   let PrimaryKey = ["MaskedPseudo"];
   let PrimaryKeyName = "getMaskedPseudoInfo";
 }
@@ -2065,7 +2065,7 @@ multiclass VPseudoVIOTA_M {
                      SchedUnary<"WriteVIotaV", "ReadVIotaV", mx,
                                 forceMergeOpRead=true>;
       def "_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
-                               RISCVMaskedPseudo<MaskIdx=2, MaskAffectsRes=true>,
+                               RISCVMaskedPseudo<MaskIdx=2, ActiveAffectsRes=true>,
                                SchedUnary<"WriteVIotaV", "ReadVIotaV", mx,
                                           forceMergeOpRead=true>;
     }
@@ -3162,7 +3162,7 @@ multiclass VPseudoTernaryWithTailPolicy<VReg RetClass,
     defvar mx = MInfo.MX;
     def "_" # mx # "_E" # sew : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class>;
     def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class>,
-                                          RISCVMaskedPseudo<MaskIdx=3>;
+                                          RISCVMaskedPseudo<MaskIdx=3, ActiveAffectsRes=true>;
   }
 }
 
@@ -3179,7 +3179,7 @@ multiclass VPseudoTernaryWithTailPolicyRoundingMode<VReg RetClass,
     def "_" # mx # "_E" # sew # "_MASK"
         : VPseudoTernaryMaskPolicyRoundingMode<RetClass, Op1Class,
                                                Op2Class>,
-          RISCVMaskedPseudo<MaskIdx=3>;
+          RISCVMaskedPseudo<MaskIdx=3, ActiveAffectsRes=true>;
   }
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
index 61acf1afa94d..ec03f773c710 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
@@ -19,6 +19,17 @@ define <vscale x 4 x i32> @vadd(<vscale x 4 x i32> %passthru, <vscale x 4 x i32
   ret <vscale x 4 x i32> %w
 }
 
+define <vscale x 4 x i32> @vadd_mask(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, iXLen %vl) {
+; CHECK-LABEL: vadd_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, iXLen %vl, iXLen 3)
+  %w = call <vscale x 4 x i32> @llvm.riscv.vmv.v.v.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, iXLen %vl)
+  ret <vscale x 4 x i32> %w
+}
+
 define <vscale x 4 x i32> @vadd_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
 ; CHECK-LABEL: vadd_undef:
 ; CHECK:       # %bb.0:
@@ -106,8 +117,8 @@ declare <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float>, <
 
 declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, iXLen, iXLen)
 
-define <vscale x 4 x float> @vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl1, iXLen %vl2) {
-; CHECK-LABEL: vfadd:
+define <vscale x 4 x float> @unfoldable_vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: unfoldable_vfadd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v10, v10, v12
@@ -118,3 +129,42 @@ define <vscale x 4 x float> @vfadd(<vscale x 4 x float> %passthru, <vscale x 4
   %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl2)
   ret <vscale x 4 x float> %w
 }
+
+define <vscale x 4 x float> @foldable_vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; CHECK-LABEL: foldable_vfadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    vfadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
+  %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
+  ret <vscale x 4 x float> %w
+}
+
+; This shouldn't be folded because we need to preserve exceptions with
+; "fpexcept.strict" exception behaviour, and changing the VL may hide them.
+define <vscale x 4 x float> @unfoldable_constrained_fadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %x, <vscale x 4 x float> %y, iXLen %vl) strictfp {
+; CHECK-LABEL: unfoldable_constrained_fadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfadd.vv v10, v10, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %a = call <vscale x 4 x float> @llvm.experimental.constrained.fadd(<vscale x 4 x float> %x, <vscale x 4 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
+  %b = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, iXLen %vl) strictfp
+  ret <vscale x 4 x float> %b
+}
+
+define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru, <vscale x 4 x i32> %x, <vscale x 2 x i32> %y) {
+; CHECK-LABEL: unfoldable_vredsum:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vredsum.vs v9, v10, v9
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv4i32(<vscale x 2 x i32> poison, <vscale x 4 x i32> %x, <vscale x 2 x i32> %y, iXLen -1)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, iXLen 1)
+  ret <vscale x 2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index b6921abf8fdf..a08bcae074b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1014,6 +1014,24 @@ define <vscale x 2 x float> @vfredusum_allones_mask(<vscale x 2 x float> %passth
   ret <vscale x 2 x float> %b
 }
 
+define <vscale x 2 x i32> @unfoldable_vredsum_allones_mask_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
+; CHECK-LABEL: unfoldable_vredsum_allones_mask_diff_vl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vredsum.vs v11, v9, v10
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v11
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> %passthru,
+    <vscale x 2 x i32> %x,
+    <vscale x 2 x i32> %y,
+    i64 -1)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 1)
+  ret <vscale x 2 x i32> %b
+}
+
 declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64)
 declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg)
 declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64)
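
Note for readers outside the patch context: the new ActiveElementsAffectResult guard exists because for viota.m and the reductions, every active element can influence the lanes that survive the vmerge/vmv.v.v fold, so the fold is only sound when it leaves the active elements untouched. Below is a minimal standalone C++ model of the two motivating cases. It is illustrative only, not LLVM or hardware code: the helper names (viota, vredsum) are made up, and the semantics are simplified (no tail/mask-undisturbed policy, no SEW/LMUL, single mask source).

// Scalar model of why "active elements affect the result" for viota.m and
// vredsum.vs. Assumed/simplified semantics, based on the RVV spec; names
// are hypothetical.
#include <cassert>
#include <cstdint>
#include <vector>

// viota.m vd, vs2, v0.t: as modelled here, each *active* lane I receives
// the count of set bits of vs2 at active positions below I. Flipping one
// mask bit changes the running count seen by every later active lane, so a
// masked viota cannot be rewritten as an all-ones-mask viota plus a vmerge.
std::vector<uint64_t> viota(const std::vector<bool> &Vs2,
                            const std::vector<bool> &V0, unsigned VL) {
  std::vector<uint64_t> Vd(VL, 0);
  uint64_t Count = 0;
  for (unsigned I = 0; I < VL; ++I) {
    if (V0[I]) { // inactive lanes neither write nor contribute to the count
      Vd[I] = Count;
      Count += Vs2[I];
    }
  }
  return Vd;
}

// vredsum.vs vd, vs2, vs1: vd[0] = vs1[0] + sum of the first VL elements of
// vs2. Lane 0 of the result depends on every element up to VL, so folding a
// vmv.v.v with VL=1 into a vredsum computed with VL=VLMAX would rerun the
// reduction over a single element and change vd[0].
int64_t vredsum(const std::vector<int64_t> &Vs2, int64_t Scalar, unsigned VL) {
  int64_t Sum = Scalar;
  for (unsigned I = 0; I < VL; ++I)
    Sum += Vs2[I];
  return Sum;
}

int main() {
  // Shrinking VL from 4 to 1 changes the reduction result in lane 0, which
  // is exactly the lane a VL=1 vmv.v.v would copy.
  std::vector<int64_t> V = {1, 2, 3, 4};
  assert(vredsum(V, 0, 4) == 10);
  assert(vredsum(V, 0, 1) == 1); // different value at lane 0

  // Relaxing the mask changes viota's result at lanes that stay active.
  std::vector<bool> Vs2 = {true, true, true, true};
  std::vector<bool> Mask = {true, false, true, true};
  std::vector<bool> AllOnes = {true, true, true, true};
  assert(viota(Vs2, Mask, 4)[2] == 1);    // lane 1 inactive, not counted
  assert(viota(Vs2, AllOnes, 4)[2] == 2); // all-ones mask counts lane 1
  return 0;
}

In both cases the divergence shows up in a lane the fold keeps, which is why performCombineVMergeAndVOps must now bail out rather than rewrite the mask to all-ones or substitute vmerge's VL when ActiveElementsAffectResult is set.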