[RISCV] Unify non-vp and vp rounding intrinsic costing (#147872)

Currently we have slightly different costing for the vp and non-vp
version of the rounding intrinsics.

We can delete this code and use the generic BasicTTIImpl code for the vp
intrinsics which falls back to the non-vp versions.

I'm not sure if the zvfh costing is correct, this should probably be
fixed in a follow up patch. At the moment the non-vp cost is more
important since it is what the loop vectorizer will use.
This commit is contained in:
Luke Lau
2025-07-10 15:46:05 +08:00
committed by GitHub
parent 60c14ac582
commit da8d7f49ff
2 changed files with 165 additions and 206 deletions

View File

@@ -1244,17 +1244,6 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
{Intrinsic::vp_cttz, MVT::i64, 25},
};
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) {
switch (ID) {
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
case Intrinsic::VPID: \
return ISD::VPSD;
#include "llvm/IR/VPIntrinsics.def"
#undef HELPER_MAP_VPID_TO_VPSD
}
return ISD::DELETED_NODE;
}
InstructionCost
RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const {
@@ -1482,36 +1471,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return Cost;
}
case Intrinsic::vp_rint: {
// RISC-V target uses at least 5 instructions to lower rounding intrinsics.
unsigned Cost = 5;
auto LT = getTypeLegalizationCost(RetTy);
if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
return Cost * LT.first;
break;
}
case Intrinsic::vp_nearbyint: {
// More one read and one write for fflags than vp_rint.
unsigned Cost = 7;
auto LT = getTypeLegalizationCost(RetTy);
if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
return Cost * LT.first;
break;
}
case Intrinsic::vp_ceil:
case Intrinsic::vp_floor:
case Intrinsic::vp_round:
case Intrinsic::vp_roundeven:
case Intrinsic::vp_roundtozero: {
// Rounding with static rounding mode needs two more instructions to
// swap/write FRM than vp_rint.
unsigned Cost = 7;
auto LT = getTypeLegalizationCost(RetTy);
unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
if (TLI->isOperationCustom(VPISD, LT.second))
return Cost * LT.first;
break;
}
case Intrinsic::experimental_vp_splat: {
auto LT = getTypeLegalizationCost(RetTy);
// TODO: Lower i1 experimental_vp_splat

View File

@@ -1126,23 +1126,23 @@ define void @vp_ceil() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %21 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <2 x bfloat> @llvm.vp.ceil.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
@@ -1176,15 +1176,15 @@ define void @vp_ceil() {
define void @vp_ceil_f16() {
; ZVFH-LABEL: 'vp_ceil_f16'
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_ceil_f16'
@@ -1222,23 +1222,23 @@ define void @vp_floor() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %21 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <2 x bfloat> @llvm.vp.floor.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
@@ -1272,15 +1272,15 @@ define void @vp_floor() {
define void @vp_floor_f16() {
; ZVFH-LABEL: 'vp_floor_f16'
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_floor_f16'
@@ -1318,23 +1318,23 @@ define void @vp_round() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %21 = call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <2 x bfloat> @llvm.vp.round.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
@@ -1368,15 +1368,15 @@ define void @vp_round() {
define void @vp_round_f16() {
; ZVFH-LABEL: 'vp_round_f16'
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.round.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.round.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.round.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.round.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x half> @llvm.vp.round.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x half> @llvm.vp.round.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x half> @llvm.vp.round.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x half> @llvm.vp.round.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_round_f16'
@@ -1414,23 +1414,23 @@ define void @vp_roundeven() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %21 = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <2 x bfloat> @llvm.vp.roundeven.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
@@ -1464,15 +1464,15 @@ define void @vp_roundeven() {
define void @vp_roundeven_f16() {
; ZVFH-LABEL: 'vp_roundeven_f16'
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_roundeven_f16'
@@ -1560,15 +1560,15 @@ define void @vp_roundtozero() {
define void @vp_roundtozero_f16() {
; ZVFH-LABEL: 'vp_roundtozero_f16'
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_roundtozero_f16'
@@ -1606,23 +1606,23 @@ define void @vp_rint() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %10 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %11 = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %21 = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %22 = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <2 x bfloat> @llvm.vp.rint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
@@ -1656,15 +1656,15 @@ define void @vp_rint() {
define void @vp_rint_f16() {
; ZVFH-LABEL: 'vp_rint_f16'
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_rint_f16'
@@ -1702,23 +1702,23 @@ define void @vp_nearbyint() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %21 = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <2 x bfloat> @llvm.vp.nearbyint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
@@ -1752,15 +1752,15 @@ define void @vp_nearbyint() {
define void @vp_nearbyint_f16() {
; ZVFH-LABEL: 'vp_nearbyint_f16'
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_nearbyint_f16'