mirror of
https://github.com/intel/llvm.git
synced 2026-01-13 02:38:07 +08:00
[ARM] Introduce intrinsics for MVE fp-converts under strict-fp. (#170686)
This is the last of the generic instructions created from MVE intrinsics. It was a little more awkward than the others because the conversion takes a Type as one of its arguments, so this adds new helper functions that create the intrinsic calls we need.
This commit is contained in:
@@ -598,7 +598,7 @@ foreach half = [ "b", "t" ] in {
|
||||
} // params = [f16], pnt = PNT_None
|
||||
} // loop over half = "b", "t"
|
||||
|
||||
multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> {
|
||||
multiclass float_int_conversions<Type FScalar, Type IScalar, Builder ftoi, Builder itof> {
|
||||
defvar FVector = VecOf<FScalar>;
|
||||
defvar IVector = VecOf<IScalar>;
|
||||
|
||||
|
||||
@@ -123,10 +123,10 @@ def fcmp_ule_node: IRBuilder<"CreateFCmpULE">;
|
||||
def splat: CGHelperFn<"ARMMVEVectorSplat">;
|
||||
def select: IRBuilder<"CreateSelect">;
|
||||
def fneg: IRBuilder<"CreateFNeg">;
|
||||
def sitofp: IRBuilder<"CreateSIToFP">;
|
||||
def uitofp: IRBuilder<"CreateUIToFP">;
|
||||
def fptosi: IRBuilder<"CreateFPToSI">;
|
||||
def fptoui: IRBuilder<"CreateFPToUI">;
|
||||
def sitofp_node: IRBuilder<"CreateSIToFP">;
|
||||
def uitofp_node: IRBuilder<"CreateUIToFP">;
|
||||
def fptosi_node: IRBuilder<"CreateFPToSI">;
|
||||
def fptoui_node: IRBuilder<"CreateFPToUI">;
|
||||
def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> {
|
||||
let special_params = [IRBuilderIntParam<1, "unsigned">];
|
||||
}
|
||||
@@ -215,9 +215,9 @@ def bitsize;
|
||||
|
||||
// strictFPAlt allows a node to have different code generation under strict-fp.
|
||||
// TODO: The standard node can be IRBuilderBase or IRIntBase.
|
||||
class strictFPAlt<Builder standard_, IRIntBase strictfp_> : Builder {
|
||||
class strictFPAlt<Builder standard_, Builder strictfp_> : Builder {
|
||||
Builder standard = standard_;
|
||||
IRIntBase strictfp = strictfp_;
|
||||
Builder strictfp = strictfp_;
|
||||
}
|
||||
|
||||
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
|
||||
@@ -605,6 +605,14 @@ def fcmp_ult : strictFPAlt<fcmp_ult_node,
|
||||
IRInt<"cmp_lt", [Predicate, Vector]>>;
|
||||
def fcmp_ule : strictFPAlt<fcmp_ule_node,
|
||||
IRInt<"cmp_le", [Predicate, Vector]>>;
|
||||
def sitofp: strictFPAlt<sitofp_node,
|
||||
CGFHelperFn<"ARMMVECreateSIToFP">>;
|
||||
def uitofp: strictFPAlt<uitofp_node,
|
||||
CGFHelperFn<"ARMMVECreateUIToFP">>;
|
||||
def fptosi: strictFPAlt<fptosi_node,
|
||||
CGFHelperFn<"ARMMVECreateFPToSI">>;
|
||||
def fptoui: strictFPAlt<fptoui_node,
|
||||
CGFHelperFn<"ARMMVECreateFPToUI">>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Convenience lists of parameter types. 'T' is just a container record, so you
|
||||
|
||||
@@ -3512,6 +3512,38 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
|
||||
return Builder.CreateShuffleVector(V, Indices);
|
||||
}
|
||||
|
||||
// Emits a call to the arm_mve_vcvt_fp_int intrinsic; this helper is named as
// the strictfp alternative of the `sitofp` codegen node.
//
// Builder: used to create the call and the i32 constant operand.
// CGF:     provides the CodeGenModule (CGF->CGM) that looks up the intrinsic.
// V:       the value being converted; passed as the first call operand, and
//          its type is the second overload type of the intrinsic.
// Ty:      the first overload type of the intrinsic (its result type).
//
// The second call operand is the constant i32 0, i.e. the intrinsic's
// "unsigned" operand is cleared to select the signed conversion.
// Returns the created call.
static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
|
||||
CodeGenFunction *CGF, llvm::Value *V,
|
||||
llvm::Type *Ty) {
|
||||
return Builder.CreateCall(
|
||||
CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
|
||||
{V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
|
||||
}
|
||||
|
||||
// Emits a call to the arm_mve_vcvt_fp_int intrinsic; this helper is named as
// the strictfp alternative of the `uitofp` codegen node.
//
// Builder: used to create the call and the i32 constant operand.
// CGF:     provides the CodeGenModule (CGF->CGM) that looks up the intrinsic.
// V:       the value being converted; passed as the first call operand, and
//          its type is the second overload type of the intrinsic.
// Ty:      the first overload type of the intrinsic (its result type).
//
// The second call operand is the constant i32 1, i.e. the intrinsic's
// "unsigned" operand is set to select the unsigned conversion.
// Returns the created call.
static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
|
||||
CodeGenFunction *CGF, llvm::Value *V,
|
||||
llvm::Type *Ty) {
|
||||
return Builder.CreateCall(
|
||||
CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
|
||||
{V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
|
||||
}
|
||||
|
||||
// Emits a call to the arm_mve_vcvt_int_fp intrinsic; this helper is named as
// the strictfp alternative of the `fptosi` codegen node.
//
// Builder: used to create the call and the i32 constant operand.
// CGF:     provides the CodeGenModule (CGF->CGM) that looks up the intrinsic.
// V:       the value being converted; passed as the first call operand, and
//          its type is the second overload type of the intrinsic.
// Ty:      the first overload type of the intrinsic (its result type).
//
// The second call operand is the constant i32 0, i.e. the intrinsic's
// "unsigned" operand is cleared to select the signed conversion.
// Returns the created call.
static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
|
||||
CodeGenFunction *CGF, llvm::Value *V,
|
||||
llvm::Type *Ty) {
|
||||
return Builder.CreateCall(
|
||||
CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
|
||||
{V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
|
||||
}
|
||||
|
||||
// Emits a call to the arm_mve_vcvt_int_fp intrinsic; this helper is named as
// the strictfp alternative of the `fptoui` codegen node.
//
// Builder: used to create the call and the i32 constant operand.
// CGF:     provides the CodeGenModule (CGF->CGM) that looks up the intrinsic.
// V:       the value being converted; passed as the first call operand, and
//          its type is the second overload type of the intrinsic.
// Ty:      the first overload type of the intrinsic (its result type).
//
// The second call operand is the constant i32 1, i.e. the intrinsic's
// "unsigned" operand is set to select the unsigned conversion.
// Returns the created call.
static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
|
||||
CodeGenFunction *CGF, llvm::Value *V,
|
||||
llvm::Type *Ty) {
|
||||
return Builder.CreateCall(
|
||||
CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
|
||||
{V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
|
||||
}
|
||||
|
||||
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
|
||||
const CallExpr *E,
|
||||
ReturnValueSlot ReturnValue,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1294,10 +1294,13 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
|
||||
return GenIRIntBase(Op);
|
||||
} else if (Op->isSubClassOf("strictFPAlt")) {
|
||||
auto StardardBuilder = Op->getValueAsDef("standard");
|
||||
Result::Ptr Standard = StardardBuilder->isSubClassOf("IRBuilder")
|
||||
Result::Ptr Standard = StardardBuilder->isSubClassOf("IRBuilderBase")
|
||||
? GenIRBuilderBase(StardardBuilder)
|
||||
: GenIRIntBase(StardardBuilder);
|
||||
Result::Ptr StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp"));
|
||||
auto StrictBuilder = Op->getValueAsDef("strictfp");
|
||||
Result::Ptr StrictFp = StrictBuilder->isSubClassOf("IRBuilderBase")
|
||||
? GenIRBuilderBase(StrictBuilder)
|
||||
: GenIRIntBase(StrictBuilder);
|
||||
return std::make_shared<StrictFpAltResult>(Standard, StrictFp);
|
||||
} else {
|
||||
PrintFatalError("Unsupported dag node " + Op->getName());
|
||||
|
||||
@@ -1304,6 +1304,12 @@ defm int_arm_mve_vcvt_fix: MVEMXPredicated<
|
||||
[llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
|
||||
LLVMMatchType<0>, llvm_anyvector_ty>;
|
||||
|
||||
def int_arm_mve_vcvt_fp_int: DefaultAttrsIntrinsic<
|
||||
[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */],
|
||||
[IntrNoMem]>;
|
||||
def int_arm_mve_vcvt_int_fp: DefaultAttrsIntrinsic<
|
||||
[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */],
|
||||
[IntrNoMem]>;
|
||||
def int_arm_mve_vcvt_fp_int_predicated: DefaultAttrsIntrinsic<
|
||||
[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */,
|
||||
llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */],
|
||||
|
||||
@@ -4067,7 +4067,7 @@ class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
|
||||
}
|
||||
|
||||
multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
|
||||
SDNode unpred_op> {
|
||||
SDNode unpred_op, SDPatternOperator unpred_intrinsic> {
|
||||
defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u"));
|
||||
defvar ToInt = !eq(Src.SuffixLetter,"f");
|
||||
|
||||
@@ -4078,6 +4078,8 @@ multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
|
||||
let Predicates = [HasMVEFloat] in {
|
||||
def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))),
|
||||
(Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
|
||||
def : Pat<(Dest.Vec (unpred_intrinsic (Src.Vec MQPR:$src), (i32 Unsigned))),
|
||||
(Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
|
||||
def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated
|
||||
(Src.Vec MQPR:$src), (i32 Unsigned),
|
||||
(Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
|
||||
@@ -4088,15 +4090,15 @@ multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
|
||||
}
|
||||
// The unsuffixed VCVT for float->int implicitly rounds toward zero,
|
||||
// which I reflect here in the llvm instruction names
|
||||
defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>;
|
||||
defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>;
|
||||
defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>;
|
||||
defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>;
|
||||
defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint, int_arm_mve_vcvt_int_fp>;
|
||||
defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint, int_arm_mve_vcvt_int_fp>;
|
||||
defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint, int_arm_mve_vcvt_int_fp>;
|
||||
defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint, int_arm_mve_vcvt_int_fp>;
|
||||
// Whereas VCVT for int->float rounds to nearest
|
||||
defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>;
|
||||
defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
|
||||
defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
|
||||
defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
|
||||
defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp, int_arm_mve_vcvt_fp_int>;
|
||||
defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp, int_arm_mve_vcvt_fp_int>;
|
||||
defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp, int_arm_mve_vcvt_fp_int>;
|
||||
defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp, int_arm_mve_vcvt_fp_int>;
|
||||
|
||||
let Predicates = [HasMVEFloat] in {
|
||||
def : Pat<(v4i32 (fp_to_sint_sat v4f32:$src, i32)),
|
||||
|
||||
@@ -452,4 +452,85 @@ entry:
|
||||
|
||||
|
||||
|
||||
|
||||
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_f16_s16(<8 x i16> noundef %a) #0 {
|
||||
; CHECK-LABEL: test_vcvtq_f16_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.f16.s16 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> %a, i32 0)
|
||||
ret <8 x half> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_f16_u16(<8 x i16> noundef %a) #0 {
|
||||
; CHECK-LABEL: test_vcvtq_f16_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.f16.u16 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> %a, i32 1)
|
||||
ret <8 x half> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_f32_s32(<4 x i32> noundef %a) #0 {
|
||||
; CHECK-LABEL: test_vcvtq_f32_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.f32.s32 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> %a, i32 0)
|
||||
ret <4 x float> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_f32_u32(<4 x i32> noundef %a) #0 {
|
||||
; CHECK-LABEL: test_vcvtq_f32_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.f32.u32 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> %a, i32 1)
|
||||
ret <4 x float> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_s16_f16(<8 x half> noundef %a) #0 {
|
||||
; CHECK-LABEL: test_vcvtq_s16_f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.s16.f16 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = tail call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> %a, i32 0)
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_s32_f32(<4 x float> noundef %a) #0 {
|
||||
; CHECK-LABEL: test_vcvtq_s32_f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.s32.f32 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = tail call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> %a, i32 0)
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_u16_f16(<8 x half> noundef %a) #0 {
|
||||
; CHECK-LABEL: test_vcvtq_u16_f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.u16.f16 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = tail call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> %a, i32 1)
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_u32_f32(<4 x float> noundef %a) #0 {
|
||||
; CHECK-LABEL: test_vcvtq_u32_f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.u32.f32 q0, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = tail call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> %a, i32 1)
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
||||
|
||||
Reference in New Issue
Block a user