mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[AVX-512][X86] Convert avx_cvtt_ps2dq_256 and sse2_cvttps2dq intrinsics to ISD::FP_TO_SINT in the intrinsics table and delete patterns. While nearby, also move the CVTDQ2PS patterns into their instruction definitions.
This allows these intrinsics to also use EVEX instructions. llvm-svn: 286434
This commit is contained in:
@@ -1441,15 +1441,18 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
itins.rm>, Sched<[itins.Sched.Folded]>;
|
||||
}
|
||||
|
||||
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
X86MemOperand x86memop, string asm, Domain d,
|
||||
OpndItins itins> {
|
||||
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
|
||||
ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
|
||||
string asm, Domain d, OpndItins itins> {
|
||||
let hasSideEffects = 0 in {
|
||||
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
|
||||
[], itins.rr, d>, Sched<[itins.Sched]>;
|
||||
def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
|
||||
[(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))],
|
||||
itins.rr, d>, Sched<[itins.Sched]>;
|
||||
let mayLoad = 1 in
|
||||
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
|
||||
[], itins.rm, d>, Sched<[itins.Sched.Folded]>;
|
||||
def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
|
||||
[(set RC:$dst, (DstTy (sint_to_fp
|
||||
(SrcTy (bitconvert (ld_frag addr:$src))))))],
|
||||
itins.rm, d>, Sched<[itins.Sched.Folded]>;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1722,16 +1725,16 @@ defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
|
||||
ssmem, sse_load_f32, "cvtss2si",
|
||||
SSE_CVT_SS2SI_64>, XS, REX_W;
|
||||
|
||||
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
|
||||
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64,
|
||||
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
|
||||
SSEPackedSingle, SSE_CVT_PS>,
|
||||
PS, VEX, Requires<[HasAVX]>;
|
||||
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
|
||||
PS, VEX, Requires<[HasAVX, NoVLX]>;
|
||||
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64,
|
||||
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
|
||||
SSEPackedSingle, SSE_CVT_PS>,
|
||||
PS, VEX, VEX_L, Requires<[HasAVX]>;
|
||||
PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>;
|
||||
|
||||
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
|
||||
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64,
|
||||
"cvtdq2ps\t{$src, $dst|$dst, $src}",
|
||||
SSEPackedSingle, SSE_CVT_PS>,
|
||||
PS, Requires<[UseSSE2]>;
|
||||
@@ -1999,72 +2002,41 @@ def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
|
||||
// Convert with truncation packed single/double fp to doubleword
|
||||
// SSE2 packed instructions with XS prefix
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvttps2dq VR128:$src))],
|
||||
(v4i32 (fp_to_sint (v4f32 VR128:$src))))],
|
||||
IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
|
||||
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
|
||||
(loadv4f32 addr:$src)))],
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (fp_to_sint (loadv4f32 addr:$src))))],
|
||||
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
|
||||
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR256:$dst,
|
||||
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
|
||||
(v8i32 (fp_to_sint (v8f32 VR256:$src))))],
|
||||
IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
|
||||
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
|
||||
(loadv8f32 addr:$src)))],
|
||||
[(set VR256:$dst,
|
||||
(v8i32 (fp_to_sint (loadv8f32 addr:$src))))],
|
||||
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
|
||||
Sched<[WriteCvtF2ILd]>;
|
||||
}
|
||||
|
||||
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (fp_to_sint (v4f32 VR128:$src))))],
|
||||
IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
|
||||
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
|
||||
(v4i32 (fp_to_sint (memopv4f32 addr:$src))))],
|
||||
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
|
||||
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
|
||||
(VCVTDQ2PSrr VR128:$src)>;
|
||||
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
|
||||
(VCVTDQ2PSrm addr:$src)>;
|
||||
|
||||
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
|
||||
(VCVTTPS2DQrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
|
||||
(VCVTTPS2DQrm addr:$src)>;
|
||||
|
||||
def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
|
||||
(VCVTDQ2PSYrr VR256:$src)>;
|
||||
def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
|
||||
(VCVTDQ2PSYrm addr:$src)>;
|
||||
|
||||
def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
|
||||
(VCVTTPS2DQYrr VR256:$src)>;
|
||||
def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
|
||||
(VCVTTPS2DQYrm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE2] in {
|
||||
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
|
||||
(CVTDQ2PSrr VR128:$src)>;
|
||||
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
|
||||
(CVTDQ2PSrm addr:$src)>;
|
||||
|
||||
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
|
||||
(CVTTPS2DQrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
|
||||
(CVTTPS2DQrm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoVLX] in
|
||||
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvttpd2dq\t{$src, $dst|$dst, $src}",
|
||||
|
||||
@@ -255,6 +255,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
|
||||
X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
|
||||
X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
|
||||
X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
|
||||
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
|
||||
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
|
||||
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
|
||||
@@ -1638,6 +1639,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
|
||||
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
|
||||
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTPD2DQ, 0),
|
||||
X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
|
||||
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
|
||||
|
||||
@@ -354,10 +354,15 @@ declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
|
||||
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
; AVX-LABEL: test_x86_sse2_cvttps2dq:
|
||||
; AVX: ## BB#0:
|
||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
|
||||
; AVX-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX512VL-LABEL: test_x86_sse2_cvttps2dq:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0]
|
||||
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
@@ -2899,10 +2904,15 @@ declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
|
||||
; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0xc5,0xfe,0x5b,0xc0]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
|
||||
; AVX: ## BB#0:
|
||||
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0xc5,0xfe,0x5b,0xc0]
|
||||
; AVX-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x5b,0xc0]
|
||||
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
@@ -543,10 +543,15 @@ define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
|
||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x5b,0xc0]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse2_cvttps2dq:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
; AVX2-LABEL: test_x86_sse2_cvttps2dq:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse2_cvttps2dq:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user