mirror of
https://github.com/intel/llvm.git
synced 2026-01-24 08:30:34 +08:00
[NVPTX] Prevent fptrunc of v2f32 from being folded into store (#149571)
This commit is contained in:
@@ -731,6 +731,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
|
||||
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
|
||||
setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
|
||||
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
|
||||
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
|
||||
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
|
||||
|
||||
// PTX does not support load / store predicate registers
|
||||
setOperationAction(ISD::LOAD, MVT::i1, Custom);
|
||||
|
||||
@@ -359,11 +359,12 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b,
|
||||
define <2 x bfloat> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
|
||||
; CHECK-LABEL: test_fptrunc_2xfloat(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0];
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0];
|
||||
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = fptrunc <2 x float> %a to <2 x bfloat>
|
||||
ret <2 x bfloat> %r
|
||||
|
||||
@@ -1499,11 +1499,16 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
|
||||
define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
|
||||
; CHECK-LABEL: test_fptrunc_2xfloat(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0];
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0];
|
||||
; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %r2;
|
||||
; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r1;
|
||||
; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1};
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = fptrunc <2 x float> %a to <2 x half>
|
||||
ret <2 x half> %r
|
||||
|
||||
@@ -2108,6 +2108,41 @@ define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 {
|
||||
ret <2 x float> %r
|
||||
}
|
||||
|
||||
; Truncates the <2 x float> argument %a to <2 x bfloat> and stores the result
; through the pointer argument %p.
; The CHECK lines expect the conversion and the store to stay separate: one
; packed cvt.rn.bf16x2.f32 (sources listed as %r2, %r1) producing %r3, followed
; by a plain 32-bit st.b32 of %r3 to the loaded pointer.
; NOTE(review): presumably this is the regression test for keeping the fptrunc
; from being folded into a truncating store -- confirm against the commit
; description.
define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) {
|
||||
; CHECK-LABEL: test_trunc_to_v2bf16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2bf16_param_1];
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2bf16_param_0];
|
||||
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1;
|
||||
; CHECK-NEXT: st.b32 [%rd2], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%trunc = fptrunc <2 x float> %a to <2 x bfloat>
|
||||
store <2 x bfloat> %trunc, ptr %p
|
||||
ret void
|
||||
}
|
||||
|
||||
; Truncates the <2 x float> argument %a to <2 x half> and stores the result
; through the pointer argument %p.
; The CHECK lines expect the conversion and the store to stay separate: one
; packed cvt.rn.f16x2.f32 (sources listed as %r2, %r1) producing %r3, followed
; by a plain 32-bit st.b32 of %r3 to the loaded pointer.
; NOTE(review): presumably this is the regression test for keeping the fptrunc
; from being folded into a truncating store -- confirm against the commit
; description.
define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) {
|
||||
; CHECK-LABEL: test_trunc_to_v2f16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2f16_param_1];
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2f16_param_0];
|
||||
; CHECK-NEXT: cvt.rn.f16x2.f32 %r3, %r2, %r1;
|
||||
; CHECK-NEXT: st.b32 [%rd2], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%trunc = fptrunc <2 x float> %a to <2 x half>
|
||||
store <2 x half> %trunc, ptr %p
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { "unsafe-fp-math" = "true" }
|
||||
attributes #2 = { "denormal-fp-math"="preserve-sign" }
|
||||
|
||||
Reference in New Issue
Block a user