mirror of
https://github.com/intel/llvm.git
synced 2026-01-23 16:06:39 +08:00
[NVPTX] don't erase CopyToRegs when folding movs into loads (#149393)
We may still need to keep a CopyToReg node even after folding its uses into
vector loads, since the register it writes may still be read in other basic blocks.
Partially reverts 1fdbe69849.
This commit is contained in:
@@ -5060,12 +5060,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
|
||||
return !U.getUser()->use_empty();
|
||||
}
|
||||
|
||||
// Handle CopyToReg nodes that will become dead after our replacement
|
||||
if (U.getUser()->getOpcode() == ISD::CopyToReg) {
|
||||
DeadCopyToRegs.push_back(U.getUser());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Otherwise, this use prevents us from splitting a value.
|
||||
return false;
|
||||
}))
|
||||
@@ -5132,10 +5126,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
|
||||
for (unsigned I : seq(NewLoad->getNumValues() - NewNumOutputs))
|
||||
Results.push_back(NewLoad.getValue(NewNumOutputs + I));
|
||||
|
||||
// Remove dead CopyToReg nodes by folding them into the chain they reference
|
||||
for (SDNode *CTR : DeadCopyToRegs)
|
||||
DCI.CombineTo(CTR, CTR->getOperand(0));
|
||||
|
||||
return DCI.DAG.getMergeValues(Results, DL);
|
||||
}
|
||||
|
||||
@@ -6544,4 +6534,4 @@ void NVPTXTargetLowering::computeKnownBitsForTargetNode(
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -32,31 +32,57 @@ define <2 x i16> @test_ret_const() #0 {
|
||||
}
|
||||
|
||||
define i16 @test_extract_0(<2 x i16> %a) #0 {
|
||||
; COMMON-LABEL: test_extract_0(
|
||||
; COMMON: {
|
||||
; COMMON-NEXT: .reg .b16 %rs<3>;
|
||||
; COMMON-NEXT: .reg .b32 %r<3>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_0_param_0];
|
||||
; COMMON-NEXT: cvt.u32.u16 %r2, %rs1;
|
||||
; COMMON-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; COMMON-NEXT: ret;
|
||||
; I16x2-LABEL: test_extract_0(
|
||||
; I16x2: {
|
||||
; I16x2-NEXT: .reg .b16 %rs<2>;
|
||||
; I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; I16x2-EMPTY:
|
||||
; I16x2-NEXT: // %bb.0:
|
||||
; I16x2-NEXT: ld.param.b32 %r1, [test_extract_0_param_0];
|
||||
; I16x2-NEXT: mov.b32 {%rs1, _}, %r1;
|
||||
; I16x2-NEXT: cvt.u32.u16 %r2, %rs1;
|
||||
; I16x2-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; I16x2-NEXT: ret;
|
||||
;
|
||||
; NO-I16x2-LABEL: test_extract_0(
|
||||
; NO-I16x2: {
|
||||
; NO-I16x2-NEXT: .reg .b16 %rs<2>;
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_extract_0_param_0];
|
||||
; NO-I16x2-NEXT: { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; }
|
||||
; NO-I16x2-NEXT: cvt.u32.u16 %r2, %rs1;
|
||||
; NO-I16x2-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; NO-I16x2-NEXT: ret;
|
||||
%e = extractelement <2 x i16> %a, i32 0
|
||||
ret i16 %e
|
||||
}
|
||||
|
||||
define i16 @test_extract_1(<2 x i16> %a) #0 {
|
||||
; COMMON-LABEL: test_extract_1(
|
||||
; COMMON: {
|
||||
; COMMON-NEXT: .reg .b16 %rs<3>;
|
||||
; COMMON-NEXT: .reg .b32 %r<3>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_1_param_0];
|
||||
; COMMON-NEXT: cvt.u32.u16 %r2, %rs2;
|
||||
; COMMON-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; COMMON-NEXT: ret;
|
||||
; I16x2-LABEL: test_extract_1(
|
||||
; I16x2: {
|
||||
; I16x2-NEXT: .reg .b16 %rs<2>;
|
||||
; I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; I16x2-EMPTY:
|
||||
; I16x2-NEXT: // %bb.0:
|
||||
; I16x2-NEXT: ld.param.b32 %r1, [test_extract_1_param_0];
|
||||
; I16x2-NEXT: mov.b32 {_, %rs1}, %r1;
|
||||
; I16x2-NEXT: cvt.u32.u16 %r2, %rs1;
|
||||
; I16x2-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; I16x2-NEXT: ret;
|
||||
;
|
||||
; NO-I16x2-LABEL: test_extract_1(
|
||||
; NO-I16x2: {
|
||||
; NO-I16x2-NEXT: .reg .b16 %rs<2>;
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_extract_1_param_0];
|
||||
; NO-I16x2-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; }
|
||||
; NO-I16x2-NEXT: cvt.u32.u16 %r2, %rs1;
|
||||
; NO-I16x2-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; NO-I16x2-NEXT: ret;
|
||||
%e = extractelement <2 x i16> %a, i32 1
|
||||
ret i16 %e
|
||||
}
|
||||
@@ -71,8 +97,9 @@ define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_i_param_0];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_extract_i_param_0];
|
||||
; COMMON-NEXT: setp.eq.b64 %p1, %rd1, 0;
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1;
|
||||
; COMMON-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
|
||||
; COMMON-NEXT: cvt.u32.u16 %r2, %rs3;
|
||||
; COMMON-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
@@ -99,10 +126,12 @@ define <2 x i16> @test_add(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_param_0];
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_add_param_1];
|
||||
; NO-I16x2-NEXT: add.s16 %rs5, %rs2, %rs4;
|
||||
; NO-I16x2-NEXT: add.s16 %rs6, %rs1, %rs3;
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r2, [test_add_param_1];
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_add_param_0];
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1;
|
||||
; NO-I16x2-NEXT: add.s16 %rs5, %rs4, %rs2;
|
||||
; NO-I16x2-NEXT: add.s16 %rs6, %rs3, %rs1;
|
||||
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
|
||||
; NO-I16x2-NEXT: ret;
|
||||
%r = add <2 x i16> %a, %b
|
||||
@@ -128,7 +157,8 @@ define <2 x i16> @test_add_imm_0(<2 x i16> %a) #0 {
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<2>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_imm_0_param_0];
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_add_imm_0_param_0];
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r1;
|
||||
; NO-I16x2-NEXT: add.s16 %rs3, %rs2, 2;
|
||||
; NO-I16x2-NEXT: add.s16 %rs4, %rs1, 1;
|
||||
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
|
||||
@@ -155,7 +185,8 @@ define <2 x i16> @test_add_imm_1(<2 x i16> %a) #0 {
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<2>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_imm_1_param_0];
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_add_imm_1_param_0];
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r1;
|
||||
; NO-I16x2-NEXT: add.s16 %rs3, %rs2, 2;
|
||||
; NO-I16x2-NEXT: add.s16 %rs4, %rs1, 1;
|
||||
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
|
||||
@@ -171,10 +202,12 @@ define <2 x i16> @test_sub(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; COMMON-NEXT: .reg .b32 %r<3>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sub_param_0];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_sub_param_1];
|
||||
; COMMON-NEXT: sub.s16 %rs5, %rs2, %rs4;
|
||||
; COMMON-NEXT: sub.s16 %rs6, %rs1, %rs3;
|
||||
; COMMON-NEXT: ld.param.b32 %r2, [test_sub_param_1];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_sub_param_0];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r1;
|
||||
; COMMON-NEXT: sub.s16 %rs5, %rs4, %rs2;
|
||||
; COMMON-NEXT: sub.s16 %rs6, %rs3, %rs1;
|
||||
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
|
||||
; COMMON-NEXT: ret;
|
||||
%r = sub <2 x i16> %a, %b
|
||||
@@ -199,10 +232,12 @@ define <2 x i16> @test_smax(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_smax_param_0];
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_smax_param_1];
|
||||
; NO-I16x2-NEXT: max.s16 %rs5, %rs2, %rs4;
|
||||
; NO-I16x2-NEXT: max.s16 %rs6, %rs1, %rs3;
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r2, [test_smax_param_1];
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_smax_param_0];
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1;
|
||||
; NO-I16x2-NEXT: max.s16 %rs5, %rs4, %rs2;
|
||||
; NO-I16x2-NEXT: max.s16 %rs6, %rs3, %rs1;
|
||||
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
|
||||
; NO-I16x2-NEXT: ret;
|
||||
%cmp = icmp sgt <2 x i16> %a, %b
|
||||
@@ -228,10 +263,12 @@ define <2 x i16> @test_umax(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_umax_param_0];
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_umax_param_1];
|
||||
; NO-I16x2-NEXT: max.u16 %rs5, %rs2, %rs4;
|
||||
; NO-I16x2-NEXT: max.u16 %rs6, %rs1, %rs3;
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r2, [test_umax_param_1];
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_umax_param_0];
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1;
|
||||
; NO-I16x2-NEXT: max.u16 %rs5, %rs4, %rs2;
|
||||
; NO-I16x2-NEXT: max.u16 %rs6, %rs3, %rs1;
|
||||
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
|
||||
; NO-I16x2-NEXT: ret;
|
||||
%cmp = icmp ugt <2 x i16> %a, %b
|
||||
@@ -257,10 +294,12 @@ define <2 x i16> @test_smin(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_smin_param_0];
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_smin_param_1];
|
||||
; NO-I16x2-NEXT: min.s16 %rs5, %rs2, %rs4;
|
||||
; NO-I16x2-NEXT: min.s16 %rs6, %rs1, %rs3;
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r2, [test_smin_param_1];
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_smin_param_0];
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1;
|
||||
; NO-I16x2-NEXT: min.s16 %rs5, %rs4, %rs2;
|
||||
; NO-I16x2-NEXT: min.s16 %rs6, %rs3, %rs1;
|
||||
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
|
||||
; NO-I16x2-NEXT: ret;
|
||||
%cmp = icmp sle <2 x i16> %a, %b
|
||||
@@ -286,10 +325,12 @@ define <2 x i16> @test_umin(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<3>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_umin_param_0];
|
||||
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_umin_param_1];
|
||||
; NO-I16x2-NEXT: min.u16 %rs5, %rs2, %rs4;
|
||||
; NO-I16x2-NEXT: min.u16 %rs6, %rs1, %rs3;
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r2, [test_umin_param_1];
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_umin_param_0];
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1;
|
||||
; NO-I16x2-NEXT: min.u16 %rs5, %rs4, %rs2;
|
||||
; NO-I16x2-NEXT: min.u16 %rs6, %rs3, %rs1;
|
||||
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
|
||||
; NO-I16x2-NEXT: ret;
|
||||
%cmp = icmp ule <2 x i16> %a, %b
|
||||
@@ -304,10 +345,12 @@ define <2 x i16> @test_mul(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; COMMON-NEXT: .reg .b32 %r<3>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_mul_param_0];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_mul_param_1];
|
||||
; COMMON-NEXT: mul.lo.s16 %rs5, %rs2, %rs4;
|
||||
; COMMON-NEXT: mul.lo.s16 %rs6, %rs1, %rs3;
|
||||
; COMMON-NEXT: ld.param.b32 %r2, [test_mul_param_1];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_mul_param_0];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r1;
|
||||
; COMMON-NEXT: mul.lo.s16 %rs5, %rs4, %rs2;
|
||||
; COMMON-NEXT: mul.lo.s16 %rs6, %rs3, %rs1;
|
||||
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
|
||||
; COMMON-NEXT: ret;
|
||||
%r = mul <2 x i16> %a, %b
|
||||
@@ -686,14 +729,18 @@ define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x
|
||||
; COMMON-NEXT: .reg .b32 %r<5>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_2];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_select_cc_param_3];
|
||||
; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs5;
|
||||
; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs6;
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs7, %rs8}, [test_select_cc_param_1];
|
||||
; COMMON-NEXT: selp.b16 %rs9, %rs2, %rs8, %p2;
|
||||
; COMMON-NEXT: selp.b16 %rs10, %rs1, %rs7, %p1;
|
||||
; COMMON-NEXT: ld.param.b32 %r4, [test_select_cc_param_3];
|
||||
; COMMON-NEXT: ld.param.b32 %r3, [test_select_cc_param_2];
|
||||
; COMMON-NEXT: ld.param.b32 %r2, [test_select_cc_param_1];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_select_cc_param_0];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r4;
|
||||
; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r3;
|
||||
; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1;
|
||||
; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2;
|
||||
; COMMON-NEXT: mov.b32 {%rs5, %rs6}, %r2;
|
||||
; COMMON-NEXT: mov.b32 {%rs7, %rs8}, %r1;
|
||||
; COMMON-NEXT: selp.b16 %rs9, %rs8, %rs6, %p2;
|
||||
; COMMON-NEXT: selp.b16 %rs10, %rs7, %rs5, %p1;
|
||||
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs9};
|
||||
; COMMON-NEXT: ret;
|
||||
%cc = icmp ne <2 x i16> %c, %d
|
||||
@@ -711,10 +758,12 @@ define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b,
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i32_i16_param_1];
|
||||
; COMMON-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_i32_i16_param_0];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_i32_i16_param_2];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_i32_i16_param_3];
|
||||
; COMMON-NEXT: setp.ne.b16 %p1, %rs1, %rs3;
|
||||
; COMMON-NEXT: setp.ne.b16 %p2, %rs2, %rs4;
|
||||
; COMMON-NEXT: ld.param.b32 %r6, [test_select_cc_i32_i16_param_3];
|
||||
; COMMON-NEXT: ld.param.b32 %r5, [test_select_cc_i32_i16_param_2];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r6;
|
||||
; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r5;
|
||||
; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1;
|
||||
; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2;
|
||||
; COMMON-NEXT: selp.b32 %r7, %r2, %r4, %p2;
|
||||
; COMMON-NEXT: selp.b32 %r8, %r1, %r3, %p1;
|
||||
; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7};
|
||||
@@ -735,12 +784,14 @@ define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b,
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_i16_i32_param_3];
|
||||
; COMMON-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i16_i32_param_2];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_i16_i32_param_0];
|
||||
; COMMON-NEXT: ld.param.b32 %r2, [test_select_cc_i16_i32_param_1];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_select_cc_i16_i32_param_0];
|
||||
; COMMON-NEXT: setp.ne.b32 %p1, %r3, %r5;
|
||||
; COMMON-NEXT: setp.ne.b32 %p2, %r4, %r6;
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_i16_i32_param_1];
|
||||
; COMMON-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2;
|
||||
; COMMON-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1;
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r1;
|
||||
; COMMON-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2;
|
||||
; COMMON-NEXT: selp.b16 %rs6, %rs3, %rs1, %p1;
|
||||
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
|
||||
; COMMON-NEXT: ret;
|
||||
<2 x i32> %c, <2 x i32> %d) #0 {
|
||||
@@ -851,7 +902,8 @@ define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
|
||||
; COMMON-NEXT: .reg .b32 %r<4>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_zext_2xi32_param_0];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_zext_2xi32_param_0];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1;
|
||||
; COMMON-NEXT: cvt.u32.u16 %r2, %rs2;
|
||||
; COMMON-NEXT: cvt.u32.u16 %r3, %rs1;
|
||||
; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2};
|
||||
@@ -868,7 +920,8 @@ define <2 x i64> @test_zext_2xi64(<2 x i16> %a) #0 {
|
||||
; COMMON-NEXT: .reg .b64 %rd<3>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_zext_2xi64_param_0];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_zext_2xi64_param_0];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1;
|
||||
; COMMON-NEXT: cvt.u64.u16 %rd1, %rs2;
|
||||
; COMMON-NEXT: cvt.u64.u16 %rd2, %rs1;
|
||||
; COMMON-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1};
|
||||
@@ -926,7 +979,8 @@ define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 {
|
||||
; COMMON-NEXT: .reg .b32 %r<2>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_shufflevector_param_0];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_shufflevector_param_0];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1;
|
||||
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
|
||||
; COMMON-NEXT: ret;
|
||||
%s = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
|
||||
@@ -934,16 +988,29 @@ define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 {
|
||||
}
|
||||
|
||||
define <2 x i16> @test_insertelement(<2 x i16> %a, i16 %x) #0 {
|
||||
; COMMON-LABEL: test_insertelement(
|
||||
; COMMON: {
|
||||
; COMMON-NEXT: .reg .b16 %rs<4>;
|
||||
; COMMON-NEXT: .reg .b32 %r<2>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1];
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs2, %rs3}, [test_insertelement_param_0];
|
||||
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
|
||||
; COMMON-NEXT: ret;
|
||||
; I16x2-LABEL: test_insertelement(
|
||||
; I16x2: {
|
||||
; I16x2-NEXT: .reg .b16 %rs<3>;
|
||||
; I16x2-NEXT: .reg .b32 %r<2>;
|
||||
; I16x2-EMPTY:
|
||||
; I16x2-NEXT: // %bb.0:
|
||||
; I16x2-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1];
|
||||
; I16x2-NEXT: ld.param.b32 %r1, [test_insertelement_param_0];
|
||||
; I16x2-NEXT: mov.b32 {%rs2, _}, %r1;
|
||||
; I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
|
||||
; I16x2-NEXT: ret;
|
||||
;
|
||||
; NO-I16x2-LABEL: test_insertelement(
|
||||
; NO-I16x2: {
|
||||
; NO-I16x2-NEXT: .reg .b16 %rs<3>;
|
||||
; NO-I16x2-NEXT: .reg .b32 %r<2>;
|
||||
; NO-I16x2-EMPTY:
|
||||
; NO-I16x2-NEXT: // %bb.0:
|
||||
; NO-I16x2-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1];
|
||||
; NO-I16x2-NEXT: ld.param.b32 %r1, [test_insertelement_param_0];
|
||||
; NO-I16x2-NEXT: { .reg .b16 tmp; mov.b32 {%rs2, tmp}, %r1; }
|
||||
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
|
||||
; NO-I16x2-NEXT: ret;
|
||||
%i = insertelement <2 x i16> %a, i16 %x, i64 1
|
||||
ret <2 x i16> %i
|
||||
}
|
||||
@@ -955,7 +1022,8 @@ define <2 x i16> @test_fptosi_2xhalf_to_2xi16(<2 x half> %a) #0 {
|
||||
; COMMON-NEXT: .reg .b32 %r<2>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptosi_2xhalf_to_2xi16_param_0];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_fptosi_2xhalf_to_2xi16_param_0];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1;
|
||||
; COMMON-NEXT: cvt.rzi.s16.f16 %rs3, %rs2;
|
||||
; COMMON-NEXT: cvt.rzi.s16.f16 %rs4, %rs1;
|
||||
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
|
||||
@@ -971,7 +1039,8 @@ define <2 x i16> @test_fptoui_2xhalf_to_2xi16(<2 x half> %a) #0 {
|
||||
; COMMON-NEXT: .reg .b32 %r<2>;
|
||||
; COMMON-EMPTY:
|
||||
; COMMON-NEXT: // %bb.0:
|
||||
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptoui_2xhalf_to_2xi16_param_0];
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_fptoui_2xhalf_to_2xi16_param_0];
|
||||
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1;
|
||||
; COMMON-NEXT: cvt.rzi.u16.f16 %rs3, %rs2;
|
||||
; COMMON-NEXT: cvt.rzi.u16.f16 %rs4, %rs1;
|
||||
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
|
||||
|
||||
@@ -1935,16 +1935,18 @@ define <4 x i8> @test_fptosi_4xhalf_to_4xi8(<4 x half> %a) #0 {
|
||||
; O0-NEXT: .reg .b32 %r<12>;
|
||||
; O0-EMPTY:
|
||||
; O0-NEXT: // %bb.0:
|
||||
; O0-NEXT: ld.param.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [test_fptosi_4xhalf_to_4xi8_param_0];
|
||||
; O0-NEXT: cvt.rzi.s16.f16 %rs5, %rs4;
|
||||
; O0-NEXT: cvt.rzi.s16.f16 %rs6, %rs3;
|
||||
; O0-NEXT: mov.b32 %r3, {%rs6, %rs5};
|
||||
; O0-NEXT: mov.b32 {%rs7, %rs8}, %r3;
|
||||
; O0-NEXT: cvt.u32.u16 %r4, %rs8;
|
||||
; O0-NEXT: cvt.u32.u16 %r5, %rs7;
|
||||
; O0-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptosi_4xhalf_to_4xi8_param_0];
|
||||
; O0-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; O0-NEXT: cvt.rzi.s16.f16 %rs3, %rs2;
|
||||
; O0-NEXT: cvt.rzi.s16.f16 %rs4, %rs1;
|
||||
; O0-NEXT: mov.b32 %r3, {%rs4, %rs3};
|
||||
; O0-NEXT: mov.b32 {%rs5, %rs6}, %r3;
|
||||
; O0-NEXT: cvt.u32.u16 %r4, %rs6;
|
||||
; O0-NEXT: cvt.u32.u16 %r5, %rs5;
|
||||
; O0-NEXT: prmt.b32 %r6, %r5, %r4, 0x3340U;
|
||||
; O0-NEXT: cvt.rzi.s16.f16 %rs9, %rs2;
|
||||
; O0-NEXT: cvt.rzi.s16.f16 %rs10, %rs1;
|
||||
; O0-NEXT: mov.b32 {%rs7, %rs8}, %r1;
|
||||
; O0-NEXT: cvt.rzi.s16.f16 %rs9, %rs8;
|
||||
; O0-NEXT: cvt.rzi.s16.f16 %rs10, %rs7;
|
||||
; O0-NEXT: mov.b32 %r7, {%rs10, %rs9};
|
||||
; O0-NEXT: mov.b32 {%rs11, %rs12}, %r7;
|
||||
; O0-NEXT: cvt.u32.u16 %r8, %rs12;
|
||||
@@ -1989,16 +1991,18 @@ define <4 x i8> @test_fptoui_4xhalf_to_4xi8(<4 x half> %a) #0 {
|
||||
; O0-NEXT: .reg .b32 %r<12>;
|
||||
; O0-EMPTY:
|
||||
; O0-NEXT: // %bb.0:
|
||||
; O0-NEXT: ld.param.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [test_fptoui_4xhalf_to_4xi8_param_0];
|
||||
; O0-NEXT: cvt.rzi.u16.f16 %rs5, %rs4;
|
||||
; O0-NEXT: cvt.rzi.u16.f16 %rs6, %rs3;
|
||||
; O0-NEXT: mov.b32 %r3, {%rs6, %rs5};
|
||||
; O0-NEXT: mov.b32 {%rs7, %rs8}, %r3;
|
||||
; O0-NEXT: cvt.u32.u16 %r4, %rs8;
|
||||
; O0-NEXT: cvt.u32.u16 %r5, %rs7;
|
||||
; O0-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptoui_4xhalf_to_4xi8_param_0];
|
||||
; O0-NEXT: mov.b32 {%rs1, %rs2}, %r2;
|
||||
; O0-NEXT: cvt.rzi.u16.f16 %rs3, %rs2;
|
||||
; O0-NEXT: cvt.rzi.u16.f16 %rs4, %rs1;
|
||||
; O0-NEXT: mov.b32 %r3, {%rs4, %rs3};
|
||||
; O0-NEXT: mov.b32 {%rs5, %rs6}, %r3;
|
||||
; O0-NEXT: cvt.u32.u16 %r4, %rs6;
|
||||
; O0-NEXT: cvt.u32.u16 %r5, %rs5;
|
||||
; O0-NEXT: prmt.b32 %r6, %r5, %r4, 0x3340U;
|
||||
; O0-NEXT: cvt.rzi.u16.f16 %rs9, %rs2;
|
||||
; O0-NEXT: cvt.rzi.u16.f16 %rs10, %rs1;
|
||||
; O0-NEXT: mov.b32 {%rs7, %rs8}, %r1;
|
||||
; O0-NEXT: cvt.rzi.u16.f16 %rs9, %rs8;
|
||||
; O0-NEXT: cvt.rzi.u16.f16 %rs10, %rs7;
|
||||
; O0-NEXT: mov.b32 %r7, {%rs10, %rs9};
|
||||
; O0-NEXT: mov.b32 {%rs11, %rs12}, %r7;
|
||||
; O0-NEXT: cvt.u32.u16 %r8, %rs12;
|
||||
|
||||
41
llvm/test/CodeGen/NVPTX/pr126337.ll
Normal file
41
llvm/test/CodeGen/NVPTX/pr126337.ll
Normal file
@@ -0,0 +1,41 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 | %ptxas-verify %}
|
||||
|
||||
; This IR should compile without triggering assertions in LICM
|
||||
; when the CopyToReg from %0 in the first BB gets eliminated
|
||||
; but we still use its result in the second BB.
|
||||
; Technically the problem happens in MIR, but there are multiple
|
||||
; passes involved, so testing with the IR reproducer is more convenient.
|
||||
; https://github.com/llvm/llvm-project/pull/126337#issuecomment-3081431594
|
||||
|
||||
target datalayout = "e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
define ptx_kernel void @Equal_GPU_DT_COMPLEX64_DT_BOOL_kernel(<2 x float> %0) {
|
||||
; CHECK-LABEL: Equal_GPU_DT_COMPLEX64_DT_BOOL_kernel(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0: // %.preheader15
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [Equal_GPU_DT_COMPLEX64_DT_BOOL_kernel_param_0];
|
||||
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {%r1, tmp}, %rd1; }
|
||||
; CHECK-NEXT: setp.eq.f32 %p1, %r1, 0f00000000;
|
||||
; CHECK-NEXT: selp.b16 %rs1, 1, 0, %p1;
|
||||
; CHECK-NEXT: $L__BB0_1: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: mov.b64 %rd2, 0;
|
||||
; CHECK-NEXT: st.b8 [%rd2], %rs1;
|
||||
; CHECK-NEXT: bra.uni $L__BB0_1;
|
||||
.preheader15:
|
||||
br label %1
|
||||
|
||||
1: ; preds = %1, %.preheader15
|
||||
%2 = fcmp oeq <2 x float> %0, zeroinitializer
|
||||
%3 = extractelement <2 x i1> %2, i64 0
|
||||
store i1 %3, ptr null, align 4
|
||||
br label %1
|
||||
}
|
||||
|
||||
@@ -117,16 +117,20 @@ define float @reduce_fadd_float(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fadd_float_param_0];
|
||||
; CHECK-NEXT: add.rn.f32 %r9, %r5, 0f00000000;
|
||||
; CHECK-NEXT: add.rn.f32 %r10, %r9, %r6;
|
||||
; CHECK-NEXT: add.rn.f32 %r11, %r10, %r7;
|
||||
; CHECK-NEXT: add.rn.f32 %r12, %r11, %r8;
|
||||
; CHECK-NEXT: add.rn.f32 %r13, %r12, %r1;
|
||||
; CHECK-NEXT: add.rn.f32 %r14, %r13, %r2;
|
||||
; CHECK-NEXT: add.rn.f32 %r15, %r14, %r3;
|
||||
; CHECK-NEXT: add.rn.f32 %r16, %r15, %r4;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fadd_float_param_0+16];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd3;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fadd_float_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r5, %r6}, %rd2;
|
||||
; CHECK-NEXT: mov.b64 {%r7, %r8}, %rd1;
|
||||
; CHECK-NEXT: add.rn.f32 %r9, %r7, 0f00000000;
|
||||
; CHECK-NEXT: add.rn.f32 %r10, %r9, %r8;
|
||||
; CHECK-NEXT: add.rn.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: add.rn.f32 %r12, %r11, %r6;
|
||||
; CHECK-NEXT: add.rn.f32 %r13, %r12, %r3;
|
||||
; CHECK-NEXT: add.rn.f32 %r14, %r13, %r4;
|
||||
; CHECK-NEXT: add.rn.f32 %r15, %r14, %r1;
|
||||
; CHECK-NEXT: add.rn.f32 %r16, %r15, %r2;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r16;
|
||||
; CHECK-NEXT: ret;
|
||||
%res = call float @llvm.vector.reduce.fadd(float 0.0, <8 x float> %in)
|
||||
@@ -140,14 +144,18 @@ define float @reduce_fadd_float_reassoc(<8 x float> %in) {
|
||||
; CHECK-SM80-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-SM80-EMPTY:
|
||||
; CHECK-SM80-NEXT: // %bb.0:
|
||||
; CHECK-SM80-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_float_reassoc_param_0+16];
|
||||
; CHECK-SM80-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fadd_float_reassoc_param_0];
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r9, %r7, %r3;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r10, %r5, %r1;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r11, %r8, %r4;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r12, %r6, %r2;
|
||||
; CHECK-SM80-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fadd_float_reassoc_param_0+16];
|
||||
; CHECK-SM80-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fadd_float_reassoc_param_0];
|
||||
; CHECK-SM80-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-SM80-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r5, %r3, %r1;
|
||||
; CHECK-SM80-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-SM80-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r10, %r8, %r6;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r11, %r4, %r2;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r12, %r9, %r7;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r13, %r12, %r11;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r14, %r10, %r9;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r14, %r10, %r5;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r15, %r14, %r13;
|
||||
; CHECK-SM80-NEXT: add.rn.f32 %r16, %r15, 0f00000000;
|
||||
; CHECK-SM80-NEXT: st.param.b32 [func_retval0], %r16;
|
||||
@@ -321,15 +329,19 @@ define float @reduce_fmul_float(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmul_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmul_float_param_0];
|
||||
; CHECK-NEXT: mul.rn.f32 %r9, %r5, %r6;
|
||||
; CHECK-NEXT: mul.rn.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: mul.rn.f32 %r11, %r10, %r8;
|
||||
; CHECK-NEXT: mul.rn.f32 %r12, %r11, %r1;
|
||||
; CHECK-NEXT: mul.rn.f32 %r13, %r12, %r2;
|
||||
; CHECK-NEXT: mul.rn.f32 %r14, %r13, %r3;
|
||||
; CHECK-NEXT: mul.rn.f32 %r15, %r14, %r4;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmul_float_param_0+16];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd3;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmul_float_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r5, %r6}, %rd2;
|
||||
; CHECK-NEXT: mov.b64 {%r7, %r8}, %rd1;
|
||||
; CHECK-NEXT: mul.rn.f32 %r9, %r7, %r8;
|
||||
; CHECK-NEXT: mul.rn.f32 %r10, %r9, %r5;
|
||||
; CHECK-NEXT: mul.rn.f32 %r11, %r10, %r6;
|
||||
; CHECK-NEXT: mul.rn.f32 %r12, %r11, %r3;
|
||||
; CHECK-NEXT: mul.rn.f32 %r13, %r12, %r4;
|
||||
; CHECK-NEXT: mul.rn.f32 %r14, %r13, %r1;
|
||||
; CHECK-NEXT: mul.rn.f32 %r15, %r14, %r2;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
; CHECK-NEXT: ret;
|
||||
%res = call float @llvm.vector.reduce.fmul(float 1.0, <8 x float> %in)
|
||||
@@ -343,14 +355,18 @@ define float @reduce_fmul_float_reassoc(<8 x float> %in) {
|
||||
; CHECK-SM80-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-SM80-EMPTY:
|
||||
; CHECK-SM80-NEXT: // %bb.0:
|
||||
; CHECK-SM80-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmul_float_reassoc_param_0+16];
|
||||
; CHECK-SM80-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmul_float_reassoc_param_0];
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r9, %r7, %r3;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r10, %r5, %r1;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r11, %r8, %r4;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r12, %r6, %r2;
|
||||
; CHECK-SM80-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmul_float_reassoc_param_0+16];
|
||||
; CHECK-SM80-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmul_float_reassoc_param_0];
|
||||
; CHECK-SM80-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-SM80-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r5, %r3, %r1;
|
||||
; CHECK-SM80-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-SM80-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r10, %r8, %r6;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r11, %r4, %r2;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r12, %r9, %r7;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r13, %r12, %r11;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r14, %r10, %r9;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r14, %r10, %r5;
|
||||
; CHECK-SM80-NEXT: mul.rn.f32 %r15, %r14, %r13;
|
||||
; CHECK-SM80-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
; CHECK-SM80-NEXT: ret;
|
||||
@@ -494,13 +510,17 @@ define float @reduce_fmax_float(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmax_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmax_float_param_0];
|
||||
; CHECK-NEXT: max.f32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: max.f32 %r10, %r6, %r2;
|
||||
; CHECK-NEXT: max.f32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: max.f32 %r12, %r7, %r3;
|
||||
; CHECK-NEXT: max.f32 %r13, %r5, %r1;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmax_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmax_float_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-NEXT: max.f32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-NEXT: max.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: max.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: max.f32 %r12, %r3, %r1;
|
||||
; CHECK-NEXT: max.f32 %r13, %r8, %r6;
|
||||
; CHECK-NEXT: max.f32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: max.f32 %r15, %r14, %r11;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
@@ -517,13 +537,17 @@ define float @reduce_fmax_float_reassoc(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmax_float_reassoc_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmax_float_reassoc_param_0];
|
||||
; CHECK-NEXT: max.f32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: max.f32 %r10, %r6, %r2;
|
||||
; CHECK-NEXT: max.f32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: max.f32 %r12, %r7, %r3;
|
||||
; CHECK-NEXT: max.f32 %r13, %r5, %r1;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmax_float_reassoc_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmax_float_reassoc_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-NEXT: max.f32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-NEXT: max.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: max.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: max.f32 %r12, %r3, %r1;
|
||||
; CHECK-NEXT: max.f32 %r13, %r8, %r6;
|
||||
; CHECK-NEXT: max.f32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: max.f32 %r15, %r14, %r11;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
@@ -628,13 +652,17 @@ define float @reduce_fmin_float(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmin_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmin_float_param_0];
|
||||
; CHECK-NEXT: min.f32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: min.f32 %r10, %r6, %r2;
|
||||
; CHECK-NEXT: min.f32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: min.f32 %r12, %r7, %r3;
|
||||
; CHECK-NEXT: min.f32 %r13, %r5, %r1;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmin_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmin_float_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-NEXT: min.f32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-NEXT: min.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: min.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: min.f32 %r12, %r3, %r1;
|
||||
; CHECK-NEXT: min.f32 %r13, %r8, %r6;
|
||||
; CHECK-NEXT: min.f32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: min.f32 %r15, %r14, %r11;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
@@ -651,13 +679,17 @@ define float @reduce_fmin_float_reassoc(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmin_float_reassoc_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmin_float_reassoc_param_0];
|
||||
; CHECK-NEXT: min.f32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: min.f32 %r10, %r6, %r2;
|
||||
; CHECK-NEXT: min.f32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: min.f32 %r12, %r7, %r3;
|
||||
; CHECK-NEXT: min.f32 %r13, %r5, %r1;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmin_float_reassoc_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmin_float_reassoc_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-NEXT: min.f32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-NEXT: min.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: min.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: min.f32 %r12, %r3, %r1;
|
||||
; CHECK-NEXT: min.f32 %r13, %r8, %r6;
|
||||
; CHECK-NEXT: min.f32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: min.f32 %r15, %r14, %r11;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
@@ -762,13 +794,17 @@ define float @reduce_fmaximum_float(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmaximum_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmaximum_float_param_0];
|
||||
; CHECK-NEXT: max.NaN.f32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: max.NaN.f32 %r10, %r6, %r2;
|
||||
; CHECK-NEXT: max.NaN.f32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: max.NaN.f32 %r12, %r7, %r3;
|
||||
; CHECK-NEXT: max.NaN.f32 %r13, %r5, %r1;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmaximum_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmaximum_float_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-NEXT: max.NaN.f32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-NEXT: max.NaN.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: max.NaN.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: max.NaN.f32 %r12, %r3, %r1;
|
||||
; CHECK-NEXT: max.NaN.f32 %r13, %r8, %r6;
|
||||
; CHECK-NEXT: max.NaN.f32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: max.NaN.f32 %r15, %r14, %r11;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
@@ -785,13 +821,17 @@ define float @reduce_fmaximum_float_reassoc(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmaximum_float_reassoc_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmaximum_float_reassoc_param_0];
|
||||
; CHECK-NEXT: max.NaN.f32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: max.NaN.f32 %r10, %r6, %r2;
|
||||
; CHECK-NEXT: max.NaN.f32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: max.NaN.f32 %r12, %r7, %r3;
|
||||
; CHECK-NEXT: max.NaN.f32 %r13, %r5, %r1;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmaximum_float_reassoc_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmaximum_float_reassoc_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-NEXT: max.NaN.f32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-NEXT: max.NaN.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: max.NaN.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: max.NaN.f32 %r12, %r3, %r1;
|
||||
; CHECK-NEXT: max.NaN.f32 %r13, %r8, %r6;
|
||||
; CHECK-NEXT: max.NaN.f32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: max.NaN.f32 %r15, %r14, %r11;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
@@ -896,13 +936,17 @@ define float @reduce_fminimum_float(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fminimum_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fminimum_float_param_0];
|
||||
; CHECK-NEXT: min.NaN.f32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: min.NaN.f32 %r10, %r6, %r2;
|
||||
; CHECK-NEXT: min.NaN.f32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: min.NaN.f32 %r12, %r7, %r3;
|
||||
; CHECK-NEXT: min.NaN.f32 %r13, %r5, %r1;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fminimum_float_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fminimum_float_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-NEXT: min.NaN.f32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-NEXT: min.NaN.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: min.NaN.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: min.NaN.f32 %r12, %r3, %r1;
|
||||
; CHECK-NEXT: min.NaN.f32 %r13, %r8, %r6;
|
||||
; CHECK-NEXT: min.NaN.f32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: min.NaN.f32 %r15, %r14, %r11;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
@@ -919,13 +963,17 @@ define float @reduce_fminimum_float_reassoc(<8 x float> %in) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fminimum_float_reassoc_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fminimum_float_reassoc_param_0];
|
||||
; CHECK-NEXT: min.NaN.f32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: min.NaN.f32 %r10, %r6, %r2;
|
||||
; CHECK-NEXT: min.NaN.f32 %r11, %r10, %r9;
|
||||
; CHECK-NEXT: min.NaN.f32 %r12, %r7, %r3;
|
||||
; CHECK-NEXT: min.NaN.f32 %r13, %r5, %r1;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fminimum_float_reassoc_param_0+16];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fminimum_float_reassoc_param_0];
|
||||
; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4;
|
||||
; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2;
|
||||
; CHECK-NEXT: min.NaN.f32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: mov.b64 {%r6, %r7}, %rd3;
|
||||
; CHECK-NEXT: mov.b64 {%r8, %r9}, %rd1;
|
||||
; CHECK-NEXT: min.NaN.f32 %r10, %r9, %r7;
|
||||
; CHECK-NEXT: min.NaN.f32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: min.NaN.f32 %r12, %r3, %r1;
|
||||
; CHECK-NEXT: min.NaN.f32 %r13, %r8, %r6;
|
||||
; CHECK-NEXT: min.NaN.f32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: min.NaN.f32 %r15, %r14, %r11;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
|
||||
|
||||
Reference in New Issue
Block a user