mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[AArch64][ARM] Remove load from dup and vmul tests. NFC
These tests needn't use loads in their testing of dup and mul instructions, and as the load changes, the tests may no longer test what they are intended to test (as in D140069).
This commit is contained in:
@@ -6,15 +6,15 @@ define <8 x i8> @v_dup8(i8 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.8b v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
|
||||
%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
|
||||
%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
|
||||
%tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
|
||||
%tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
|
||||
%tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
|
||||
%tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
|
||||
%tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
|
||||
ret <8 x i8> %tmp8
|
||||
%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
|
||||
%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
|
||||
%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
|
||||
%tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
|
||||
%tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
|
||||
%tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
|
||||
%tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
|
||||
%tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
|
||||
ret <8 x i8> %tmp8
|
||||
}
|
||||
|
||||
define <4 x i16> @v_dup16(i16 %A) nounwind {
|
||||
@@ -22,11 +22,11 @@ define <4 x i16> @v_dup16(i16 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.4h v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
|
||||
%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
|
||||
%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
|
||||
%tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
|
||||
ret <4 x i16> %tmp4
|
||||
%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
|
||||
%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
|
||||
%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
|
||||
%tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
|
||||
ret <4 x i16> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i32> @v_dup32(i32 %A) nounwind {
|
||||
@@ -34,9 +34,9 @@ define <2 x i32> @v_dup32(i32 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.2s v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
|
||||
%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
|
||||
ret <2 x i32> %tmp2
|
||||
%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
|
||||
%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <2 x float> @v_dupfloat(float %A) nounwind {
|
||||
@@ -45,9 +45,9 @@ define <2 x float> @v_dupfloat(float %A) nounwind {
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
||||
; CHECK-NEXT: dup.2s v0, v0[0]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
|
||||
%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
|
||||
ret <2 x float> %tmp2
|
||||
%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
|
||||
%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
|
||||
ret <2 x float> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
|
||||
@@ -55,23 +55,23 @@ define <16 x i8> @v_dupQ8(i8 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.16b v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
|
||||
%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
|
||||
%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
|
||||
%tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
|
||||
%tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
|
||||
%tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
|
||||
%tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
|
||||
%tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
|
||||
%tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
|
||||
%tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
|
||||
%tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
|
||||
%tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
|
||||
%tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
|
||||
%tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
|
||||
%tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
|
||||
%tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
|
||||
ret <16 x i8> %tmp16
|
||||
%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
|
||||
%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
|
||||
%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
|
||||
%tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
|
||||
%tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
|
||||
%tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
|
||||
%tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
|
||||
%tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
|
||||
%tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
|
||||
%tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
|
||||
%tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
|
||||
%tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
|
||||
%tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
|
||||
%tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
|
||||
%tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
|
||||
%tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
|
||||
ret <16 x i8> %tmp16
|
||||
}
|
||||
|
||||
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
|
||||
@@ -79,15 +79,15 @@ define <8 x i16> @v_dupQ16(i16 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.8h v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
|
||||
%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
|
||||
%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
|
||||
%tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
|
||||
%tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
|
||||
%tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
|
||||
%tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
|
||||
%tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
|
||||
ret <8 x i16> %tmp8
|
||||
%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
|
||||
%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
|
||||
%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
|
||||
%tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
|
||||
%tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
|
||||
%tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
|
||||
%tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
|
||||
%tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
|
||||
ret <8 x i16> %tmp8
|
||||
}
|
||||
|
||||
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
|
||||
@@ -95,11 +95,11 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.4s v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
|
||||
%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
|
||||
%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
|
||||
%tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
|
||||
ret <4 x i32> %tmp4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
|
||||
%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
|
||||
%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
|
||||
%tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <4 x float> @v_dupQfloat(float %A) nounwind {
|
||||
@@ -108,11 +108,11 @@ define <4 x float> @v_dupQfloat(float %A) nounwind {
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
||||
; CHECK-NEXT: dup.4s v0, v0[0]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
|
||||
%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
|
||||
%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
|
||||
%tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
|
||||
ret <4 x float> %tmp4
|
||||
%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
|
||||
%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
|
||||
%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
|
||||
%tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
|
||||
ret <4 x float> %tmp4
|
||||
}
|
||||
|
||||
; Check to make sure it works with shuffles, too.
|
||||
@@ -122,9 +122,9 @@ define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.8b v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i8> %tmp2
|
||||
%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
|
||||
@@ -132,9 +132,9 @@ define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.4h v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
|
||||
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i16> %tmp2
|
||||
%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
|
||||
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
|
||||
@@ -142,9 +142,9 @@ define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.2s v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i32> %tmp2
|
||||
%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <2 x float> @v_shuffledupfloat(float %A) nounwind {
|
||||
@@ -153,9 +153,9 @@ define <2 x float> @v_shuffledupfloat(float %A) nounwind {
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
||||
; CHECK-NEXT: dup.2s v0, v0[0]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <2 x float> undef, float %A, i32 0
|
||||
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x float> %tmp2
|
||||
%tmp1 = insertelement <2 x float> undef, float %A, i32 0
|
||||
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x float> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
|
||||
@@ -163,9 +163,9 @@ define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.16b v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
|
||||
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i8> %tmp2
|
||||
%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
|
||||
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
|
||||
@@ -173,9 +173,9 @@ define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.8h v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
|
||||
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i16> %tmp2
|
||||
%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
|
||||
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
|
||||
@@ -183,9 +183,9 @@ define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup.4s v0, w0
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
|
||||
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i32> %tmp2
|
||||
%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
|
||||
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
|
||||
@@ -194,97 +194,89 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
||||
; CHECK-NEXT: dup.4s v0, v0[0]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = insertelement <4 x float> undef, float %A, i32 0
|
||||
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x float> %tmp2
|
||||
%tmp1 = insertelement <4 x float> undef, float %A, i32 0
|
||||
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i8> @vduplane8(ptr %A) nounwind {
|
||||
define <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
|
||||
; CHECK-LABEL: vduplane8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: dup.8b v0, v0[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <8 x i8>, ptr %A
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <8 x i8> %tmp2
|
||||
%tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <8 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i16> @vduplane16(ptr %A) nounwind {
|
||||
define <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
|
||||
; CHECK-LABEL: vduplane16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: dup.4h v0, v0[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x i16> %tmp2
|
||||
%tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i32> @vduplane32(ptr %A) nounwind {
|
||||
define <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
|
||||
; CHECK-LABEL: vduplane32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: dup.2s v0, v0[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
|
||||
ret <2 x i32> %tmp2
|
||||
%tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <2 x float> @vduplanefloat(ptr %A) nounwind {
|
||||
define <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
|
||||
; CHECK-LABEL: vduplanefloat:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: dup.2s v0, v0[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x float>, ptr %A
|
||||
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
|
||||
ret <2 x float> %tmp2
|
||||
%tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
|
||||
ret <2 x float> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @vduplaneQ8(ptr %A) nounwind {
|
||||
define <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
|
||||
; CHECK-LABEL: vduplaneQ8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: dup.16b v0, v0[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <8 x i8>, ptr %A
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <16 x i8> %tmp2
|
||||
%tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <16 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i16> @vduplaneQ16(ptr %A) nounwind {
|
||||
define <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
|
||||
; CHECK-LABEL: vduplaneQ16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: dup.8h v0, v0[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <8 x i16> %tmp2
|
||||
%tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i32> @vduplaneQ32(ptr %A) nounwind {
|
||||
define <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
|
||||
; CHECK-LABEL: vduplaneQ32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: dup.4s v0, v0[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x i32> %tmp2
|
||||
%tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <4 x float> @vduplaneQfloat(ptr %A) nounwind {
|
||||
define <4 x float> @vduplaneQfloat(<2 x float> %A) nounwind {
|
||||
; CHECK-LABEL: vduplaneQfloat:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: dup.4s v0, v0[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x float>, ptr %A
|
||||
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x float> %tmp2
|
||||
%tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
|
||||
|
||||
@@ -1081,59 +1081,45 @@ declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x f
|
||||
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
|
||||
|
||||
define <4 x i16> @mul_4h(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i16> @mul_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: mul_4h:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: mul.4h v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = mul <4 x i16> %tmp1, %tmp3
|
||||
%tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = mul <4 x i16> %A, %tmp3
|
||||
ret <4 x i16> %tmp4
|
||||
}
|
||||
|
||||
define <8 x i16> @mul_8h(ptr %A, ptr %B) nounwind {
|
||||
define <8 x i16> @mul_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: mul_8h:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: mul.8h v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <8 x i16>, ptr %A
|
||||
%tmp2 = load <8 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = mul <8 x i16> %tmp1, %tmp3
|
||||
%tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = mul <8 x i16> %A, %tmp3
|
||||
ret <8 x i16> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i32> @mul_2s(ptr %A, ptr %B) nounwind {
|
||||
define <2 x i32> @mul_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: mul_2s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: mul.2s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = mul <2 x i32> %tmp1, %tmp3
|
||||
%tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = mul <2 x i32> %A, %tmp3
|
||||
ret <2 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <4 x i32> @mul_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i32> @mul_4s(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: mul_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: mul.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i32>, ptr %A
|
||||
%tmp2 = load <4 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = mul <4 x i32> %tmp1, %tmp3
|
||||
%tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = mul <4 x i32> %A, %tmp3
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
@@ -1153,45 +1139,34 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind {
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x float> @fmul_lane_2s(ptr %A, ptr %B) nounwind {
|
||||
define <2 x float> @fmul_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
|
||||
; CHECK-LABEL: fmul_lane_2s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: fmul.2s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x float>, ptr %A
|
||||
%tmp2 = load <2 x float>, ptr %B
|
||||
%tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = fmul <2 x float> %tmp1, %tmp3
|
||||
%tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = fmul <2 x float> %A, %tmp3
|
||||
ret <2 x float> %tmp4
|
||||
}
|
||||
|
||||
define <4 x float> @fmul_lane_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x float> @fmul_lane_4s(<4 x float> %A, <4 x float> %B) nounwind {
|
||||
; CHECK-LABEL: fmul_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: fmul.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x float>, ptr %A
|
||||
%tmp2 = load <4 x float>, ptr %B
|
||||
%tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = fmul <4 x float> %tmp1, %tmp3
|
||||
%tmp3 = shufflevector <4 x float> %B, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = fmul <4 x float> %A, %tmp3
|
||||
ret <4 x float> %tmp4
|
||||
}
|
||||
|
||||
define <2 x double> @fmul_lane_2d(ptr %A, ptr %B) nounwind {
|
||||
define <2 x double> @fmul_lane_2d(<2 x double> %A, <2 x double> %B) nounwind {
|
||||
; CHECK-LABEL: fmul_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: fmul.2d v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x double>, ptr %A
|
||||
%tmp2 = load <2 x double>, ptr %B
|
||||
%tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = fmul <2 x double> %tmp1, %tmp3
|
||||
%tmp3 = shufflevector <2 x double> %B, <2 x double> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = fmul <2 x double> %A, %tmp3
|
||||
ret <2 x double> %tmp4
|
||||
}
|
||||
|
||||
@@ -1217,101 +1192,76 @@ define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
|
||||
|
||||
|
||||
|
||||
define <2 x float> @fmulx_lane_2s(ptr %A, ptr %B) nounwind {
|
||||
define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
|
||||
; CHECK-LABEL: fmulx_lane_2s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: fmulx.2s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x float>, ptr %A
|
||||
%tmp2 = load <2 x float>, ptr %B
|
||||
%tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3)
|
||||
%tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %A, <2 x float> %tmp3)
|
||||
ret <2 x float> %tmp4
|
||||
}
|
||||
|
||||
define <4 x float> @fmulx_lane_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x float> @fmulx_lane_4s(<4 x float> %A, <4 x float> %B) nounwind {
|
||||
; CHECK-LABEL: fmulx_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: fmulx.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x float>, ptr %A
|
||||
%tmp2 = load <4 x float>, ptr %B
|
||||
%tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3)
|
||||
%tmp3 = shufflevector <4 x float> %B, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %A, <4 x float> %tmp3)
|
||||
ret <4 x float> %tmp4
|
||||
}
|
||||
|
||||
define <2 x double> @fmulx_lane_2d(ptr %A, ptr %B) nounwind {
|
||||
define <2 x double> @fmulx_lane_2d(<2 x double> %A, <2 x double> %B) nounwind {
|
||||
; CHECK-LABEL: fmulx_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: fmulx.2d v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x double>, ptr %A
|
||||
%tmp2 = load <2 x double>, ptr %B
|
||||
%tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3)
|
||||
%tmp3 = shufflevector <2 x double> %B, <2 x double> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %A, <2 x double> %tmp3)
|
||||
ret <2 x double> %tmp4
|
||||
}
|
||||
|
||||
define <4 x i16> @sqdmulh_lane_4h(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i16> @sqdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: sqdmulh_lane_4h:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqdmulh.4h v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
|
||||
%tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %A, <4 x i16> %tmp3)
|
||||
ret <4 x i16> %tmp4
|
||||
}
|
||||
|
||||
define <8 x i16> @sqdmulh_lane_8h(ptr %A, ptr %B) nounwind {
|
||||
define <8 x i16> @sqdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: sqdmulh_lane_8h:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: sqdmulh.8h v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <8 x i16>, ptr %A
|
||||
%tmp2 = load <8 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
|
||||
%tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %A, <8 x i16> %tmp3)
|
||||
ret <8 x i16> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i32> @sqdmulh_lane_2s(ptr %A, ptr %B) nounwind {
|
||||
define <2 x i32> @sqdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: sqdmulh_lane_2s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqdmulh.2s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
|
||||
%tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %A, <2 x i32> %tmp3)
|
||||
ret <2 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <4 x i32> @sqdmulh_lane_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i32> @sqdmulh_lane_4s(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: sqdmulh_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: sqdmulh.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i32>, ptr %A
|
||||
%tmp2 = load <4 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
|
||||
%tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %A, <4 x i32> %tmp3)
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
@@ -1327,59 +1277,45 @@ define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
|
||||
ret i32 %tmp2
|
||||
}
|
||||
|
||||
define <4 x i16> @sqrdmulh_lane_4h(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i16> @sqrdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: sqrdmulh_lane_4h:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqrdmulh.4h v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
|
||||
%tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %A, <4 x i16> %tmp3)
|
||||
ret <4 x i16> %tmp4
|
||||
}
|
||||
|
||||
define <8 x i16> @sqrdmulh_lane_8h(ptr %A, ptr %B) nounwind {
|
||||
define <8 x i16> @sqrdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: sqrdmulh_lane_8h:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: sqrdmulh.8h v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <8 x i16>, ptr %A
|
||||
%tmp2 = load <8 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
|
||||
%tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %A, <8 x i16> %tmp3)
|
||||
ret <8 x i16> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i32> @sqrdmulh_lane_2s(ptr %A, ptr %B) nounwind {
|
||||
define <2 x i32> @sqrdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: sqrdmulh_lane_2s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqrdmulh.2s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
|
||||
%tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %A, <2 x i32> %tmp3)
|
||||
ret <2 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <4 x i32> @sqrdmulh_lane_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i32> @sqrdmulh_lane_4s(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: sqrdmulh_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: ldr q1, [x1]
|
||||
; CHECK-NEXT: sqrdmulh.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i32>, ptr %A
|
||||
%tmp2 = load <4 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
|
||||
%tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %A, <4 x i32> %tmp3)
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
@@ -1395,221 +1331,169 @@ define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
|
||||
ret i32 %tmp2
|
||||
}
|
||||
|
||||
define <4 x i32> @sqdmull_lane_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i32> @sqdmull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: sqdmull_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqdmull.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
|
||||
%tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp3)
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmull_lane_2d(ptr %A, ptr %B) nounwind {
|
||||
define <2 x i64> @sqdmull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: sqdmull_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqdmull.2d v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
|
||||
%tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp3)
|
||||
ret <2 x i64> %tmp4
|
||||
}
|
||||
|
||||
define <4 x i32> @sqdmull2_lane_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i32> @sqdmull2_lane_4s(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: sqdmull2_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0, #8]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: sqdmull.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: sqdmull2.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%load1 = load <8 x i16>, ptr %A
|
||||
%load2 = load <8 x i16>, ptr %B
|
||||
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmull2_lane_2d(ptr %A, ptr %B) nounwind {
|
||||
define <2 x i64> @sqdmull2_lane_2d(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: sqdmull2_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0, #8]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: sqdmull.2d v0, v0, v1[1]
|
||||
; CHECK-NEXT: sqdmull2.2d v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%load1 = load <4 x i32>, ptr %A
|
||||
%load2 = load <4 x i32>, ptr %B
|
||||
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
|
||||
ret <2 x i64> %tmp4
|
||||
}
|
||||
|
||||
define <4 x i32> @umull_lane_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i32> @umull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: umull_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: umull.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
|
||||
%tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp3)
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i64> @umull_lane_2d(ptr %A, ptr %B) nounwind {
|
||||
define <2 x i64> @umull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: umull_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: umull.2d v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
|
||||
%tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp3)
|
||||
ret <2 x i64> %tmp4
|
||||
}
|
||||
|
||||
define <4 x i32> @smull_lane_4s(ptr %A, ptr %B) nounwind {
|
||||
define <4 x i32> @smull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: smull_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: smull.4s v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
|
||||
%tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp3)
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i64> @smull_lane_2d(ptr %A, ptr %B) nounwind {
|
||||
define <2 x i64> @smull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: smull_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d0, [x0]
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: smull.2d v0, v0, v1[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
|
||||
%tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp3)
|
||||
ret <2 x i64> %tmp4
|
||||
}
|
||||
|
||||
define <4 x i32> @smlal_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <4 x i32> @smlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
|
||||
; CHECK-LABEL: smlal_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: smlal.4s v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: smlal.4s v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = load <4 x i32>, ptr %C
|
||||
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
|
||||
%tmp6 = add <4 x i32> %tmp3, %tmp5
|
||||
%tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
|
||||
%tmp6 = add <4 x i32> %C, %tmp5
|
||||
ret <4 x i32> %tmp6
|
||||
}
|
||||
|
||||
define <2 x i64> @smlal_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <2 x i64> @smlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
|
||||
; CHECK-LABEL: smlal_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: smlal.2d v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: smlal.2d v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = load <2 x i64>, ptr %C
|
||||
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
|
||||
%tmp6 = add <2 x i64> %tmp3, %tmp5
|
||||
%tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
|
||||
%tmp6 = add <2 x i64> %C, %tmp5
|
||||
ret <2 x i64> %tmp6
|
||||
}
|
||||
|
||||
define <4 x i32> @sqdmlal_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <4 x i32> @sqdmlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
|
||||
; CHECK-LABEL: sqdmlal_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: sqdmlal.4s v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqdmlal.4s v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = load <4 x i32>, ptr %C
|
||||
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
|
||||
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
|
||||
%tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
|
||||
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %C, <4 x i32> %tmp5)
|
||||
ret <4 x i32> %tmp6
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmlal_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <2 x i64> @sqdmlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
|
||||
; CHECK-LABEL: sqdmlal_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: sqdmlal.2d v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqdmlal.2d v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = load <2 x i64>, ptr %C
|
||||
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
|
||||
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
|
||||
%tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
|
||||
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %C, <2 x i64> %tmp5)
|
||||
ret <2 x i64> %tmp6
|
||||
}
|
||||
|
||||
define <4 x i32> @sqdmlal2_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <4 x i32> @sqdmlal2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nounwind {
|
||||
; CHECK-LABEL: sqdmlal2_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: ldr d1, [x0, #8]
|
||||
; CHECK-NEXT: ldr d2, [x1]
|
||||
; CHECK-NEXT: sqdmlal.4s v0, v1, v2[1]
|
||||
; CHECK-NEXT: sqdmlal2.4s v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%load1 = load <8 x i16>, ptr %A
|
||||
%load2 = load <8 x i16>, ptr %B
|
||||
%tmp3 = load <4 x i32>, ptr %C
|
||||
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
|
||||
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
|
||||
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %C, <4 x i32> %tmp5)
|
||||
ret <4 x i32> %tmp6
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmlal2_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <2 x i64> @sqdmlal2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nounwind {
|
||||
; CHECK-LABEL: sqdmlal2_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: ldr d1, [x0, #8]
|
||||
; CHECK-NEXT: ldr d2, [x1]
|
||||
; CHECK-NEXT: sqdmlal.2d v0, v1, v2[1]
|
||||
; CHECK-NEXT: sqdmlal2.2d v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%load1 = load <4 x i32>, ptr %A
|
||||
%load2 = load <4 x i32>, ptr %B
|
||||
%tmp3 = load <2 x i64>, ptr %C
|
||||
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
|
||||
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
|
||||
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %C, <2 x i64> %tmp5)
|
||||
ret <2 x i64> %tmp6
|
||||
}
|
||||
|
||||
@@ -1715,176 +1599,134 @@ define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
|
||||
declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64)
|
||||
|
||||
|
||||
define <4 x i32> @umlal_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
|
||||
; CHECK-LABEL: umlal_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: umlal.4s v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: umlal.4s v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = load <4 x i32>, ptr %C
|
||||
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
|
||||
%tmp6 = add <4 x i32> %tmp3, %tmp5
|
||||
%tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
|
||||
%tmp6 = add <4 x i32> %C, %tmp5
|
||||
ret <4 x i32> %tmp6
|
||||
}
|
||||
|
||||
define <2 x i64> @umlal_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <2 x i64> @umlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
|
||||
; CHECK-LABEL: umlal_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: umlal.2d v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: umlal.2d v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = load <2 x i64>, ptr %C
|
||||
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
|
||||
%tmp6 = add <2 x i64> %tmp3, %tmp5
|
||||
%tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
|
||||
%tmp6 = add <2 x i64> %C, %tmp5
|
||||
ret <2 x i64> %tmp6
|
||||
}
|
||||
|
||||
|
||||
define <4 x i32> @smlsl_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <4 x i32> @smlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
|
||||
; CHECK-LABEL: smlsl_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: smlsl.4s v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: smlsl.4s v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = load <4 x i32>, ptr %C
|
||||
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
|
||||
%tmp6 = sub <4 x i32> %tmp3, %tmp5
|
||||
%tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
|
||||
%tmp6 = sub <4 x i32> %C, %tmp5
|
||||
ret <4 x i32> %tmp6
|
||||
}
|
||||
|
||||
define <2 x i64> @smlsl_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <2 x i64> @smlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
|
||||
; CHECK-LABEL: smlsl_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: smlsl.2d v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: smlsl.2d v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = load <2 x i64>, ptr %C
|
||||
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
|
||||
%tmp6 = sub <2 x i64> %tmp3, %tmp5
|
||||
%tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
|
||||
%tmp6 = sub <2 x i64> %C, %tmp5
|
||||
ret <2 x i64> %tmp6
|
||||
}
|
||||
|
||||
define <4 x i32> @sqdmlsl_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <4 x i32> @sqdmlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
|
||||
; CHECK-LABEL: sqdmlsl_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: sqdmlsl.4s v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqdmlsl.4s v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = load <4 x i32>, ptr %C
|
||||
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
|
||||
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
|
||||
%tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
|
||||
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %C, <4 x i32> %tmp5)
|
||||
ret <4 x i32> %tmp6
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmlsl_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <2 x i64> @sqdmlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
|
||||
; CHECK-LABEL: sqdmlsl_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: sqdmlsl.2d v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: sqdmlsl.2d v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = load <2 x i64>, ptr %C
|
||||
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
|
||||
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
|
||||
%tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
|
||||
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %C, <2 x i64> %tmp5)
|
||||
ret <2 x i64> %tmp6
|
||||
}
|
||||
|
||||
define <4 x i32> @sqdmlsl2_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nounwind {
|
||||
; CHECK-LABEL: sqdmlsl2_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: ldr d1, [x0, #8]
|
||||
; CHECK-NEXT: ldr d2, [x1]
|
||||
; CHECK-NEXT: sqdmlsl.4s v0, v1, v2[1]
|
||||
; CHECK-NEXT: sqdmlsl2.4s v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%load1 = load <8 x i16>, ptr %A
|
||||
%load2 = load <8 x i16>, ptr %B
|
||||
%tmp3 = load <4 x i32>, ptr %C
|
||||
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
|
||||
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
|
||||
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %C, <4 x i32> %tmp5)
|
||||
ret <4 x i32> %tmp6
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmlsl2_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nounwind {
|
||||
; CHECK-LABEL: sqdmlsl2_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: ldr d1, [x0, #8]
|
||||
; CHECK-NEXT: ldr d2, [x1]
|
||||
; CHECK-NEXT: sqdmlsl.2d v0, v1, v2[1]
|
||||
; CHECK-NEXT: sqdmlsl2.2d v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%load1 = load <4 x i32>, ptr %A
|
||||
%load2 = load <4 x i32>, ptr %B
|
||||
%tmp3 = load <2 x i64>, ptr %C
|
||||
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
|
||||
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
|
||||
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %C, <2 x i64> %tmp5)
|
||||
ret <2 x i64> %tmp6
|
||||
}
|
||||
|
||||
define <4 x i32> @umlsl_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <4 x i32> @umlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
|
||||
; CHECK-LABEL: umlsl_lane_4s:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: umlsl.4s v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: umlsl.4s v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = load <4 x i16>, ptr %B
|
||||
%tmp3 = load <4 x i32>, ptr %C
|
||||
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
|
||||
%tmp6 = sub <4 x i32> %tmp3, %tmp5
|
||||
%tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
|
||||
%tmp6 = sub <4 x i32> %C, %tmp5
|
||||
ret <4 x i32> %tmp6
|
||||
}
|
||||
|
||||
define <2 x i64> @umlsl_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
|
||||
define <2 x i64> @umlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
|
||||
; CHECK-LABEL: umlsl_lane_2d:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr d1, [x1]
|
||||
; CHECK-NEXT: ldr d2, [x0]
|
||||
; CHECK-NEXT: ldr q0, [x2]
|
||||
; CHECK-NEXT: umlsl.2d v0, v2, v1[1]
|
||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-NEXT: umlsl.2d v2, v0, v1[1]
|
||||
; CHECK-NEXT: mov.16b v0, v2
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = load <2 x i32>, ptr %B
|
||||
%tmp3 = load <2 x i64>, ptr %C
|
||||
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
|
||||
%tmp6 = sub <2 x i64> %tmp3, %tmp5
|
||||
%tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
|
||||
%tmp6 = sub <2 x i64> %C, %tmp5
|
||||
ret <2 x i64> %tmp6
|
||||
}
|
||||
|
||||
|
||||
@@ -219,103 +219,79 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i8> @vduplane8(ptr %A) nounwind {
|
||||
define arm_aapcs_vfpcc <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
|
||||
; CHECK-LABEL: vduplane8:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vdup.8 d16, d16[1]
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vdup.8 d0, d0[1]
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <8 x i8>, ptr %A
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
%tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <8 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i16> @vduplane16(ptr %A) nounwind {
|
||||
define arm_aapcs_vfpcc <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
|
||||
; CHECK-LABEL: vduplane16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vdup.16 d16, d16[1]
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vdup.16 d0, d0[1]
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
%tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i32> @vduplane32(ptr %A) nounwind {
|
||||
define arm_aapcs_vfpcc <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
|
||||
; CHECK-LABEL: vduplane32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vdup.32 d16, d16[1]
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vdup.32 d0, d0[1]
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
|
||||
%tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <2 x float> @vduplanefloat(ptr %A) nounwind {
|
||||
define arm_aapcs_vfpcc <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
|
||||
; CHECK-LABEL: vduplanefloat:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vdup.32 d16, d16[1]
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vdup.32 d0, d0[1]
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <2 x float>, ptr %A
|
||||
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
|
||||
%tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
|
||||
ret <2 x float> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @vduplaneQ8(ptr %A) nounwind {
|
||||
define arm_aapcs_vfpcc <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
|
||||
; CHECK-LABEL: vduplaneQ8:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vdup.8 q8, d16[1]
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: vdup.8 q0, d0[1]
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <8 x i8>, ptr %A
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
%tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <16 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i16> @vduplaneQ16(ptr %A) nounwind {
|
||||
define arm_aapcs_vfpcc <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
|
||||
; CHECK-LABEL: vduplaneQ16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vdup.16 q8, d16[1]
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: vdup.16 q0, d0[1]
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <4 x i16>, ptr %A
|
||||
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
%tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i32> @vduplaneQ32(ptr %A) nounwind {
|
||||
define arm_aapcs_vfpcc <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
|
||||
; CHECK-LABEL: vduplaneQ32:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vdup.32 q8, d16[1]
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: vdup.32 q0, d0[1]
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <2 x i32>, ptr %A
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
%tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <4 x float> @vduplaneQfloat(ptr %A) nounwind {
|
||||
define arm_aapcs_vfpcc <4 x float> @vduplaneQfloat(<2 x float> %A) nounwind {
|
||||
; CHECK-LABEL: vduplaneQfloat:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vdup.32 q8, d16[1]
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: vdup.32 q0, d0[1]
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <2 x float>, ptr %A
|
||||
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
%tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user