[NFC][InstCombine][AArch64] Add simplify tests for reversed SVE intrinsics.

Add missing tests for fdivr, fsubr, sdivr, subr & udivr. Add a test case to demonstrate incorrect poison propagation.
@@ -103,6 +103,16 @@ define <vscale x 4 x i32> @constant_mul_u_after_striping_inactive_lanes(<vscale
  ret <vscale x 4 x i32> %3
}

; SVE intrinsics don't have the same poison propagation rules as the IR.
define <vscale x 4 x i32> @dont_propagate_poison(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @dont_propagate_poison(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: ret <vscale x 4 x i32> poison
;
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 1))
  ret <vscale x 4 x i32> %r
}
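
; NOTE: the fold above relies on IR-style poison propagation ('and' with a
; poison operand is poison). Since the intrinsic does not share those rules,
; simplifying the call to poison is the incorrect behaviour this test records.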

; The following tests demonstrate the operations for which hooks are in place to
; enable simplification. Given the simplifications themselves are common code, it
; is assumed they are already well tested elsewhere.
@@ -202,6 +212,16 @@ define <vscale x 4 x float> @constant_fdiv_u(<vscale x 4 x i1> %pg) #0 {
  ret <vscale x 4 x float> %r
}

define <vscale x 4 x float> @constant_fdivr(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @constant_fdivr(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 6.000000e+00), <vscale x 4 x float> splat (float 1.200000e+01))
; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
;
  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 6.0), <vscale x 4 x float> splat (float 12.0))
  ret <vscale x 4 x float> %r
}
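
; NOTE: fdivr is fdiv with the data operands reversed, so active lanes compute
; 12.0 / 6.0, i.e. splat (float 2.0); for this merging form the inactive lanes
; are expected to take the first data operand.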

define <vscale x 4 x float> @constant_fmul(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @constant_fmul(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -240,6 +260,16 @@ define <vscale x 4 x float> @constant_fsub_u(<vscale x 4 x i1> %pg) #0 {
  ret <vscale x 4 x float> %r
}

define <vscale x 4 x float> @constant_fsubr(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @constant_fsubr(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> splat (float 6.000000e+00), <vscale x 4 x float> splat (float 7.000000e+00))
; CHECK-NEXT: ret <vscale x 4 x float> [[R]]
;
  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> splat (float 6.0), <vscale x 4 x float> splat (float 7.0))
  ret <vscale x 4 x float> %r
}
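
; NOTE: fsubr reverses fsub, so active lanes compute 7.0 - 6.0, i.e.
; splat (float 1.0).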

define <vscale x 4 x i32> @constant_mul(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_mul(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -341,6 +371,39 @@ define <vscale x 4 x i32> @constant_sdiv_u_with_overflow(<vscale x 4 x i1> %pg)
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 -1))
  ret <vscale x 4 x i32> %r
}

define <vscale x 4 x i32> @constant_sdivr(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 -7))
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 -7))
  ret <vscale x 4 x i32> %r
}
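
; NOTE: sdivr reverses sdiv, so active lanes compute -7 / 3, which truncates
; toward zero to splat (i32 -2).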

; The intrinsic's IR equivalent does not support divide-by-zero.
define <vscale x 4 x i32> @constant_sdivr_by_zero(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr_by_zero(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 -7))
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 0), <vscale x 4 x i32> splat (i32 -7))
  ret <vscale x 4 x i32> %r
}
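
; NOTE: with the operands reversed this is -7 / 0. IR 'sdiv' by zero is
; undefined behaviour, while the SVE instruction produces a defined result, so
; the call must not be folded.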

; The intrinsic's IR equivalent does not support overflow.
define <vscale x 4 x i32> @constant_sdivr_with_overflow(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_sdivr_with_overflow(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648))
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648))
  ret <vscale x 4 x i32> %r
}
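
; NOTE: reversed, this is INT32_MIN / -1, the one signed i32 division that
; overflows; IR 'sdiv' leaves that undefined, so again no fold is possible.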

define <vscale x 4 x i32> @constant_sub(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_sub(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -360,6 +423,16 @@ define <vscale x 4 x i32> @constant_sub_u(<vscale x 4 x i1> %pg) #0 {
  ret <vscale x 4 x i32> %r
}

define <vscale x 4 x i32> @constant_subr(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_subr(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 7), <vscale x 4 x i32> splat (i32 3))
  ret <vscale x 4 x i32> %r
}
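
; NOTE: subr reverses sub, so active lanes compute 3 - 7, i.e. splat (i32 -4).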

define <vscale x 4 x i32> @constant_udiv(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_udiv(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
@@ -402,6 +475,27 @@ define <vscale x 4 x i32> @constant_udiv_u_by_zero(<vscale x 4 x i1> %pg) #0 {
  ret <vscale x 4 x i32> %r
}

define <vscale x 4 x i32> @constant_udivr(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_udivr(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 7))
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 3), <vscale x 4 x i32> splat (i32 7))
  ret <vscale x 4 x i32> %r
}
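
; NOTE: udivr reverses udiv, so active lanes compute 7 / 3, i.e. splat (i32 2).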

; The intrinsic's IR equivalent does not support divide-by-zero.
define <vscale x 4 x i32> @constant_udivr_by_zero(<vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @constant_udivr_by_zero(
; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 7))
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 0), <vscale x 4 x i32> splat (i32 7))
  ret <vscale x 4 x i32> %r
}
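
; NOTE: reversed, this is 7 / 0; as with sdivr, IR 'udiv' by zero is undefined
; behaviour, so the call has to remain.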

declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)

declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -410,8 +504,11 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vsc
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -424,8 +521,10 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1>, <

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)