[GlobalISel] Fix miscompile when narrowing vector loads/stores to non-byte-sized types (#136739)

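LegalizerHelper::reduceLoadStoreWidth does not work for non-byte-sized
types, because this would require (un)packing of bits across byte
boundaries. Return UnableToLegalize for such narrow types instead of
emitting incorrectly narrowed loads/stores.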

Precommit tests: #134904
This commit is contained in:
Tobias Stadler
2025-04-29 12:36:34 +01:00
committed by GitHub
parent 81870cbcc2
commit 0b5daeb2e5
4 changed files with 370 additions and 655 deletions

View File

@@ -5210,6 +5210,11 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
if (!NarrowTy.isByteSized()) {
LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
return UnableToLegalize;
}
// This implementation doesn't work for atomics. Give up instead of doing
// something invalid.
if (LdStMI.isAtomic())

View File

@@ -2,9 +2,11 @@
# RUN: llc -O0 -mtriple=aarch64 -verify-machineinstrs -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' -o - %s 2> %t.err | FileCheck %s
# RUN: FileCheck -check-prefix=ERR %s < %t.err
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<2 x s63>)) (in function: load-narrow-scalar-high-bits)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %{{[0-9]+}}:_(<8 x s9>), %{{[0-9]+}}:_(p0) :: (store (<8 x s9>), align 16) (in function: store-narrow-non-byte-sized)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<8 x s9>) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<8 x s9>), align 16) (in function: load-narrow-non-byte-sized)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<2 x s63>)) (in function: load-narrow-scalar-high-bits)
# FIXME: Scalarized stores for non-byte-sized vector elements store incorrect partial values.
# FIXME: Non-byte-sized vector elements cause fallback in LegalizerHelper::reduceLoadStoreWidth
---
name: store-narrow-non-byte-sized
tracksRegLiveness: true
@@ -15,60 +17,10 @@ body: |
; CHECK: liveins: $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 511
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16), align 16)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 257
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 1, align 1)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32)
; CHECK-NEXT: G_STORE [[TRUNC2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into unknown-address + 2)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[COPY7]]
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32)
; CHECK-NEXT: G_STORE [[TRUNC3]](s16), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 3, align 1)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[COPY9]]
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32)
; CHECK-NEXT: G_STORE [[TRUNC4]](s16), [[PTR_ADD3]](p0) :: (store (s16) into unknown-address + 4, align 4)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[COPY11]]
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[AND5]](s32)
; CHECK-NEXT: G_STORE [[TRUNC5]](s16), [[PTR_ADD4]](p0) :: (store (s16) into unknown-address + 5, align 1)
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[C]], [[COPY12]]
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[AND6]](s32)
; CHECK-NEXT: G_STORE [[TRUNC6]](s16), [[PTR_ADD5]](p0) :: (store (s16) into unknown-address + 6)
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[C3]], [[COPY13]]
; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[AND7]](s32)
; CHECK-NEXT: G_STORE [[TRUNC7]](s16), [[PTR_ADD6]](p0) :: (store (s16) into unknown-address + 7, align 1)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s9) = G_CONSTANT i9 -256
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s9) = G_CONSTANT i9 -255
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s9>) = G_BUILD_VECTOR [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9)
; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s9>), [[COPY]](p0) :: (store (<8 x s9>), align 16)
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x8
%1:_(s9) = G_CONSTANT i9 256
@@ -153,7 +105,7 @@ body: |
...
# FIXME: Scalarized loads for non-byte-sized vector elements load incorrect partial values.
# FIXME: Non-byte-sized vector elements cause fallback in LegalizerHelper::reduceLoadStoreWidth
---
name: load-narrow-non-byte-sized
tracksRegLiveness: true
@@ -164,41 +116,9 @@ body: |
; CHECK: liveins: $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load (s16), align 16)
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD]], 9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 1, align 1)
; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD1]], 9
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 2)
; CHECK-NEXT: [[ASSERT_ZEXT2:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD2]], 9
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 3, align 1)
; CHECK-NEXT: [[ASSERT_ZEXT3:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD3]], 9
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 4, align 4)
; CHECK-NEXT: [[ASSERT_ZEXT4:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD4]], 9
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 5, align 1)
; CHECK-NEXT: [[ASSERT_ZEXT5:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD5]], 9
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 6)
; CHECK-NEXT: [[ASSERT_ZEXT6:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD6]], 9
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 7, align 1)
; CHECK-NEXT: [[ASSERT_ZEXT7:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD7]], 9
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[ASSERT_ZEXT]](s16), [[ASSERT_ZEXT1]](s16), [[ASSERT_ZEXT2]](s16), [[ASSERT_ZEXT3]](s16), [[ASSERT_ZEXT4]](s16), [[ASSERT_ZEXT5]](s16), [[ASSERT_ZEXT6]](s16), [[ASSERT_ZEXT7]](s16)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 511
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s16>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; CHECK-NEXT: $q0 = COPY [[AND]](<8 x s16>)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s9>) = G_LOAD [[COPY]](p0) :: (load (<8 x s9>), align 16)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<8 x s16>) = G_ZEXT [[LOAD]](<8 x s9>)
; CHECK-NEXT: $q0 = COPY [[ZEXT]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(p0) = COPY $x8
%2:_(<8 x s9>) = G_LOAD %0(p0) :: (load (<8 x s9>), align 16)

View File

@@ -8,6 +8,7 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v4f16_v4i50
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i19
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i50
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i100
;
; Float to signed 32-bit -- Vector size variation
@@ -4136,304 +4137,198 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
}
define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-SD-LABEL: test_signed_v8f16_v8i100:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #192
; CHECK-SD-NEXT: str d10, [sp, #64] // 8-byte Folded Spill
; CHECK-SD-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #128] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #144] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 192
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: .cfi_offset b8, -104
; CHECK-SD-NEXT: .cfi_offset b9, -112
; CHECK-SD-NEXT: .cfi_offset b10, -128
; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mov x19, x8
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: movi v10.2s, #241, lsl #24
; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: fmov s9, w8
; CHECK-SD-NEXT: mov x22, #-34359738368 // =0xfffffff800000000
; CHECK-SD-NEXT: mov x23, #34359738367 // =0x7ffffffff
; CHECK-SD-NEXT: mov h0, v0.h[3]
; CHECK-SD-NEXT: fcmp s8, s10
; CHECK-SD-NEXT: csel x8, x22, x1, lt
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: csinv x9, x9, xzr, le
; CHECK-SD-NEXT: csel x8, x23, x8, gt
; CHECK-SD-NEXT: fcmp s8, s8
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x8, xzr, x8, vs
; CHECK-SD-NEXT: str x8, [sp, #72] // 8-byte Folded Spill
; CHECK-SD-NEXT: csel x8, xzr, x9, vs
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: str x8, [sp, #24] // 8-byte Folded Spill
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: fcmp s8, s10
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, x22, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: csel x9, x23, x9, gt
; CHECK-SD-NEXT: csinv x8, x8, xzr, le
; CHECK-SD-NEXT: fcmp s8, s8
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x10, xzr, x8, vs
; CHECK-SD-NEXT: csel x8, xzr, x9, vs
; CHECK-SD-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: fcmp s8, s10
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: csel x8, x22, x1, lt
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: csinv x9, x9, xzr, le
; CHECK-SD-NEXT: csel x8, x23, x8, gt
; CHECK-SD-NEXT: fcmp s8, s8
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x26, xzr, x8, vs
; CHECK-SD-NEXT: csel x8, xzr, x9, vs
; CHECK-SD-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: fcmp s8, s10
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: csel x8, x22, x1, lt
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: csinv x9, x9, xzr, le
; CHECK-SD-NEXT: csel x8, x23, x8, gt
; CHECK-SD-NEXT: fcmp s8, s8
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x27, xzr, x8, vs
; CHECK-SD-NEXT: csel x8, xzr, x9, vs
; CHECK-SD-NEXT: str x8, [sp] // 8-byte Folded Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: fcmp s8, s10
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov h0, v0.h[3]
; CHECK-SD-NEXT: csel x8, x22, x1, lt
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: csinv x9, x9, xzr, le
; CHECK-SD-NEXT: csel x8, x23, x8, gt
; CHECK-SD-NEXT: fcmp s8, s8
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x20, xzr, x8, vs
; CHECK-SD-NEXT: csel x21, xzr, x9, vs
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: fcmp s8, s10
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, x22, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: csel x9, x23, x9, gt
; CHECK-SD-NEXT: csinv x8, x8, xzr, le
; CHECK-SD-NEXT: fcmp s8, s8
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x28, xzr, x8, vs
; CHECK-SD-NEXT: csel x24, xzr, x9, vs
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: fcmp s8, s10
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: csel x8, x22, x1, lt
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: csinv x9, x9, xzr, le
; CHECK-SD-NEXT: csel x8, x23, x8, gt
; CHECK-SD-NEXT: fcmp s8, s8
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x25, xzr, x8, vs
; CHECK-SD-NEXT: csel x29, xzr, x9, vs
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: ldr x9, [sp] // 8-byte Folded Reload
; CHECK-SD-NEXT: extr x8, x24, x28, #28
; CHECK-SD-NEXT: fcmp s8, s10
; CHECK-SD-NEXT: bfi x25, x21, #36, #28
; CHECK-SD-NEXT: lsr x11, x20, #28
; CHECK-SD-NEXT: stur x9, [x19, #75]
; CHECK-SD-NEXT: extr x9, x20, x21, #28
; CHECK-SD-NEXT: stur x8, [x19, #41]
; CHECK-SD-NEXT: csel x8, x22, x1, lt
; CHECK-SD-NEXT: str x9, [x19, #16]
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT: stp x29, x25, [x19]
; CHECK-SD-NEXT: stur x10, [x19, #50]
; CHECK-SD-NEXT: lsr x10, x24, #28
; CHECK-SD-NEXT: csinv x9, x9, xzr, le
; CHECK-SD-NEXT: csel x8, x23, x8, gt
; CHECK-SD-NEXT: fcmp s8, s8
; CHECK-SD-NEXT: strb w10, [x19, #49]
; CHECK-SD-NEXT: ldp x14, x12, [sp, #8] // 16-byte Folded Reload
; CHECK-SD-NEXT: strb w11, [x19, #24]
; CHECK-SD-NEXT: csel x8, xzr, x8, vs
; CHECK-SD-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload
; CHECK-SD-NEXT: csel x9, xzr, x9, vs
; CHECK-SD-NEXT: bfi x8, x28, #36, #28
; CHECK-SD-NEXT: extr x10, x14, x12, #28
; CHECK-SD-NEXT: bfi x27, x12, #36, #28
; CHECK-SD-NEXT: ldr x12, [sp, #72] // 8-byte Folded Reload
; CHECK-SD-NEXT: bfi x26, x13, #36, #28
; CHECK-SD-NEXT: stur x9, [x19, #25]
; CHECK-SD-NEXT: lsr x9, x14, #28
; CHECK-SD-NEXT: extr x11, x12, x13, #28
; CHECK-SD-NEXT: stur x8, [x19, #33]
; CHECK-SD-NEXT: lsr x8, x12, #28
; CHECK-SD-NEXT: stur x10, [x19, #91]
; CHECK-SD-NEXT: stur x27, [x19, #83]
; CHECK-SD-NEXT: stur x11, [x19, #66]
; CHECK-SD-NEXT: stur x26, [x19, #58]
; CHECK-SD-NEXT: strb w9, [x19, #99]
; CHECK-SD-NEXT: strb w8, [x19, #74]
; CHECK-SD-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #192
; CHECK-SD-NEXT: ret
;
; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i100:
; CHECK-GI-CVT: // %bb.0:
; CHECK-GI-CVT-NEXT: mov h1, v0.h[1]
; CHECK-GI-CVT-NEXT: mov h2, v0.h[2]
; CHECK-GI-CVT-NEXT: mov x11, x8
; CHECK-GI-CVT-NEXT: fcvt s3, h0
; CHECK-GI-CVT-NEXT: mov h4, v0.h[3]
; CHECK-GI-CVT-NEXT: str wzr, [x8, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x8, #12]
; CHECK-GI-CVT-NEXT: fcvt s1, h1
; CHECK-GI-CVT-NEXT: fcvt s2, h2
; CHECK-GI-CVT-NEXT: fcvtzs x9, s3
; CHECK-GI-CVT-NEXT: fcvt s3, h4
; CHECK-GI-CVT-NEXT: fcvtzs x10, s1
; CHECK-GI-CVT-NEXT: mov h1, v0.h[4]
; CHECK-GI-CVT-NEXT: fcvtzs x12, s2
; CHECK-GI-CVT-NEXT: mov h2, v0.h[5]
; CHECK-GI-CVT-NEXT: str x9, [x8]
; CHECK-GI-CVT-NEXT: mov x9, x8
; CHECK-GI-CVT-NEXT: fcvt s1, h1
; CHECK-GI-CVT-NEXT: str x10, [x11, #12]!
; CHECK-GI-CVT-NEXT: fcvtzs x10, s3
; CHECK-GI-CVT-NEXT: mov h3, v0.h[6]
; CHECK-GI-CVT-NEXT: fcvt s2, h2
; CHECK-GI-CVT-NEXT: mov h0, v0.h[7]
; CHECK-GI-CVT-NEXT: str wzr, [x11, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x11, #12]
; CHECK-GI-CVT-NEXT: mov x11, x8
; CHECK-GI-CVT-NEXT: str x12, [x9, #25]!
; CHECK-GI-CVT-NEXT: fcvtzs x12, s1
; CHECK-GI-CVT-NEXT: str wzr, [x9, #8]
; CHECK-GI-CVT-NEXT: fcvt s1, h3
; CHECK-GI-CVT-NEXT: strb wzr, [x9, #12]
; CHECK-GI-CVT-NEXT: fcvt s0, h0
; CHECK-GI-CVT-NEXT: mov x9, x8
; CHECK-GI-CVT-NEXT: str x10, [x11, #37]!
; CHECK-GI-CVT-NEXT: fcvtzs x10, s2
; CHECK-GI-CVT-NEXT: str wzr, [x11, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x11, #12]
; CHECK-GI-CVT-NEXT: fcvtzs x11, s1
; CHECK-GI-CVT-NEXT: str x12, [x9, #50]!
; CHECK-GI-CVT-NEXT: str wzr, [x9, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x9, #12]
; CHECK-GI-CVT-NEXT: mov x9, x8
; CHECK-GI-CVT-NEXT: str x10, [x9, #62]!
; CHECK-GI-CVT-NEXT: fcvtzs x10, s0
; CHECK-GI-CVT-NEXT: str wzr, [x9, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x9, #12]
; CHECK-GI-CVT-NEXT: mov x9, x8
; CHECK-GI-CVT-NEXT: str x11, [x9, #75]!
; CHECK-GI-CVT-NEXT: str wzr, [x9, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x9, #12]
; CHECK-GI-CVT-NEXT: str x10, [x8, #87]!
; CHECK-GI-CVT-NEXT: str wzr, [x8, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x8, #12]
; CHECK-GI-CVT-NEXT: ret
;
; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i100:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
; CHECK-GI-FP16-NEXT: mov x11, x8
; CHECK-GI-FP16-NEXT: fcvtzs x9, h0
; CHECK-GI-FP16-NEXT: str wzr, [x8, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x8, #12]
; CHECK-GI-FP16-NEXT: fcvtzs x10, h1
; CHECK-GI-FP16-NEXT: mov h1, v0.h[3]
; CHECK-GI-FP16-NEXT: fcvtzs x12, h2
; CHECK-GI-FP16-NEXT: mov h2, v0.h[4]
; CHECK-GI-FP16-NEXT: str x9, [x8]
; CHECK-GI-FP16-NEXT: mov x9, x8
; CHECK-GI-FP16-NEXT: str x10, [x11, #12]!
; CHECK-GI-FP16-NEXT: fcvtzs x10, h1
; CHECK-GI-FP16-NEXT: mov h1, v0.h[5]
; CHECK-GI-FP16-NEXT: str wzr, [x11, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x11, #12]
; CHECK-GI-FP16-NEXT: mov x11, x8
; CHECK-GI-FP16-NEXT: str x12, [x9, #25]!
; CHECK-GI-FP16-NEXT: fcvtzs x12, h2
; CHECK-GI-FP16-NEXT: str wzr, [x9, #8]
; CHECK-GI-FP16-NEXT: mov h2, v0.h[6]
; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
; CHECK-GI-FP16-NEXT: strb wzr, [x9, #12]
; CHECK-GI-FP16-NEXT: fcvtzs x9, h1
; CHECK-GI-FP16-NEXT: str x10, [x11, #37]!
; CHECK-GI-FP16-NEXT: mov x10, x8
; CHECK-GI-FP16-NEXT: str wzr, [x11, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x11, #12]
; CHECK-GI-FP16-NEXT: fcvtzs x11, h2
; CHECK-GI-FP16-NEXT: str x12, [x10, #50]!
; CHECK-GI-FP16-NEXT: str wzr, [x10, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x10, #12]
; CHECK-GI-FP16-NEXT: mov x10, x8
; CHECK-GI-FP16-NEXT: str x9, [x10, #62]!
; CHECK-GI-FP16-NEXT: fcvtzs x9, h0
; CHECK-GI-FP16-NEXT: str wzr, [x10, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x10, #12]
; CHECK-GI-FP16-NEXT: mov x10, x8
; CHECK-GI-FP16-NEXT: str x11, [x10, #75]!
; CHECK-GI-FP16-NEXT: str wzr, [x10, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x10, #12]
; CHECK-GI-FP16-NEXT: str x9, [x8, #87]!
; CHECK-GI-FP16-NEXT: str wzr, [x8, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x8, #12]
; CHECK-GI-FP16-NEXT: ret
; CHECK-LABEL: test_signed_v8f16_v8i100:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #192
; CHECK-NEXT: str d10, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #128] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #144] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 192
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: .cfi_offset b8, -104
; CHECK-NEXT: .cfi_offset b9, -112
; CHECK-NEXT: .cfi_offset b10, -128
; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: mov x19, x8
; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: movi v10.2s, #241, lsl #24
; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov x22, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: mov x23, #34359738367 // =0x7ffffffff
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcmp s8, s10
; CHECK-NEXT: csel x8, x22, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x23, x8, gt
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x8, xzr, x8, vs
; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill
; CHECK-NEXT: csel x8, xzr, x9, vs
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s10
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x22, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x9, x23, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x10, xzr, x8, vs
; CHECK-NEXT: csel x8, xzr, x9, vs
; CHECK-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s10
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x8, x22, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x23, x8, gt
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x26, xzr, x8, vs
; CHECK-NEXT: csel x8, xzr, x9, vs
; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s10
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: csel x8, x22, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x23, x8, gt
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x27, xzr, x8, vs
; CHECK-NEXT: csel x8, xzr, x9, vs
; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s10
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x8, x22, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x23, x8, gt
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x20, xzr, x8, vs
; CHECK-NEXT: csel x21, xzr, x9, vs
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s10
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x22, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x9, x23, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x28, xzr, x8, vs
; CHECK-NEXT: csel x24, xzr, x9, vs
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s10
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x8, x22, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x23, x8, gt
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x25, xzr, x8, vs
; CHECK-NEXT: csel x29, xzr, x9, vs
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: ldr x9, [sp] // 8-byte Folded Reload
; CHECK-NEXT: extr x8, x24, x28, #28
; CHECK-NEXT: fcmp s8, s10
; CHECK-NEXT: bfi x25, x21, #36, #28
; CHECK-NEXT: lsr x11, x20, #28
; CHECK-NEXT: stur x9, [x19, #75]
; CHECK-NEXT: extr x9, x20, x21, #28
; CHECK-NEXT: stur x8, [x19, #41]
; CHECK-NEXT: csel x8, x22, x1, lt
; CHECK-NEXT: str x9, [x19, #16]
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: stp x29, x25, [x19]
; CHECK-NEXT: stur x10, [x19, #50]
; CHECK-NEXT: lsr x10, x24, #28
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x23, x8, gt
; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: strb w10, [x19, #49]
; CHECK-NEXT: ldp x14, x12, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: strb w11, [x19, #24]
; CHECK-NEXT: csel x8, xzr, x8, vs
; CHECK-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: csel x9, xzr, x9, vs
; CHECK-NEXT: bfi x8, x28, #36, #28
; CHECK-NEXT: extr x10, x14, x12, #28
; CHECK-NEXT: bfi x27, x12, #36, #28
; CHECK-NEXT: ldr x12, [sp, #72] // 8-byte Folded Reload
; CHECK-NEXT: bfi x26, x13, #36, #28
; CHECK-NEXT: stur x9, [x19, #25]
; CHECK-NEXT: lsr x9, x14, #28
; CHECK-NEXT: extr x11, x12, x13, #28
; CHECK-NEXT: stur x8, [x19, #33]
; CHECK-NEXT: lsr x8, x12, #28
; CHECK-NEXT: stur x10, [x19, #91]
; CHECK-NEXT: stur x27, [x19, #83]
; CHECK-NEXT: stur x11, [x19, #66]
; CHECK-NEXT: stur x26, [x19, #58]
; CHECK-NEXT: strb w9, [x19, #99]
; CHECK-NEXT: strb w8, [x19, #74]
; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #192
; CHECK-NEXT: ret
%x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f)
ret <8 x i100> %x
}

View File

@@ -8,6 +8,7 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v4f16_v4i50
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i19
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i50
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i100
;
; Float to unsigned 32-bit -- Vector size variation
@@ -3405,271 +3406,165 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) {
}
define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-SD-LABEL: test_unsigned_v8f16_v8i100:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #176
; CHECK-SD-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 176
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: .cfi_offset b8, -104
; CHECK-SD-NEXT: .cfi_offset b9, -112
; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mov x19, x8
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov w8, #1904214015 // =0x717fffff
; CHECK-SD-NEXT: fcmp s8, #0.0
; CHECK-SD-NEXT: fmov s9, w8
; CHECK-SD-NEXT: mov x23, #68719476735 // =0xfffffffff
; CHECK-SD-NEXT: mov h0, v0.h[3]
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: csel x8, xzr, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x10, x23, x8, gt
; CHECK-SD-NEXT: csinv x8, x9, xzr, le
; CHECK-SD-NEXT: stp x8, x10, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: fcmp s8, #0.0
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, xzr, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x9, x23, x9, gt
; CHECK-SD-NEXT: csinv x8, x8, xzr, le
; CHECK-SD-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: fcmp s8, #0.0
; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, xzr, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csinv x8, x8, xzr, le
; CHECK-SD-NEXT: csel x25, x23, x9, gt
; CHECK-SD-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: fcmp s8, #0.0
; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, xzr, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x26, x23, x9, gt
; CHECK-SD-NEXT: csinv x28, x8, xzr, le
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: fcmp s8, #0.0
; CHECK-SD-NEXT: mov h0, v0.h[3]
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, xzr, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x29, x23, x9, gt
; CHECK-SD-NEXT: csinv x20, x8, xzr, le
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: fcmp s8, #0.0
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, xzr, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x21, x23, x9, gt
; CHECK-SD-NEXT: csinv x27, x8, xzr, le
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: fcmp s8, #0.0
; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, xzr, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: fcvt s8, h0
; CHECK-SD-NEXT: csel x22, x23, x9, gt
; CHECK-SD-NEXT: csinv x24, x8, xzr, le
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: extr x8, x21, x27, #28
; CHECK-SD-NEXT: extr x9, x29, x20, #28
; CHECK-SD-NEXT: stur x28, [x19, #75]
; CHECK-SD-NEXT: fcmp s8, #0.0
; CHECK-SD-NEXT: bfi x22, x20, #36, #28
; CHECK-SD-NEXT: lsr x11, x29, #28
; CHECK-SD-NEXT: stur x8, [x19, #41]
; CHECK-SD-NEXT: str x9, [x19, #16]
; CHECK-SD-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT: csel x8, xzr, x0, lt
; CHECK-SD-NEXT: csel x9, xzr, x1, lt
; CHECK-SD-NEXT: fcmp s8, s9
; CHECK-SD-NEXT: stp x24, x22, [x19]
; CHECK-SD-NEXT: stur x10, [x19, #50]
; CHECK-SD-NEXT: lsr x10, x21, #28
; CHECK-SD-NEXT: strb w11, [x19, #24]
; CHECK-SD-NEXT: strb w10, [x19, #49]
; CHECK-SD-NEXT: csel x9, x23, x9, gt
; CHECK-SD-NEXT: csinv x8, x8, xzr, le
; CHECK-SD-NEXT: ldp x12, x11, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: bfi x9, x27, #36, #28
; CHECK-SD-NEXT: stur x8, [x19, #25]
; CHECK-SD-NEXT: stur x9, [x19, #33]
; CHECK-SD-NEXT: extr x10, x11, x12, #28
; CHECK-SD-NEXT: bfi x26, x12, #36, #28
; CHECK-SD-NEXT: stur x10, [x19, #91]
; CHECK-SD-NEXT: ldp x10, x9, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: stur x26, [x19, #83]
; CHECK-SD-NEXT: extr x8, x9, x10, #28
; CHECK-SD-NEXT: bfi x25, x10, #36, #28
; CHECK-SD-NEXT: lsr x9, x9, #28
; CHECK-SD-NEXT: stur x8, [x19, #66]
; CHECK-SD-NEXT: lsr x8, x11, #28
; CHECK-SD-NEXT: stur x25, [x19, #58]
; CHECK-SD-NEXT: strb w8, [x19, #99]
; CHECK-SD-NEXT: strb w9, [x19, #74]
; CHECK-SD-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #176
; CHECK-SD-NEXT: ret
;
; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i100:
; CHECK-GI-CVT: // %bb.0:
; CHECK-GI-CVT-NEXT: mov h1, v0.h[1]
; CHECK-GI-CVT-NEXT: mov h2, v0.h[2]
; CHECK-GI-CVT-NEXT: mov x11, x8
; CHECK-GI-CVT-NEXT: fcvt s3, h0
; CHECK-GI-CVT-NEXT: mov h4, v0.h[3]
; CHECK-GI-CVT-NEXT: str wzr, [x8, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x8, #12]
; CHECK-GI-CVT-NEXT: fcvt s1, h1
; CHECK-GI-CVT-NEXT: fcvt s2, h2
; CHECK-GI-CVT-NEXT: fcvtzu x9, s3
; CHECK-GI-CVT-NEXT: fcvt s3, h4
; CHECK-GI-CVT-NEXT: fcvtzu x10, s1
; CHECK-GI-CVT-NEXT: mov h1, v0.h[4]
; CHECK-GI-CVT-NEXT: fcvtzu x12, s2
; CHECK-GI-CVT-NEXT: mov h2, v0.h[5]
; CHECK-GI-CVT-NEXT: str x9, [x8]
; CHECK-GI-CVT-NEXT: mov x9, x8
; CHECK-GI-CVT-NEXT: fcvt s1, h1
; CHECK-GI-CVT-NEXT: str x10, [x11, #12]!
; CHECK-GI-CVT-NEXT: fcvtzu x10, s3
; CHECK-GI-CVT-NEXT: mov h3, v0.h[6]
; CHECK-GI-CVT-NEXT: fcvt s2, h2
; CHECK-GI-CVT-NEXT: mov h0, v0.h[7]
; CHECK-GI-CVT-NEXT: str wzr, [x11, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x11, #12]
; CHECK-GI-CVT-NEXT: mov x11, x8
; CHECK-GI-CVT-NEXT: str x12, [x9, #25]!
; CHECK-GI-CVT-NEXT: fcvtzu x12, s1
; CHECK-GI-CVT-NEXT: str wzr, [x9, #8]
; CHECK-GI-CVT-NEXT: fcvt s1, h3
; CHECK-GI-CVT-NEXT: strb wzr, [x9, #12]
; CHECK-GI-CVT-NEXT: fcvt s0, h0
; CHECK-GI-CVT-NEXT: mov x9, x8
; CHECK-GI-CVT-NEXT: str x10, [x11, #37]!
; CHECK-GI-CVT-NEXT: fcvtzu x10, s2
; CHECK-GI-CVT-NEXT: str wzr, [x11, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x11, #12]
; CHECK-GI-CVT-NEXT: fcvtzu x11, s1
; CHECK-GI-CVT-NEXT: str x12, [x9, #50]!
; CHECK-GI-CVT-NEXT: str wzr, [x9, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x9, #12]
; CHECK-GI-CVT-NEXT: mov x9, x8
; CHECK-GI-CVT-NEXT: str x10, [x9, #62]!
; CHECK-GI-CVT-NEXT: fcvtzu x10, s0
; CHECK-GI-CVT-NEXT: str wzr, [x9, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x9, #12]
; CHECK-GI-CVT-NEXT: mov x9, x8
; CHECK-GI-CVT-NEXT: str x11, [x9, #75]!
; CHECK-GI-CVT-NEXT: str wzr, [x9, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x9, #12]
; CHECK-GI-CVT-NEXT: str x10, [x8, #87]!
; CHECK-GI-CVT-NEXT: str wzr, [x8, #8]
; CHECK-GI-CVT-NEXT: strb wzr, [x8, #12]
; CHECK-GI-CVT-NEXT: ret
;
; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i100:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
; CHECK-GI-FP16-NEXT: mov x11, x8
; CHECK-GI-FP16-NEXT: fcvtzu x9, h0
; CHECK-GI-FP16-NEXT: str wzr, [x8, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x8, #12]
; CHECK-GI-FP16-NEXT: fcvtzu x10, h1
; CHECK-GI-FP16-NEXT: mov h1, v0.h[3]
; CHECK-GI-FP16-NEXT: fcvtzu x12, h2
; CHECK-GI-FP16-NEXT: mov h2, v0.h[4]
; CHECK-GI-FP16-NEXT: str x9, [x8]
; CHECK-GI-FP16-NEXT: mov x9, x8
; CHECK-GI-FP16-NEXT: str x10, [x11, #12]!
; CHECK-GI-FP16-NEXT: fcvtzu x10, h1
; CHECK-GI-FP16-NEXT: mov h1, v0.h[5]
; CHECK-GI-FP16-NEXT: str wzr, [x11, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x11, #12]
; CHECK-GI-FP16-NEXT: mov x11, x8
; CHECK-GI-FP16-NEXT: str x12, [x9, #25]!
; CHECK-GI-FP16-NEXT: fcvtzu x12, h2
; CHECK-GI-FP16-NEXT: str wzr, [x9, #8]
; CHECK-GI-FP16-NEXT: mov h2, v0.h[6]
; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
; CHECK-GI-FP16-NEXT: strb wzr, [x9, #12]
; CHECK-GI-FP16-NEXT: fcvtzu x9, h1
; CHECK-GI-FP16-NEXT: str x10, [x11, #37]!
; CHECK-GI-FP16-NEXT: mov x10, x8
; CHECK-GI-FP16-NEXT: str wzr, [x11, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x11, #12]
; CHECK-GI-FP16-NEXT: fcvtzu x11, h2
; CHECK-GI-FP16-NEXT: str x12, [x10, #50]!
; CHECK-GI-FP16-NEXT: str wzr, [x10, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x10, #12]
; CHECK-GI-FP16-NEXT: mov x10, x8
; CHECK-GI-FP16-NEXT: str x9, [x10, #62]!
; CHECK-GI-FP16-NEXT: fcvtzu x9, h0
; CHECK-GI-FP16-NEXT: str wzr, [x10, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x10, #12]
; CHECK-GI-FP16-NEXT: mov x10, x8
; CHECK-GI-FP16-NEXT: str x11, [x10, #75]!
; CHECK-GI-FP16-NEXT: str wzr, [x10, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x10, #12]
; CHECK-GI-FP16-NEXT: str x9, [x8, #87]!
; CHECK-GI-FP16-NEXT: str wzr, [x8, #8]
; CHECK-GI-FP16-NEXT: strb wzr, [x8, #12]
; CHECK-GI-FP16-NEXT: ret
; CHECK-LABEL: test_unsigned_v8f16_v8i100:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #176
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 176
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: .cfi_offset b8, -104
; CHECK-NEXT: .cfi_offset b9, -112
; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: mov x19, x8
; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov x23, #68719476735 // =0xfffffffff
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x10, x23, x8, gt
; CHECK-NEXT: csinv x8, x9, xzr, le
; CHECK-NEXT: stp x8, x10, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x9, x23, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x25, x23, x9, gt
; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x26, x23, x9, gt
; CHECK-NEXT: csinv x28, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x29, x23, x9, gt
; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x21, x23, x9, gt
; CHECK-NEXT: csinv x27, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csel x22, x23, x9, gt
; CHECK-NEXT: csinv x24, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: extr x8, x21, x27, #28
; CHECK-NEXT: extr x9, x29, x20, #28
; CHECK-NEXT: stur x28, [x19, #75]
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: bfi x22, x20, #36, #28
; CHECK-NEXT: lsr x11, x29, #28
; CHECK-NEXT: stur x8, [x19, #41]
; CHECK-NEXT: str x9, [x19, #16]
; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: stp x24, x22, [x19]
; CHECK-NEXT: stur x10, [x19, #50]
; CHECK-NEXT: lsr x10, x21, #28
; CHECK-NEXT: strb w11, [x19, #24]
; CHECK-NEXT: strb w10, [x19, #49]
; CHECK-NEXT: csel x9, x23, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: ldp x12, x11, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bfi x9, x27, #36, #28
; CHECK-NEXT: stur x8, [x19, #25]
; CHECK-NEXT: stur x9, [x19, #33]
; CHECK-NEXT: extr x10, x11, x12, #28
; CHECK-NEXT: bfi x26, x12, #36, #28
; CHECK-NEXT: stur x10, [x19, #91]
; CHECK-NEXT: ldp x10, x9, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: stur x26, [x19, #83]
; CHECK-NEXT: extr x8, x9, x10, #28
; CHECK-NEXT: bfi x25, x10, #36, #28
; CHECK-NEXT: lsr x9, x9, #28
; CHECK-NEXT: stur x8, [x19, #66]
; CHECK-NEXT: lsr x8, x11, #28
; CHECK-NEXT: stur x25, [x19, #58]
; CHECK-NEXT: strb w8, [x19, #99]
; CHECK-NEXT: strb w9, [x19, #74]
; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #176
; CHECK-NEXT: ret
%x = call <8 x i100> @llvm.fptoui.sat.v8f16.v8i100(<8 x half> %f)
ret <8 x i100> %x
}