[AArch64][GlobalISel] Add missing legalization for extract element from i8 vectors wider than v16i8.

This commit is contained in:
David Green
2024-02-19 07:26:57 +00:00
parent dd7386d85f
commit dc1b772933
3 changed files with 42 additions and 21 deletions

View File

@@ -863,6 +863,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s16, 8)
.clampMaxNumElements(1, s8, 16)
.clampMaxNumElements(1, p0, 2);
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)

View File

@@ -858,21 +858,24 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000)
; CHECK-NEXT: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cond:_(s1) = G_IMPLICIT_DEF
; CHECK-NEXT: %val_1:_(<32 x s8>) = G_IMPLICIT_DEF
; CHECK-NEXT: G_BRCOND %cond(s1), %bb.2
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF1]], [[C]]
; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %val_2:_(<32 x s8>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: %phi:_(<32 x s8>) = G_PHI %val_2(<32 x s8>), %bb.1, %val_1(<32 x s8>), %bb.0
; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %one(s8)
; CHECK-NEXT: %extract:_(s8) = G_EXTRACT_VECTOR_ELT %phi(<32 x s8>), [[SEXT]](s64)
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s8>) = G_PHI [[BUILD_VECTOR1]](<16 x s8>), %bb.1, [[BUILD_VECTOR]](<16 x s8>), %bb.0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: %extract:_(s8) = G_EXTRACT_VECTOR_ELT [[PHI]](<16 x s8>), [[C1]](s64)
; CHECK-NEXT: $b0 = COPY %extract(s8)
; CHECK-NEXT: RET_ReallyLR implicit $b0
bb.0:

View File

@@ -39,9 +39,6 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_2
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_c
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v32i8_0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v32i8_2
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v32i8_c
define <2 x double> @insert_v2f64_0(<2 x double> %a, double %b, i32 %c) {
; CHECK-LABEL: insert_v2f64_0:
@@ -1670,16 +1667,36 @@ entry:
}
define i8 @extract_v32i8_c(<32 x i8> %a, i32 %c) {
; CHECK-LABEL: extract_v32i8_c:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: and x8, x0, #0x1f
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldrb w0, [x9, x8]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
; CHECK-SD-LABEL: extract_v32i8_c:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NEXT: and x8, x0, #0x1f
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldrb w0, [x9, x8]
; CHECK-SD-NEXT: add sp, sp, #32
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: extract_v32i8_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-GI-NEXT: sub x9, sp, #48
; CHECK-GI-NEXT: mov x29, sp
; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0
; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
; CHECK-GI-NEXT: .cfi_offset w30, -8
; CHECK-GI-NEXT: .cfi_offset w29, -16
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: stp q0, q1, [sp]
; CHECK-GI-NEXT: mov x10, sp
; CHECK-GI-NEXT: and x8, x8, #0x1f
; CHECK-GI-NEXT: lsl x9, x8, #1
; CHECK-GI-NEXT: sub x8, x9, x8
; CHECK-GI-NEXT: ldrb w0, [x10, x8]
; CHECK-GI-NEXT: mov sp, x29
; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
%d = extractelement <32 x i8> %a, i32 %c
ret i8 %d