[LLVM][CodeGen][SVE] Add lowering for 3-way VECTOR_(DE)INTERLEAVE operations. (#162502)
SVE has no in-register instructions to interleave or deinterleave three vectors, but we can perform the operation through memory by using ld3/st3.
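For reference, this is the kind of IR the new lowering handles in the deinterleave direction: three stores of the operands to a stack slot followed by a single predicated ld3. A minimal sketch mirroring the tests below (the function name is illustrative):

; Hypothetical example: 3-way deinterleave of a scalable vector.
define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @deinterleave3_example(<vscale x 12 x i32> %vec) {
  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32> %vec)
  ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %res
}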
@@ -30606,6 +30606,43 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
  assert(OpVT.isScalableVector() &&
         "Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");

  if (Op->getNumOperands() == 3) {
    // aarch64_sve_ld3 only supports packed datatypes.
    EVT PackedVT = getPackedSVEVectorVT(OpVT.getVectorElementCount());
    Align Alignment = DAG.getReducedAlign(PackedVT, /*UseABI=*/false);
    SDValue StackPtr =
        DAG.CreateStackTemporary(PackedVT.getStoreSize() * 3, Alignment);

    // Write out unmodified operands.
    SmallVector<SDValue, 3> Chains;
    for (unsigned I = 0; I < 3; ++I) {
      SDValue Ptr =
          DAG.getMemBasePlusOffset(StackPtr, PackedVT.getStoreSize() * I, DL);
      SDValue V = getSVESafeBitCast(PackedVT, Op.getOperand(I), DAG);
      Chains.push_back(
          DAG.getStore(DAG.getEntryNode(), DL, V, Ptr, MachinePointerInfo()));
    }

    Intrinsic::ID IntID = Intrinsic::aarch64_sve_ld3_sret;
    EVT PredVT = PackedVT.changeVectorElementType(MVT::i1);

    SmallVector<SDValue, 7> Ops;
    Ops.push_back(DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains));
    Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
    Ops.push_back(DAG.getConstant(1, DL, PredVT));
    Ops.push_back(StackPtr);

    // Read back and deinterleave data.
    SDVTList VTs = DAG.getVTList(PackedVT, PackedVT, PackedVT, MVT::Other);
    SDValue LD3 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);

    SmallVector<SDValue, 3> Results;
    Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(0), DAG));
    Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(1), DAG));
    Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(2), DAG));
    return DAG.getMergeValues(Results, DL);
  }

  // Are multi-register uzp instructions available?
  if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
      OpVT.getVectorElementType() != MVT::i1) {
@@ -30647,6 +30684,42 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
  assert(OpVT.isScalableVector() &&
         "Expected scalable vector in LowerVECTOR_INTERLEAVE.");

  if (Op->getNumOperands() == 3) {
    // aarch64_sve_st3 only supports packed datatypes.
    EVT PackedVT = getPackedSVEVectorVT(OpVT.getVectorElementCount());
    SmallVector<SDValue, 3> InVecs;
    for (SDValue V : Op->ops())
      InVecs.push_back(getSVESafeBitCast(PackedVT, V, DAG));

    Align Alignment = DAG.getReducedAlign(PackedVT, /*UseABI=*/false);
    SDValue StackPtr =
        DAG.CreateStackTemporary(PackedVT.getStoreSize() * 3, Alignment);

    Intrinsic::ID IntID = Intrinsic::aarch64_sve_st3;
    EVT PredVT = PackedVT.changeVectorElementType(MVT::i1);

    SmallVector<SDValue, 7> Ops;
    Ops.push_back(DAG.getEntryNode());
    Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
    Ops.append(InVecs);
    Ops.push_back(DAG.getConstant(1, DL, PredVT));
    Ops.push_back(StackPtr);

    // Interleave operands and store.
    SDValue Chain = DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops);

    // Read back the interleaved data.
    SmallVector<SDValue, 3> Results;
    for (unsigned I = 0; I < 3; ++I) {
      SDValue Ptr =
          DAG.getMemBasePlusOffset(StackPtr, PackedVT.getStoreSize() * I, DL);
      SDValue L = DAG.getLoad(PackedVT, DL, Chain, Ptr, MachinePointerInfo());
      Results.push_back(getSVESafeBitCast(OpVT, L, DAG));
    }

    return DAG.getMergeValues(Results, DL);
  }

  // Are multi-register zip instructions available?
  if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
      OpVT.getVectorElementType() != MVT::i1) {
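The interleave direction is the mirror image: a single predicated st3 interleaves the three narrow operands into the stack slot, and three contiguous loads read back the wide result. A minimal sketch of the corresponding IR (function name illustrative; the tests below exercise the same intrinsic):

; Hypothetical example: 3-way interleave of three scalable vectors.
define <vscale x 12 x i32> @interleave3_example(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
  %res = call <vscale x 12 x i32> @llvm.vector.interleave3.nxv12i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 12 x i32> %res
}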
@@ -231,6 +231,274 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
  ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}

define {<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv6f16(<vscale x 6 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv6f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpkhi z2.d, z0.s
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: str z1, [sp, #2, mul vl]
; CHECK-NEXT: str z2, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>} @llvm.vector.deinterleave3.nxv6f16(<vscale x 6 x half> %vec)
  ret {<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>} %retval
}

define {<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv12f16(<vscale x 12 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv12f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpkhi z2.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: str z1, [sp, #2, mul vl]
; CHECK-NEXT: str z2, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave3.nxv12f16(<vscale x 12 x half> %vec)
  ret {<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>} %retval
}

define {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv24f16(<vscale x 24 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv24f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: str z2, [sp, #2, mul vl]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave3.nxv24f16(<vscale x 24 x half> %vec)
  ret {<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>} %retval
}

define {<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv6f32(<vscale x 6 x float> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2f32_nxv6f32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpkhi z2.d, z0.s
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: str z1, [sp, #2, mul vl]
; CHECK-NEXT: str z2, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave3.nxv6f32(<vscale x 6 x float> %vec)
  ret {<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>} %retval
}

define {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv12f32(<vscale x 12 x float> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4f32_nxv12f32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: str z2, [sp, #2, mul vl]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave3.nxv12f32(<vscale x 12 x float> %vec)
  ret {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} %retval
}

define {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv6f64(<vscale x 6 x double> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv6f64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: str z2, [sp, #2, mul vl]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave3.nxv6f64(<vscale x 6 x double> %vec)
  ret {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} %retval
}

define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2bf16_nxv6bf16(<vscale x 6 x bfloat> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2bf16_nxv6bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpkhi z2.d, z0.s
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: str z1, [sp, #2, mul vl]
; CHECK-NEXT: str z2, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave3.nxv6bf16(<vscale x 6 x bfloat> %vec)
  ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
}

define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4bf16_nxv12bf16(<vscale x 12 x bfloat> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4bf16_nxv12bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpkhi z2.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: str z1, [sp, #2, mul vl]
; CHECK-NEXT: str z2, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave3.nxv12bf16(<vscale x 12 x bfloat> %vec)
  ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
}

define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8bf16_nxv24bf16(<vscale x 24 x bfloat> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8bf16_nxv24bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: str z2, [sp, #2, mul vl]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave3.nxv24bf16(<vscale x 24 x bfloat> %vec)
  ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
}

; Integers

define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv48i8(<vscale x 48 x i8> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv48i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: str z2, [sp, #2, mul vl]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave3.nxv48i8(<vscale x 48 x i8> %vec)
  ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %retval
}

define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv24i16(<vscale x 24 x i16> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv24i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: str z2, [sp, #2, mul vl]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave3.nxv24i16(<vscale x 24 x i16> %vec)
  ret {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} %retval
}

define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv12i32(<vscale x 12 x i32> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv12i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: str z2, [sp, #2, mul vl]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32> %vec)
  ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %retval
}

define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv6i64(<vscale x 6 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv6i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: str z2, [sp, #2, mul vl]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave3.nxv6i64(<vscale x 6 x i64> %vec)
  ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}

define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv64i8(<vscale x 64 x i8> %vec) {
; SVE-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
; SVE: // %bb.0:
@@ -599,31 +867,3 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv
  %retval = call {<vscale x 2 x i32>, <vscale x 2 x i32>} @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %vec)
  ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
}

; Floating declarations
declare {<vscale x 2 x half>,<vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
declare {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave2.nxv4f32(<vscale x 4 x float>)
declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half>)
declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)

; Integer declarations
declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)

; Predicated declarations
declare {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1>)
declare {<vscale x 8 x i1>, <vscale x 8 x i1>} @llvm.vector.deinterleave2.nxv16i1(<vscale x 16 x i1>)
declare {<vscale x 4 x i1>, <vscale x 4 x i1>} @llvm.vector.deinterleave2.nxv8i1(<vscale x 8 x i1>)
declare {<vscale x 2 x i1>, <vscale x 2 x i1>} @llvm.vector.deinterleave2.nxv4i1(<vscale x 4 x i1>)

; Illegal size type
declare {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64>)
declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64>)

declare {<vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave2.nxv16i8(<vscale x 16 x i8>)
declare {<vscale x 4 x i16>, <vscale x 4 x i16>} @llvm.vector.deinterleave2.nxv8i16(<vscale x 8 x i16>)
declare {<vscale x 2 x i32>, <vscale x 2 x i32>} @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32>)

@@ -221,6 +221,318 @@ define <vscale x 4 x i64> @interleave2_nxv4i64(<vscale x 2 x i64> %vec0, <vscale
  ret <vscale x 4 x i64> %retval
}

define <vscale x 6 x half> @interleave3_nxv6f16(<vscale x 2 x half> %vec0, <vscale x 2 x half> %vec1, <vscale x 2 x half> %vec2) {
; CHECK-LABEL: interleave3_nxv6f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp, #2, mul vl]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 6 x half> @llvm.vector.interleave3.nxv6f16(<vscale x 2 x half> %vec0, <vscale x 2 x half> %vec1, <vscale x 2 x half> %vec2)
  ret <vscale x 6 x half> %retval
}

define <vscale x 12 x half> @interleave3_nxv12f16(<vscale x 4 x half> %vec0, <vscale x 4 x half> %vec1, <vscale x 4 x half> %vec2) {
; CHECK-LABEL: interleave3_nxv12f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-5
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x28, 0x1e, 0x22 // sp + 16 + 40 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp, #1, mul vl]
; CHECK-NEXT: ldr z1, [sp]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: st1h { z2.s }, p0, [x8, #7, mul vl]
; CHECK-NEXT: str z0, [sp, #3, mul vl]
; CHECK-NEXT: ldr z1, [sp, #4, mul vl]
; CHECK-NEXT: ldr z0, [sp, #3, mul vl]
; CHECK-NEXT: addvl sp, sp, #5
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 12 x half> @llvm.vector.interleave3.nxv12f16(<vscale x 4 x half> %vec0, <vscale x 4 x half> %vec1, <vscale x 4 x half> %vec2)
  ret <vscale x 12 x half> %retval
}

define <vscale x 24 x half> @interleave3_nxv24f16(<vscale x 8 x half> %vec0, <vscale x 8 x half> %vec1, <vscale x 8 x half> %vec2) {
; CHECK-LABEL: interleave3_nxv24f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 24 x half> @llvm.vector.interleave3.nxv24f16(<vscale x 8 x half> %vec0, <vscale x 8 x half> %vec1, <vscale x 8 x half> %vec2)
  ret <vscale x 24 x half> %retval
}

define <vscale x 6 x float> @interleave3_nxv6f32(<vscale x 2 x float> %vec0, <vscale x 2 x float> %vec1, <vscale x 2 x float> %vec2) {
; CHECK-LABEL: interleave3_nxv6f32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-5
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x28, 0x1e, 0x22 // sp + 16 + 40 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp, #1, mul vl]
; CHECK-NEXT: ldr z1, [sp]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: st1w { z2.d }, p0, [x8, #7, mul vl]
; CHECK-NEXT: str z0, [sp, #3, mul vl]
; CHECK-NEXT: ldr z1, [sp, #4, mul vl]
; CHECK-NEXT: ldr z0, [sp, #3, mul vl]
; CHECK-NEXT: addvl sp, sp, #5
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 6 x float> @llvm.vector.interleave3.nxv6f32(<vscale x 2 x float> %vec0, <vscale x 2 x float> %vec1, <vscale x 2 x float> %vec2)
  ret <vscale x 6 x float> %retval
}

define <vscale x 12 x float> @interleave3_nxv12f32(<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1, <vscale x 4 x float> %vec2) {
; CHECK-LABEL: interleave3_nxv12f32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 12 x float> @llvm.vector.interleave3.nxv12f32(<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1, <vscale x 4 x float> %vec2)
  ret <vscale x 12 x float> %retval
}

define <vscale x 6 x double> @interleave3_nxv6f64(<vscale x 2 x double> %vec0, <vscale x 2 x double> %vec1, <vscale x 2 x double> %vec2) {
; CHECK-LABEL: interleave3_nxv6f64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 6 x double> @llvm.vector.interleave3.nxv6f64(<vscale x 2 x double> %vec0, <vscale x 2 x double> %vec1, <vscale x 2 x double> %vec2)
  ret <vscale x 6 x double> %retval
}

define <vscale x 6 x bfloat> @interleave3_nxv6bf16(<vscale x 2 x bfloat> %vec0, <vscale x 2 x bfloat> %vec1, <vscale x 2 x bfloat> %vec2) {
; CHECK-LABEL: interleave3_nxv6bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp, #2, mul vl]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 6 x bfloat> @llvm.vector.interleave3.nxv6bf16(<vscale x 2 x bfloat> %vec0, <vscale x 2 x bfloat> %vec1, <vscale x 2 x bfloat> %vec2)
  ret <vscale x 6 x bfloat> %retval
}

define <vscale x 12 x bfloat> @interleave3_nxv12bf16(<vscale x 4 x bfloat> %vec0, <vscale x 4 x bfloat> %vec1, <vscale x 4 x bfloat> %vec2) {
; CHECK-LABEL: interleave3_nxv12bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-5
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x28, 0x1e, 0x22 // sp + 16 + 40 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp, #1, mul vl]
; CHECK-NEXT: ldr z1, [sp]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: st1h { z2.s }, p0, [x8, #7, mul vl]
; CHECK-NEXT: str z0, [sp, #3, mul vl]
; CHECK-NEXT: ldr z1, [sp, #4, mul vl]
; CHECK-NEXT: ldr z0, [sp, #3, mul vl]
; CHECK-NEXT: addvl sp, sp, #5
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 12 x bfloat> @llvm.vector.interleave3.nxv12bf16(<vscale x 4 x bfloat> %vec0, <vscale x 4 x bfloat> %vec1, <vscale x 4 x bfloat> %vec2)
  ret <vscale x 12 x bfloat> %retval
}

define <vscale x 24 x bfloat> @interleave3_nxv24bf16(<vscale x 8 x bfloat> %vec0, <vscale x 8 x bfloat> %vec1, <vscale x 8 x bfloat> %vec2) {
; CHECK-LABEL: interleave3_nxv24bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 24 x bfloat> @llvm.vector.interleave3.nxv24bf16(<vscale x 8 x bfloat> %vec0, <vscale x 8 x bfloat> %vec1, <vscale x 8 x bfloat> %vec2)
  ret <vscale x 24 x bfloat> %retval
}

; Integers

define <vscale x 48 x i8> @interleave3_nxv48i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2) {
; CHECK-LABEL: interleave3_nxv48i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 48 x i8> @llvm.vector.interleave3.nxv48i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2)
  ret <vscale x 48 x i8> %retval
}

define <vscale x 24 x i16> @interleave3_nxv24i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2) {
; CHECK-LABEL: interleave3_nxv24i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 24 x i16> @llvm.vector.interleave3.nxv24i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2)
  ret <vscale x 24 x i16> %retval
}

define <vscale x 12 x i32> @interleave3_nxv12i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2) {
; CHECK-LABEL: interleave3_nxv12i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 12 x i32> @llvm.vector.interleave3.nxv12i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2)
  ret <vscale x 12 x i32> %retval
}

define <vscale x 6 x i64> @interleave3_nxv6i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2) {
; CHECK-LABEL: interleave3_nxv6i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z2, [sp, #2, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 6 x i64> @llvm.vector.interleave3.nxv6i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2)
  ret <vscale x 6 x i64> %retval
}

define <vscale x 64 x i8> @interleave4_nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3) {
; SVE-LABEL: interleave4_nxv16i8:
; SVE: // %bb.0: