Updated synctools/tablegen/AArch64 files to LLVM 14.0.5.
This commit is contained in:
parent
62fee65c54
commit
e385fc5267
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -11,17 +10,19 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// CCIfAlign - Match of the original alignment of the arg
|
||||
class CCIfAlign<string Align, CCAction A> :
|
||||
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
|
||||
/// CCIfBigEndian - Match only if we're in big endian mode.
|
||||
class CCIfBigEndian<CCAction A> :
|
||||
CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
|
||||
|
||||
class CCIfILP32<CCAction A> :
|
||||
CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM AAPCS64 Calling Convention
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_AAPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
|
@ -29,13 +30,29 @@ def CC_AArch64_AAPCS : CallingConv<[
|
|||
|
||||
// Big endian vectors must be passed as if they were 1-element vectors so that
|
||||
// their lanes are in a consistent order.
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v4bf16, v8i8],
|
||||
CCBitConvertToType<f64>>>,
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
|
||||
CCBitConvertToType<f128>>>,
|
||||
|
||||
// An SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
|
||||
// In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
// However, on windows, in some circumstances, the SRet is passed in X0 or X1
|
||||
// instead. The presence of the inreg attribute indicates that SRet is
|
||||
// passed in the alternative register (X0 or X1), not X8:
|
||||
// - X0 for non-instance methods.
|
||||
// - X1 for instance methods.
|
||||
|
||||
// The "sret" attribute identifies indirect returns.
|
||||
// The "inreg" attribute identifies non-aggregate types.
|
||||
// The position of the "sret" attribute identifies instance/non-instance
|
||||
// methods.
|
||||
// "sret" on argument 0 means non-instance methods.
|
||||
// "sret" on argument 1 means instance methods.
|
||||
|
||||
CCIfInReg<CCIfType<[i64],
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToReg<[X0, X1]>>>>>,
|
||||
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
|
||||
|
||||
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
|
||||
// slot is 64-bit.
|
||||
|
@ -47,18 +64,33 @@ def CC_AArch64_AAPCS : CallingConv<[
|
|||
CCIfNest<CCAssignToReg<[X18]>>,
|
||||
|
||||
// Pass SwiftSelf in a callee saved register.
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
|
||||
|
||||
// A SwiftError is passed in X21.
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
|
||||
|
||||
// Pass SwiftAsync in an otherwise callee saved register so that it will be
|
||||
// preserved for normal function calls.
|
||||
CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
|
||||
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
|
||||
|
||||
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
|
||||
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
|
||||
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
|
||||
CCPassIndirect<i64>>,
|
||||
|
||||
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
|
||||
CCAssignToReg<[P0, P1, P2, P3]>>,
|
||||
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
|
||||
CCPassIndirect<i64>>,
|
||||
|
||||
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
|
||||
// up to eight each of GPR and FPR.
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
// i128 is split to two i64s, we can't fit half to register X7.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
|
||||
[X0, X1, X3, X5]>>>,
|
||||
|
@ -66,129 +98,145 @@ def CC_AArch64_AAPCS : CallingConv<[
|
|||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
|
||||
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
// If more than will fit in registers, pass them on the stack instead.
|
||||
CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i1, i8, i16, f16, bf16], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
let Entry = 1 in
|
||||
def RetCC_AArch64_AAPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
|
||||
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
|
||||
|
||||
// Big endian vectors must be passed as if they were 1-element vectors so that
|
||||
// their lanes are in a consistent order.
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v4bf16, v8i8],
|
||||
CCBitConvertToType<f64>>>,
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
|
||||
CCBitConvertToType<f128>>>,
|
||||
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
|
||||
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
|
||||
|
||||
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
|
||||
CCAssignToReg<[P0, P1, P2, P3]>>
|
||||
]>;
|
||||
|
||||
// Vararg functions on windows pass floats in integer registers
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_Win64_VarArg : CallingConv<[
|
||||
CCIfType<[f16, f32], CCPromoteToType<f64>>,
|
||||
CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
|
||||
CCIfType<[f32], CCBitConvertToType<i32>>,
|
||||
CCIfType<[f64], CCBitConvertToType<i64>>,
|
||||
CCDelegateTo<CC_AArch64_AAPCS>
|
||||
]>;
|
||||
|
||||
// Windows Control Flow Guard checks take a single argument (the target function
|
||||
// address) and have no return value.
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_Win64_CFGuard_Check : CallingConv<[
|
||||
CCIfType<[i64], CCAssignToReg<[X15]>>
|
||||
]>;
|
||||
|
||||
|
||||
// Darwin uses a calling convention which differs in only two ways
|
||||
// from the standard one at this level:
|
||||
// + i128s (i.e. split i64s) don't need even registers.
|
||||
// + Stack slots are sized as needed rather than being at least 64-bit.
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_DarwinPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// An SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
|
||||
|
||||
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
|
||||
// slot is 64-bit.
|
||||
CCIfByVal<CCPassByVal<8, 8>>,
|
||||
|
||||
// Pass SwiftSelf in a callee saved register.
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
|
||||
|
||||
// A SwiftError is passed in X21.
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
|
||||
|
||||
// Pass SwiftAsync in an otherwise callee saved register so that it will be
|
||||
// preserved for normal function calls.
|
||||
CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
|
||||
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
|
||||
|
||||
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
|
||||
// up to eight each of GPR and FPR.
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
// i128 is split to two i64s, we can't fit half to register X7.
|
||||
CCIfType<[i64],
|
||||
CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
|
||||
[W0, W1, W2, W3, W4, W5, W6]>>>,
|
||||
CCIfSplit<CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6]>>>,
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
|
||||
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
// If more than will fit in registers, pass them on the stack instead.
|
||||
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
|
||||
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
|
||||
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16 || ValVT == MVT::bf16",
|
||||
CCAssignToStack<2, 2>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
|
||||
|
||||
// Re-demote pointers to 32-bits so we don't end up storing 64-bit
|
||||
// values and clobbering neighbouring stack locations. Not very pretty.
|
||||
CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
|
||||
CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>,
|
||||
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
|
@ -198,41 +246,62 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
|
|||
|
||||
// Handle all scalar types as either i64 or f64.
|
||||
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
|
||||
CCIfType<[f16, f32], CCPromoteToType<f64>>,
|
||||
CCIfType<[f16, bf16, f32], CCPromoteToType<f64>>,
|
||||
|
||||
// Everything is on the stack.
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
|
||||
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the
|
||||
// same as the normal Darwin VarArgs handling.
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// Handle all scalar types as either i32 or f32.
|
||||
CCIfType<[i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[f16, bf16], CCPromoteToType<f32>>,
|
||||
|
||||
// Everything is on the stack.
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
|
||||
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
|
||||
// The WebKit_JS calling convention only passes the first argument (the callee)
|
||||
// in register and the remaining arguments on stack. We allow 32bit stack slots,
|
||||
// so that WebKit can write partial values in the stack and define the other
|
||||
// 32bit quantity as undef.
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_WebKit_JS : CallingConv<[
|
||||
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
|
||||
CCIfType<[i32], CCAssignToReg<[W0]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[X0]>>,
|
||||
|
||||
// Pass the remaining arguments on the stack instead.
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
|
||||
]>;
|
||||
|
||||
let Entry = 1 in
|
||||
def RetCC_AArch64_WebKit_JS : CallingConv<[
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -257,6 +326,7 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
|
|||
// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
|
||||
// register mapping".
|
||||
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_GHC : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
|
||||
|
@ -275,6 +345,12 @@ def CC_AArch64_GHC : CallingConv<[
|
|||
CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
|
||||
]>;
|
||||
|
||||
// The order of the callee-saves in this file is important, because the
|
||||
// FrameLowering code will use this order to determine the layout the
|
||||
// callee-save area in the stack frame. As can be observed below, Darwin
|
||||
// requires the frame-record (LR, FP) to be at the top the callee-save area,
|
||||
// whereas for other platforms they are at the bottom.
|
||||
|
||||
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
|
||||
// presumably a callee to someone. External functions may not do so, but this
|
||||
// is currently safe since BL has LR as an implicit-def and what happens after a
|
||||
|
@ -283,11 +359,45 @@ def CC_AArch64_GHC : CallingConv<[
|
|||
// It would be better to model its preservation semantics properly (create a
|
||||
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
|
||||
// end up saving LR as part of a call frame). Watch this space...
|
||||
def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
|
||||
X23, X24, X25, X26, X27, X28,
|
||||
def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
|
||||
X25, X26, X27, X28, LR, FP,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
// A variant for treating X18 as callee saved, when interfacing with
|
||||
// code that needs X18 to be preserved.
|
||||
def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>;
|
||||
|
||||
// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
|
||||
// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
|
||||
// and not (LR,FP) pairs.
|
||||
def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
|
||||
X25, X26, X27, X28, FP, LR,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
// The Control Flow Guard check call uses a custom calling convention that also
|
||||
// preserves X0-X8 and Q0-Q7.
|
||||
def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS,
|
||||
(sequence "X%u", 0, 8),
|
||||
(sequence "Q%u", 0, 7))>;
|
||||
|
||||
// AArch64 PCS for vector functions (VPCS)
|
||||
// must (additionally) preserve full Q8-Q23 registers
|
||||
def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
|
||||
X25, X26, X27, X28, LR, FP,
|
||||
(sequence "Q%u", 8, 23))>;
|
||||
|
||||
// Functions taking SVE arguments or returning an SVE type
|
||||
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
|
||||
def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
|
||||
(sequence "P%u", 4, 15),
|
||||
X19, X20, X21, X22, X23, X24,
|
||||
X25, X26, X27, X28, LR, FP)>;
|
||||
|
||||
def CSR_AArch64_AAPCS_SwiftTail
|
||||
: CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X20, X22)>;
|
||||
|
||||
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
|
||||
// 'this' and the pointer return value are both passed in X0 in these cases,
|
||||
// this can be partially modelled by treating X0 as a callee-saved register;
|
||||
|
@ -301,32 +411,6 @@ def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
|
|||
def CSR_AArch64_AAPCS_SwiftError
|
||||
: CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
|
||||
|
||||
// The function used by Darwin to obtain the address of a thread-local variable
|
||||
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
|
||||
// fast path for calculation, but other registers except X0 (argument/return)
|
||||
// and LR (it is a call, after all) are preserved.
|
||||
def CSR_AArch64_TLS_Darwin
|
||||
: CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
|
||||
FP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
// We can only handle a register pair with adjacent registers, the register pair
|
||||
// should belong to the same class as well. Since the access function on the
|
||||
// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
|
||||
// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
|
||||
def CSR_AArch64_CXX_TLS_Darwin
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS,
|
||||
(sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
|
||||
(sequence "D%u", 0, 31))>;
|
||||
|
||||
// CSRs that are handled by prologue, epilogue.
|
||||
def CSR_AArch64_CXX_TLS_Darwin_PE
|
||||
: CalleeSavedRegs<(add LR, FP)>;
|
||||
|
||||
// CSRs that are handled explicitly via copies.
|
||||
def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
|
||||
: CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
|
||||
|
||||
// The ELF stub used for TLS-descriptor access saves every feasible
|
||||
// register. Only X0 and LR are clobbered.
|
||||
def CSR_AArch64_TLS_ELF
|
||||
|
@ -350,17 +434,67 @@ def CSR_AArch64_StackProbe_Windows
|
|||
(sequence "X%u", 18, 28), FP, SP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
// Darwin variants of AAPCS.
|
||||
// Darwin puts the frame-record at the top of the callee-save area.
|
||||
def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
|
||||
X23, X24, X25, X26, X27, X28,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
def CSR_Darwin_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21,
|
||||
X22, X23, X24, X25, X26, X27,
|
||||
X28, (sequence "Q%u", 8, 23))>;
|
||||
def CSR_Darwin_AArch64_AAPCS_ThisReturn
|
||||
: CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, X0)>;
|
||||
|
||||
def CSR_Darwin_AArch64_AAPCS_SwiftError
|
||||
: CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
|
||||
|
||||
def CSR_Darwin_AArch64_AAPCS_SwiftTail
|
||||
: CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X20, X22)>;
|
||||
|
||||
// The function used by Darwin to obtain the address of a thread-local variable
|
||||
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
|
||||
// fast path for calculation, but other registers except X0 (argument/return)
|
||||
// and LR (it is a call, after all) are preserved.
|
||||
def CSR_Darwin_AArch64_TLS
|
||||
: CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
|
||||
FP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
// We can only handle a register pair with adjacent registers, the register pair
|
||||
// should belong to the same class as well. Since the access function on the
|
||||
// fast path calls a function that follows CSR_Darwin_AArch64_TLS,
|
||||
// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS.
|
||||
def CSR_Darwin_AArch64_CXX_TLS
|
||||
: CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
|
||||
(sub (sequence "X%u", 1, 28), X9, X15, X16, X17, X18, X19),
|
||||
(sequence "D%u", 0, 31))>;
|
||||
|
||||
// CSRs that are handled by prologue, epilogue.
|
||||
def CSR_Darwin_AArch64_CXX_TLS_PE
|
||||
: CalleeSavedRegs<(add LR, FP)>;
|
||||
|
||||
// CSRs that are handled explicitly via copies.
|
||||
def CSR_Darwin_AArch64_CXX_TLS_ViaCopy
|
||||
: CalleeSavedRegs<(sub CSR_Darwin_AArch64_CXX_TLS, LR, FP)>;
|
||||
|
||||
def CSR_Darwin_AArch64_RT_MostRegs
|
||||
: CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>;
|
||||
|
||||
// Variants of the standard calling conventions for shadow call stack.
|
||||
// These all preserve x18 in addition to any other registers.
|
||||
def CSR_AArch64_NoRegs_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>;
|
||||
def CSR_AArch64_AllRegs_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>;
|
||||
def CSR_AArch64_CXX_TLS_Darwin_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>;
|
||||
def CSR_AArch64_AAPCS_SwiftError_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
|
||||
def CSR_AArch64_RT_MostRegs_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
|
||||
def CSR_AArch64_AAVPCS_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
|
||||
def CSR_AArch64_SVE_AAPCS_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>;
|
||||
def CSR_AArch64_AAPCS_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
|
||||
|
|
|
@ -0,0 +1,233 @@
|
|||
//=- AArch64.td - Define AArch64 Combine Rules ---------------*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "llvm/Target/GlobalISel/Combine.td"
|
||||
|
||||
def fconstant_to_constant : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_FCONSTANT):$root,
|
||||
[{ return matchFConstantToConstant(*${root}, MRI); }]),
|
||||
(apply [{ applyFConstantToConstant(*${root}); }])>;
|
||||
|
||||
def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">;
|
||||
def icmp_redundant_trunc : GICombineRule<
|
||||
(defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_ICMP):$root,
|
||||
[{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
|
||||
(apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
|
||||
|
||||
// AArch64-specific offset folding for G_GLOBAL_VALUE.
|
||||
def fold_global_offset_matchdata : GIDefMatchData<"std::pair<uint64_t, uint64_t>">;
|
||||
def fold_global_offset : GICombineRule<
|
||||
(defs root:$root, fold_global_offset_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_GLOBAL_VALUE):$root,
|
||||
[{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
|
||||
>;
|
||||
|
||||
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
|
||||
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
|
||||
fconstant_to_constant,
|
||||
icmp_redundant_trunc,
|
||||
fold_global_offset]> {
|
||||
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
|
||||
let StateClass = "AArch64PreLegalizerCombinerHelperState";
|
||||
let AdditionalArguments = [];
|
||||
}
|
||||
|
||||
def AArch64O0PreLegalizerCombinerHelper: GICombinerHelper<
|
||||
"AArch64GenO0PreLegalizerCombinerHelper", [optnone_combines]> {
|
||||
let DisableRuleOption = "aarch64O0prelegalizercombiner-disable-rule";
|
||||
let StateClass = "AArch64O0PreLegalizerCombinerHelperState";
|
||||
let AdditionalArguments = [];
|
||||
}
|
||||
|
||||
// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
|
||||
// target-specific opcode.
|
||||
def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;
|
||||
|
||||
def rev : GICombineRule<
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchREV(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def zip : GICombineRule<
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def uzp : GICombineRule<
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def dup: GICombineRule <
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchDup(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def trn : GICombineRule<
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchTRN(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def ext: GICombineRule <
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchEXT(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def shuf_to_ins_matchdata : GIDefMatchData<"std::tuple<Register, int, Register, int>">;
|
||||
def shuf_to_ins: GICombineRule <
|
||||
(defs root:$root, shuf_to_ins_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchINS(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ return applyINS(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
|
||||
def vashr_vlshr_imm : GICombineRule<
|
||||
(defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_ASHR, G_LSHR):$root,
|
||||
[{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def form_duplane_matchdata :
|
||||
GIDefMatchData<"std::pair<unsigned, int>">;
|
||||
def form_duplane : GICombineRule <
|
||||
(defs root:$root, form_duplane_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
|
||||
form_duplane,
|
||||
shuf_to_ins]>;
|
||||
|
||||
def adjust_icmp_imm_matchdata :
|
||||
GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
|
||||
def adjust_icmp_imm : GICombineRule <
|
||||
(defs root:$root, adjust_icmp_imm_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_ICMP):$root,
|
||||
[{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
|
||||
>;
|
||||
|
||||
def swap_icmp_operands : GICombineRule <
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_ICMP):$root,
|
||||
[{ return trySwapICmpOperands(*${root}, MRI); }]),
|
||||
(apply [{ applySwapICmpOperands(*${root}, Observer); }])
|
||||
>;
|
||||
|
||||
def icmp_lowering : GICombineGroup<[adjust_icmp_imm, swap_icmp_operands]>;
|
||||
|
||||
def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
|
||||
def extractvecelt_pairwise_add : GICombineRule<
|
||||
(defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
|
||||
[{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">;
|
||||
def mul_const : GICombineRule<
|
||||
(defs root:$root, mul_const_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_MUL):$root,
|
||||
[{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def build_vector_to_dup : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_BUILD_VECTOR):$root,
|
||||
[{ return matchBuildVectorToDup(*${root}, MRI); }]),
|
||||
(apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }])
|
||||
>;
|
||||
|
||||
def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
|
||||
|
||||
def lower_vector_fcmp : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_FCMP):$root,
|
||||
[{ return lowerVectorFCMP(*${root}, MRI, B); }]),
|
||||
(apply [{}])>;
|
||||
|
||||
def form_truncstore_matchdata : GIDefMatchData<"Register">;
|
||||
def form_truncstore : GICombineRule<
|
||||
(defs root:$root, form_truncstore_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_STORE):$root,
|
||||
[{ return matchFormTruncstore(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def fold_merge_to_zext : GICombineRule<
|
||||
(defs root:$d),
|
||||
(match (wip_match_opcode G_MERGE_VALUES):$d,
|
||||
[{ return matchFoldMergeToZext(*${d}, MRI); }]),
|
||||
(apply [{ applyFoldMergeToZext(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
def mutate_anyext_to_zext : GICombineRule<
|
||||
(defs root:$d),
|
||||
(match (wip_match_opcode G_ANYEXT):$d,
|
||||
[{ return matchMutateAnyExtToZExt(*${d}, MRI); }]),
|
||||
(apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
def split_store_zero_128 : GICombineRule<
|
||||
(defs root:$d),
|
||||
(match (wip_match_opcode G_STORE):$d,
|
||||
[{ return matchSplitStoreZero128(*${d}, MRI); }]),
|
||||
(apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
// Post-legalization combines which should happen at all optimization levels.
|
||||
// (E.g. ones that facilitate matching for the selector) For example, matching
|
||||
// pseudos.
|
||||
def AArch64PostLegalizerLoweringHelper
|
||||
: GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
|
||||
[shuffle_vector_lowering, vashr_vlshr_imm,
|
||||
icmp_lowering, build_vector_lowering,
|
||||
lower_vector_fcmp, form_truncstore]> {
|
||||
let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
|
||||
}
|
||||
|
||||
// Post-legalization combines which are primarily optimizations.
|
||||
def AArch64PostLegalizerCombinerHelper
|
||||
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
|
||||
[copy_prop, erase_undef_store, combines_for_extload,
|
||||
sext_trunc_sextload, mutate_anyext_to_zext,
|
||||
hoist_logic_op_with_same_opcode_hands,
|
||||
redundant_and, xor_of_and_with_same_reg,
|
||||
extractvecelt_pairwise_add, redundant_or,
|
||||
mul_const, redundant_sext_inreg,
|
||||
form_bitfield_extract, rotate_out_of_range,
|
||||
icmp_to_true_false_known_bits, merge_unmerge,
|
||||
select_combines, fold_merge_to_zext,
|
||||
constant_fold, identity_combines,
|
||||
ptr_add_immed_chain, overlapping_and,
|
||||
split_store_zero_128]> {
|
||||
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
|
||||
}
|
|
@ -0,0 +1,275 @@
|
|||
//===- AArch64GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
/// This file defines all the static objects used by AArch64RegisterBankInfo.
|
||||
/// \todo This should be generated by TableGen.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace llvm {
|
||||
RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
|
||||
/* StartIdx, Length, RegBank */
|
||||
// 0: FPR 16-bit value.
|
||||
{0, 16, AArch64::FPRRegBank},
|
||||
// 1: FPR 32-bit value.
|
||||
{0, 32, AArch64::FPRRegBank},
|
||||
// 2: FPR 64-bit value.
|
||||
{0, 64, AArch64::FPRRegBank},
|
||||
// 3: FPR 128-bit value.
|
||||
{0, 128, AArch64::FPRRegBank},
|
||||
// 4: FPR 256-bit value.
|
||||
{0, 256, AArch64::FPRRegBank},
|
||||
// 5: FPR 512-bit value.
|
||||
{0, 512, AArch64::FPRRegBank},
|
||||
// 6: GPR 32-bit value.
|
||||
{0, 32, AArch64::GPRRegBank},
|
||||
// 7: GPR 64-bit value.
|
||||
{0, 64, AArch64::GPRRegBank},
|
||||
// 8: GPR 128-bit value.
|
||||
{0, 128, AArch64::GPRRegBank},
|
||||
};
|
||||
|
||||
// ValueMappings.
|
||||
RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
|
||||
/* BreakDown, NumBreakDowns */
|
||||
// 0: invalid
|
||||
{nullptr, 0},
|
||||
// 3-operands instructions (all binary operations should end up with one of
|
||||
// those mapping).
|
||||
// 1: FPR 16-bit value. <-- This must match First3OpsIdx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
// 4: FPR 32-bit value. <-- This must match First3OpsIdx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
// 7: FPR 64-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
// 10: FPR 128-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
|
||||
// 13: FPR 256-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
|
||||
// 16: FPR 512-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
|
||||
// 19: GPR 32-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
// 22: GPR 64-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
// 25: GPR 128-bit value. <-- This must match Last3OpsIdx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
|
||||
// Cross register bank copies.
|
||||
// 28: FPR 16-bit value to GPR 16-bit. <-- This must match
|
||||
// FirstCrossRegCpyIdx.
|
||||
// Note: This is the kind of copy we see with physical registers.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
// 30: FPR 32-bit value to GPR 32-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
// 32: FPR 64-bit value to GPR 64-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
// 34: FPR 128-bit value to GPR 128-bit value (invalid)
|
||||
{nullptr, 1},
|
||||
{nullptr, 1},
|
||||
// 36: FPR 256-bit value to GPR 256-bit value (invalid)
|
||||
{nullptr, 1},
|
||||
{nullptr, 1},
|
||||
// 38: FPR 512-bit value to GPR 512-bit value (invalid)
|
||||
{nullptr, 1},
|
||||
{nullptr, 1},
|
||||
// 40: GPR 32-bit value to FPR 32-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
// 42: GPR 64-bit value to FPR 64-bit value. <-- This must match
|
||||
// LastCrossRegCpyIdx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
// 44: FPExt: 16 to 32. <-- This must match FPExt16To32Idx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
// 46: FPExt: 16 to 32. <-- This must match FPExt16To64Idx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
// 48: FPExt: 32 to 64. <-- This must match FPExt32To64Idx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
// 50: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
// 52: Shift scalar with 64 bit shift imm
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
};
|
||||
|
||||
bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
|
||||
unsigned ValStartIdx,
|
||||
unsigned ValLength,
|
||||
const RegisterBank &RB) {
|
||||
const PartialMapping &Map = PartMappings[Idx - PartialMappingIdx::PMI_Min];
|
||||
return Map.StartIdx == ValStartIdx && Map.Length == ValLength &&
|
||||
Map.RegBank == &RB;
|
||||
}
|
||||
|
||||
bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx,
|
||||
unsigned FirstInBank,
|
||||
unsigned Size,
|
||||
unsigned Offset) {
|
||||
unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min;
|
||||
const ValueMapping &Map =
|
||||
AArch64GenRegisterBankInfo::getValueMapping((PartialMappingIdx)FirstInBank, Size)[Offset];
|
||||
return Map.BreakDown == &PartMappings[PartialMapBaseIdx] &&
|
||||
Map.NumBreakDowns == 1;
|
||||
}
|
||||
|
||||
bool AArch64GenRegisterBankInfo::checkPartialMappingIdx(
|
||||
PartialMappingIdx FirstAlias, PartialMappingIdx LastAlias,
|
||||
ArrayRef<PartialMappingIdx> Order) {
|
||||
if (Order.front() != FirstAlias)
|
||||
return false;
|
||||
if (Order.back() != LastAlias)
|
||||
return false;
|
||||
if (Order.front() > Order.back())
|
||||
return false;
|
||||
|
||||
PartialMappingIdx Previous = Order.front();
|
||||
bool First = true;
|
||||
for (const auto &Current : Order) {
|
||||
if (First) {
|
||||
First = false;
|
||||
continue;
|
||||
}
|
||||
if (Previous + 1 != Current)
|
||||
return false;
|
||||
Previous = Current;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
|
||||
unsigned Size) {
|
||||
if (RBIdx == PMI_FirstGPR) {
|
||||
if (Size <= 32)
|
||||
return 0;
|
||||
if (Size <= 64)
|
||||
return 1;
|
||||
if (Size <= 128)
|
||||
return 2;
|
||||
return -1;
|
||||
}
|
||||
if (RBIdx == PMI_FirstFPR) {
|
||||
if (Size <= 16)
|
||||
return 0;
|
||||
if (Size <= 32)
|
||||
return 1;
|
||||
if (Size <= 64)
|
||||
return 2;
|
||||
if (Size <= 128)
|
||||
return 3;
|
||||
if (Size <= 256)
|
||||
return 4;
|
||||
if (Size <= 512)
|
||||
return 5;
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
const RegisterBankInfo::ValueMapping *
|
||||
AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
|
||||
unsigned Size) {
|
||||
assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that");
|
||||
unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size);
|
||||
if (BaseIdxOffset == -1u)
|
||||
return &ValMappings[InvalidIdx];
|
||||
|
||||
unsigned ValMappingIdx =
|
||||
First3OpsIdx + (RBIdx - PartialMappingIdx::PMI_Min + BaseIdxOffset) *
|
||||
ValueMappingIdx::DistanceBetweenRegBanks;
|
||||
assert(ValMappingIdx >= First3OpsIdx && ValMappingIdx <= Last3OpsIdx &&
|
||||
"Mapping out of bound");
|
||||
|
||||
return &ValMappings[ValMappingIdx];
|
||||
}
|
||||
|
||||
AArch64GenRegisterBankInfo::PartialMappingIdx
|
||||
AArch64GenRegisterBankInfo::BankIDToCopyMapIdx[]{
|
||||
PMI_None, // CCR
|
||||
PMI_FirstFPR, // FPR
|
||||
PMI_FirstGPR, // GPR
|
||||
};
|
||||
|
||||
const RegisterBankInfo::ValueMapping *
|
||||
AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
|
||||
unsigned SrcBankID, unsigned Size) {
|
||||
assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
|
||||
assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
|
||||
PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
|
||||
PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
|
||||
assert(DstRBIdx != PMI_None && "No such mapping");
|
||||
assert(SrcRBIdx != PMI_None && "No such mapping");
|
||||
|
||||
if (DstRBIdx == SrcRBIdx)
|
||||
return getValueMapping(DstRBIdx, Size);
|
||||
|
||||
assert(Size <= 64 && "GPR cannot handle that size");
|
||||
unsigned ValMappingIdx =
|
||||
FirstCrossRegCpyIdx +
|
||||
(DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(DstRBIdx, Size)) *
|
||||
ValueMappingIdx::DistanceBetweenCrossRegCpy;
|
||||
assert(ValMappingIdx >= FirstCrossRegCpyIdx &&
|
||||
ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound");
|
||||
return &ValMappings[ValMappingIdx];
|
||||
}
|
||||
|
||||
const RegisterBankInfo::ValueMapping *
|
||||
AArch64GenRegisterBankInfo::getFPExtMapping(unsigned DstSize,
|
||||
unsigned SrcSize) {
|
||||
// We support:
|
||||
// - For Scalar:
|
||||
// - 16 to 32.
|
||||
// - 16 to 64.
|
||||
// - 32 to 64.
|
||||
// => FPR 16 to FPR 32|64
|
||||
// => FPR 32 to FPR 64
|
||||
// - For vectors:
|
||||
// - v4f16 to v4f32
|
||||
// - v2f32 to v2f64
|
||||
// => FPR 64 to FPR 128
|
||||
|
||||
// Check that we have been asked sensible sizes.
|
||||
if (SrcSize == 16) {
|
||||
assert((DstSize == 32 || DstSize == 64) && "Unexpected half extension");
|
||||
if (DstSize == 32)
|
||||
return &ValMappings[FPExt16To32Idx];
|
||||
return &ValMappings[FPExt16To64Idx];
|
||||
}
|
||||
|
||||
if (SrcSize == 32) {
|
||||
assert(DstSize == 64 && "Unexpected float extension");
|
||||
return &ValMappings[FPExt32To64Idx];
|
||||
}
|
||||
assert((SrcSize == 64 || DstSize == 128) && "Unexpected vector extension");
|
||||
return &ValMappings[FPExt64To128Idx];
|
||||
}
|
||||
} // End llvm namespace.
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -16,9 +15,9 @@
|
|||
//===----------------------------------
|
||||
let AddedComplexity = 15, Size = 0 in
|
||||
def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering),
|
||||
[(atomic_fence imm:$ordering, 0)]>, Sched<[]>;
|
||||
def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
|
||||
def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
|
||||
[(atomic_fence timm:$ordering, 0)]>, Sched<[]>;
|
||||
def : Pat<(atomic_fence (i64 4), (timm)), (DMB (i32 0x9))>;
|
||||
def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic loads
|
||||
|
@ -103,6 +102,34 @@ def : Pat<(relaxed_load<atomic_load_64>
|
|||
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
|
||||
(LDURXi GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
// FP 32-bit loads
|
||||
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
|
||||
ro_Wextend32:$extend))))),
|
||||
(LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
|
||||
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
|
||||
ro_Xextend32:$extend))))),
|
||||
(LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
|
||||
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
|
||||
uimm12s8:$offset))))),
|
||||
(LDRSui GPR64sp:$Rn, uimm12s8:$offset)>;
|
||||
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
|
||||
(am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
|
||||
(LDURSi GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
// FP 64-bit loads
|
||||
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
|
||||
ro_Wextend64:$extend))))),
|
||||
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
|
||||
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
|
||||
ro_Xextend64:$extend))))),
|
||||
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
|
||||
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
|
||||
uimm12s8:$offset))))),
|
||||
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
|
||||
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
|
||||
(am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
|
||||
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic stores
|
||||
//===----------------------------------
|
||||
|
@ -197,6 +224,38 @@ def : Pat<(relaxed_store<atomic_store_64>
|
|||
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
|
||||
(STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
// FP 32-bit stores
|
||||
def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
|
||||
ro_Wextend32:$extend),
|
||||
(i32 (bitconvert (f32 FPR32Op:$val)))),
|
||||
(STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
|
||||
ro_Xextend32:$extend),
|
||||
(i32 (bitconvert (f32 FPR32Op:$val)))),
|
||||
(STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32>
|
||||
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
|
||||
(STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32>
|
||||
(am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
|
||||
(STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
// FP 64-bit stores
|
||||
def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
|
||||
ro_Wextend64:$extend),
|
||||
(i64 (bitconvert (f64 FPR64Op:$val)))),
|
||||
(STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
|
||||
ro_Xextend64:$extend),
|
||||
(i64 (bitconvert (f64 FPR64Op:$val)))),
|
||||
(STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64>
|
||||
(am_indexed64 GPR64sp:$Rn, uimm12s4:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
|
||||
(STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64>
|
||||
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
|
||||
(STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
//===----------------------------------
|
||||
// Low-level exclusive operations
|
||||
//===----------------------------------
|
||||
|
@ -205,19 +264,27 @@ def : Pat<(relaxed_store<atomic_store_64>
|
|||
|
||||
def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
|
||||
}
|
||||
|
||||
def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
|
||||
}
|
||||
|
||||
def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
|
||||
}
|
||||
|
||||
def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
|
||||
}
|
||||
|
||||
def : Pat<(ldxr_1 GPR64sp:$addr),
|
||||
(SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
|
||||
|
@ -238,19 +305,27 @@ def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff),
|
|||
|
||||
def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
|
||||
}
|
||||
|
||||
def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
|
||||
}
|
||||
|
||||
def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
|
||||
}
|
||||
|
||||
def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
|
||||
}
|
||||
|
||||
def : Pat<(ldaxr_1 GPR64sp:$addr),
|
||||
(SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
|
||||
|
@ -272,22 +347,30 @@ def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff),
|
|||
def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
|
||||
}
|
||||
|
||||
def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
|
||||
}
|
||||
|
||||
def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
|
||||
}
|
||||
|
||||
def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
|
||||
}
|
||||
|
||||
|
||||
def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr),
|
||||
|
@ -318,22 +401,30 @@ def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
|
|||
def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stlxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
|
||||
}
|
||||
|
||||
def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stlxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
|
||||
}
|
||||
|
||||
def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stlxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
|
||||
}
|
||||
|
||||
def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stlxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
|
||||
}
|
||||
|
||||
|
||||
def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
|
||||
|
@ -398,11 +489,16 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
|
|||
}
|
||||
|
||||
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
|
||||
mayLoad = 1, mayStore = 1 in
|
||||
def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
|
||||
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
|
||||
GPR64:$newLo, GPR64:$newHi), []>,
|
||||
Sched<[WriteAtomic]>;
|
||||
mayLoad = 1, mayStore = 1 in {
|
||||
class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch),
|
||||
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
|
||||
GPR64:$newLo, GPR64:$newHi), []>,
|
||||
Sched<[WriteAtomic]>;
|
||||
def CMP_SWAP_128 : cmp_swap_128;
|
||||
def CMP_SWAP_128_RELEASE : cmp_swap_128;
|
||||
def CMP_SWAP_128_ACQUIRE : cmp_swap_128;
|
||||
def CMP_SWAP_128_MONOTONIC : cmp_swap_128;
|
||||
}
|
||||
|
||||
// v8.1 Atomic instructions:
|
||||
let Predicates = [HasLSE] in {
|
||||
|
@ -423,4 +519,3 @@ let Predicates = [HasLSE] in {
|
|||
defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
|
||||
defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,287 @@
|
|||
//=----- AArch64InstrGISel.td - AArch64 GISel target pseudos -*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AArch64 GlobalISel target pseudo instruction definitions. This is kept
|
||||
// separately from the other tablegen files for organizational purposes, but
|
||||
// share the same infrastructure.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
class AArch64GenericInstruction : GenericInstruction {
|
||||
let Namespace = "AArch64";
|
||||
}
|
||||
|
||||
// A pseudo to represent a relocatable add instruction as part of address
|
||||
// computation.
|
||||
def G_ADD_LOW : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type1:$src, type2:$imm);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Pseudo for a rev16 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_REV16 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Pseudo for a rev32 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_REV32 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Pseudo for a rev64 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_REV64 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents an uzp1 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_UZP1 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents an uzp2 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_UZP2 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a zip1 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_ZIP1 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a zip2 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_ZIP2 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a dup instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_DUP: AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a lane duplicate operation.
|
||||
def G_DUPLANE8 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src, type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
def G_DUPLANE16 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src, type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
def G_DUPLANE32 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src, type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
def G_DUPLANE64 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src, type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a trn1 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_TRN1 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a trn2 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_TRN2 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents an ext instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_EXT: AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a vector G_ASHR with an immediate.
|
||||
def G_VASHR : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a vector G_LSHR with an immediate.
|
||||
def G_VLSHR : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents an integer to FP conversion on the FPR bank.
|
||||
def G_SITOF : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
def G_UITOF : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMEQ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, type1:$src2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMGE : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, type1:$src2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMGT : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, type1:$src2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMEQZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMGEZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMGTZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMLEZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMLTZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def : GINodeEquiv<G_REV16, AArch64rev16>;
|
||||
def : GINodeEquiv<G_REV32, AArch64rev32>;
|
||||
def : GINodeEquiv<G_REV64, AArch64rev64>;
|
||||
def : GINodeEquiv<G_UZP1, AArch64uzp1>;
|
||||
def : GINodeEquiv<G_UZP2, AArch64uzp2>;
|
||||
def : GINodeEquiv<G_ZIP1, AArch64zip1>;
|
||||
def : GINodeEquiv<G_ZIP2, AArch64zip2>;
|
||||
def : GINodeEquiv<G_DUP, AArch64dup>;
|
||||
def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>;
|
||||
def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
|
||||
def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
|
||||
def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
|
||||
def : GINodeEquiv<G_TRN1, AArch64trn1>;
|
||||
def : GINodeEquiv<G_TRN2, AArch64trn2>;
|
||||
def : GINodeEquiv<G_EXT, AArch64ext>;
|
||||
def : GINodeEquiv<G_VASHR, AArch64vashr>;
|
||||
def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
|
||||
def : GINodeEquiv<G_SITOF, AArch64sitof>;
|
||||
def : GINodeEquiv<G_UITOF, AArch64uitof>;
|
||||
|
||||
def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
|
||||
def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
|
||||
def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
|
||||
|
||||
def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
|
||||
def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
|
||||
def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
|
||||
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
|
||||
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
|
||||
|
||||
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
|
||||
|
||||
// These are patterns that we only use for GlobalISel via the importer.
|
||||
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
|
||||
(vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
|
||||
(f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
|
||||
|
||||
let Predicates = [HasNEON] in {
|
||||
def : Pat<(v2f64 (sint_to_fp v2i32:$src)),
|
||||
(SCVTFv2f64 (SSHLLv2i32_shift V64:$src, 0))>;
|
||||
def : Pat<(v2f64 (uint_to_fp v2i32:$src)),
|
||||
(UCVTFv2f64 (USHLLv2i32_shift V64:$src, 0))>;
|
||||
def : Pat<(v2f32 (sint_to_fp v2i64:$src)),
|
||||
(FCVTNv2i32 (SCVTFv2f64 V128:$src))>;
|
||||
def : Pat<(v2f32 (uint_to_fp v2i64:$src)),
|
||||
(FCVTNv2i32 (UCVTFv2f64 V128:$src))>;
|
||||
|
||||
def : Pat<(v2i64 (fp_to_sint v2f32:$src)),
|
||||
(FCVTZSv2f64 (FCVTLv2i32 V64:$src))>;
|
||||
def : Pat<(v2i64 (fp_to_uint v2f32:$src)),
|
||||
(FCVTZUv2f64 (FCVTLv2i32 V64:$src))>;
|
||||
def : Pat<(v2i32 (fp_to_sint v2f64:$src)),
|
||||
(XTNv2i32 (FCVTZSv2f64 V128:$src))>;
|
||||
def : Pat<(v2i32 (fp_to_uint v2f64:$src)),
|
||||
(XTNv2i32 (FCVTZUv2f64 V128:$src))>;
|
||||
|
||||
}
|
||||
|
||||
let Predicates = [HasNoLSE] in {
|
||||
def : Pat<(atomic_cmp_swap_8 GPR64:$addr, GPR32:$desired, GPR32:$new),
|
||||
(CMP_SWAP_8 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
|
||||
|
||||
def : Pat<(atomic_cmp_swap_16 GPR64:$addr, GPR32:$desired, GPR32:$new),
|
||||
(CMP_SWAP_16 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
|
||||
|
||||
def : Pat<(atomic_cmp_swap_32 GPR64:$addr, GPR32:$desired, GPR32:$new),
|
||||
(CMP_SWAP_32 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
|
||||
|
||||
def : Pat<(atomic_cmp_swap_64 GPR64:$addr, GPR64:$desired, GPR64:$new),
|
||||
(CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>;
|
||||
}
|
||||
|
||||
def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
|
||||
(STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
|
||||
def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
|
||||
(STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,18 @@
|
|||
//===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This describes the available hardware counters for AArch64.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CpuCyclesPfmCounter : PfmCounter<"CPU_CYCLES">;
|
||||
|
||||
def DefaultPfmCounters : ProcPfmCounters {
|
||||
let CycleCounter = CpuCyclesPfmCounter;
|
||||
}
|
||||
def : PfmCountersDefaultBinding<DefaultPfmCounters>;
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -11,7 +10,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// General Purpose Registers: W, X.
|
||||
def GPRRegBank : RegisterBank<"GPR", [GPR64all]>;
|
||||
def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>;
|
||||
|
||||
/// Floating Point/Vector Registers: B, H, S, D, Q.
|
||||
def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -25,11 +24,9 @@ let Namespace = "AArch64" in {
|
|||
def bsub : SubRegIndex<8>;
|
||||
def hsub : SubRegIndex<16>;
|
||||
def ssub : SubRegIndex<32>;
|
||||
def dsub : SubRegIndex<32>;
|
||||
def dsub : SubRegIndex<64>;
|
||||
def sube32 : SubRegIndex<32>;
|
||||
def subo32 : SubRegIndex<32>;
|
||||
def qhisub : SubRegIndex<64>;
|
||||
def qsub : SubRegIndex<64>;
|
||||
def sube64 : SubRegIndex<64>;
|
||||
def subo64 : SubRegIndex<64>;
|
||||
// SVE
|
||||
|
@ -48,6 +45,16 @@ let Namespace = "AArch64" in {
|
|||
def qsub1 : SubRegIndex<128>;
|
||||
def qsub2 : SubRegIndex<128>;
|
||||
def qsub3 : SubRegIndex<128>;
|
||||
// Note: Code depends on these having consecutive numbers
|
||||
def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits
|
||||
def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
|
||||
def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
|
||||
def zasubs0 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits
|
||||
def zasubs1 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits
|
||||
def zasubd0 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
|
||||
def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
|
||||
def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
|
||||
def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
|
||||
}
|
||||
|
||||
let Namespace = "AArch64" in {
|
||||
|
@ -134,6 +141,9 @@ def NZCV : AArch64Reg<0, "nzcv">;
|
|||
// First fault status register
|
||||
def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>;
|
||||
|
||||
// Purely virtual Vector Granule (VG) Dwarf register
|
||||
def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>;
|
||||
|
||||
// GPR register classes with the intersections of GPR32/GPR32sp and
|
||||
// GPR64/GPR64sp for use by the coalescer.
|
||||
def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
|
||||
|
@ -188,6 +198,10 @@ def GPR64z : RegisterOperand<GPR64> {
|
|||
let GIZeroRegister = XZR;
|
||||
}
|
||||
|
||||
// GPR argument registers.
|
||||
def GPR32arg : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 7)>;
|
||||
def GPR64arg : RegisterClass<"AArch64", [i64], 64, (sequence "X%u", 0, 7)>;
|
||||
|
||||
// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
|
||||
// constraint used by any instructions, it is used as a common super-class.
|
||||
def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
|
||||
|
@ -200,6 +214,17 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
|
|||
X22, X23, X24, X25, X26,
|
||||
X27, X28, FP, LR)>;
|
||||
|
||||
// Restricted set of tail call registers, for use when branch target
|
||||
// enforcement is enabled. These are the only registers which can be used to
|
||||
// indirectly branch (not call) to the "BTI c" instruction at the start of a
|
||||
// BTI-protected function.
|
||||
def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
|
||||
|
||||
// Register set that excludes registers that are reserved for procedure calls.
|
||||
// This is used for pseudo-instructions that are actually implemented using a
|
||||
// procedure call.
|
||||
def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)>;
|
||||
|
||||
// GPR register classes for post increment amount of vector load/store that
|
||||
// has alternate printing when Rm=31 and prints a constant immediate value
|
||||
// equal to the total number of bytes transferred.
|
||||
|
@ -408,25 +433,35 @@ def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
|
|||
def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
|
||||
let Size = 8;
|
||||
}
|
||||
def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
|
||||
def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
|
||||
let Size = 16;
|
||||
}
|
||||
|
||||
def FPR16_lo : RegisterClass<"AArch64", [f16], 16, (trunc FPR16, 16)> {
|
||||
let Size = 16;
|
||||
}
|
||||
def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
|
||||
def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
|
||||
v1i64, v4f16],
|
||||
64, (sequence "D%u", 0, 31)>;
|
||||
v1i64, v4f16, v4bf16],
|
||||
64, (sequence "D%u", 0, 31)>;
|
||||
def FPR64_lo : RegisterClass<"AArch64",
|
||||
[v8i8, v4i16, v2i32, v1i64, v4f16, v4bf16, v2f32,
|
||||
v1f64],
|
||||
64, (trunc FPR64, 16)>;
|
||||
|
||||
// We don't (yet) have an f128 legal type, so don't use that here. We
|
||||
// normalize 128-bit vectors to v2f64 for arg passing and such, so use
|
||||
// that here.
|
||||
def FPR128 : RegisterClass<"AArch64",
|
||||
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128,
|
||||
v8f16],
|
||||
v8f16, v8bf16],
|
||||
128, (sequence "Q%u", 0, 31)>;
|
||||
|
||||
// The lower 16 vector registers. Some instructions can only take registers
|
||||
// in this range.
|
||||
def FPR128_lo : RegisterClass<"AArch64",
|
||||
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
|
||||
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16,
|
||||
v8bf16],
|
||||
128, (trunc FPR128, 16)>;
|
||||
|
||||
// Pairs, triples, and quads of 64-bit vector registers.
|
||||
|
@ -467,7 +502,7 @@ def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {
|
|||
|
||||
|
||||
// Vector operand versions of the FP registers. Alternate name printing and
|
||||
// assmebler matching.
|
||||
// assembler matching.
|
||||
def VectorReg64AsmOperand : AsmOperandClass {
|
||||
let Name = "VectorReg64";
|
||||
let PredicateMethod = "isNeonVectorReg";
|
||||
|
@ -489,6 +524,9 @@ def VectorRegLoAsmOperand : AsmOperandClass {
|
|||
let Name = "VectorRegLo";
|
||||
let PredicateMethod = "isNeonVectorRegLo";
|
||||
}
|
||||
def V64_lo : RegisterOperand<FPR64_lo, "printVRegOperand"> {
|
||||
let ParserMatchClass = VectorRegLoAsmOperand;
|
||||
}
|
||||
def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
|
||||
let ParserMatchClass = VectorRegLoAsmOperand;
|
||||
}
|
||||
|
@ -627,6 +665,10 @@ def FPR16Op : RegisterOperand<FPR16, "printOperand"> {
|
|||
let ParserMatchClass = FPRAsmOperand<"FPR16">;
|
||||
}
|
||||
|
||||
def FPR16Op_lo : RegisterOperand<FPR16_lo, "printOperand"> {
|
||||
let ParserMatchClass = FPRAsmOperand<"FPR16_lo">;
|
||||
}
|
||||
|
||||
def FPR32Op : RegisterOperand<FPR32, "printOperand"> {
|
||||
let ParserMatchClass = FPRAsmOperand<"FPR32">;
|
||||
}
|
||||
|
@ -643,16 +685,18 @@ def FPR128Op : RegisterOperand<FPR128, "printOperand"> {
|
|||
// ARMv8.1a atomic CASP register operands
|
||||
|
||||
|
||||
def WSeqPairs : RegisterTuples<[sube32, subo32],
|
||||
[(rotl GPR32, 0), (rotl GPR32, 1)]>;
|
||||
def XSeqPairs : RegisterTuples<[sube64, subo64],
|
||||
[(rotl GPR64, 0), (rotl GPR64, 1)]>;
|
||||
def WSeqPairs : RegisterTuples<[sube32, subo32],
|
||||
[(decimate (rotl GPR32, 0), 2),
|
||||
(decimate (rotl GPR32, 1), 2)]>;
|
||||
def XSeqPairs : RegisterTuples<[sube64, subo64],
|
||||
[(decimate (rotl GPR64, 0), 2),
|
||||
(decimate (rotl GPR64, 1), 2)]>;
|
||||
|
||||
def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
|
||||
def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
|
||||
(add WSeqPairs)>{
|
||||
let Size = 64;
|
||||
}
|
||||
def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64,
|
||||
def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64,
|
||||
(add XSeqPairs)>{
|
||||
let Size = 128;
|
||||
}
|
||||
|
@ -675,6 +719,34 @@ def XSeqPairClassOperand :
|
|||
|
||||
//===----- END: v8.1a atomic CASP register operands -----------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Armv8.7a accelerator extension register operands: 8 consecutive GPRs
|
||||
// starting with an even one
|
||||
|
||||
let Namespace = "AArch64" in {
|
||||
foreach i = 0-7 in
|
||||
def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>;
|
||||
}
|
||||
|
||||
def Tuples8X : RegisterTuples<
|
||||
!foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)),
|
||||
!foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>;
|
||||
|
||||
def GPR64x8Class : RegisterClass<"AArch64", [i64x8], 512, (trunc Tuples8X, 12)> {
|
||||
let Size = 512;
|
||||
}
|
||||
def GPR64x8AsmOp : AsmOperandClass {
|
||||
let Name = "GPR64x8";
|
||||
let ParserMethod = "tryParseGPR64x8";
|
||||
let RenderMethod = "addRegOperands";
|
||||
}
|
||||
def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> {
|
||||
let ParserMatchClass = GPR64x8AsmOp;
|
||||
let PrintMethod = "printGPR64x8";
|
||||
}
|
||||
|
||||
//===----- END: v8.7a accelerator extension register operands -------------===//
|
||||
|
||||
// SVE predicate registers
|
||||
def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>;
|
||||
def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>;
|
||||
|
@ -764,7 +836,7 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
|
|||
def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
|
||||
}
|
||||
|
||||
// Enum descibing the element size for destructive
|
||||
// Enum describing the element size for destructive
|
||||
// operations.
|
||||
class ElementSizeEnum<bits<3> val> {
|
||||
bits<3> Value = val;
|
||||
|
@ -829,48 +901,25 @@ def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
|
|||
def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
|
||||
|
||||
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
|
||||
def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
|
||||
def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
|
||||
def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
|
||||
def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
|
||||
|
||||
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
|
||||
def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
|
||||
def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
|
||||
def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
|
||||
def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
|
||||
|
||||
//******************************************************************************
|
||||
|
||||
// SVE vector register class
|
||||
def ZPR : RegisterClass<"AArch64",
|
||||
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
|
||||
nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv1f32, nxv2f32, nxv4f32,
|
||||
nxv1f64, nxv2f64],
|
||||
128, (sequence "Z%u", 0, 31)> {
|
||||
// SVE vector register classes
|
||||
class ZPRClass<int lastreg> : RegisterClass<"AArch64",
|
||||
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
|
||||
nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv2bf16, nxv4bf16, nxv8bf16,
|
||||
nxv2f32, nxv4f32,
|
||||
nxv2f64],
|
||||
128, (sequence "Z%u", 0, lastreg)> {
|
||||
let Size = 128;
|
||||
}
|
||||
|
||||
// SVE restricted 4 bit scalable vector register class
|
||||
def ZPR_4b : RegisterClass<"AArch64",
|
||||
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
|
||||
nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv1f32, nxv2f32, nxv4f32,
|
||||
nxv1f64, nxv2f64],
|
||||
128, (sequence "Z%u", 0, 15)> {
|
||||
let Size = 128;
|
||||
}
|
||||
|
||||
// SVE restricted 3 bit scalable vector register class
|
||||
def ZPR_3b : RegisterClass<"AArch64",
|
||||
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
|
||||
nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv1f32, nxv2f32, nxv4f32,
|
||||
nxv1f64, nxv2f64],
|
||||
128, (sequence "Z%u", 0, 7)> {
|
||||
let Size = 128;
|
||||
}
|
||||
def ZPR : ZPRClass<31>;
|
||||
def ZPR_4b : ZPRClass<15>; // Restricted 4 bit SVE vector register class.
|
||||
def ZPR_3b : ZPRClass<7>; // Restricted 3 bit SVE vector register class.
|
||||
|
||||
class ZPRAsmOperand<string name, int Width, string RegClassSuffix = "">
|
||||
: AsmOperandClass {
|
||||
|
@ -1104,10 +1153,235 @@ class GPR64ExtendRegisterOperand<string Name, int Scale, RegisterClass RegClass>
|
|||
let PrintMethod = "printRegWithShiftExtend<false, " # Scale # ", 'x', 0>";
|
||||
}
|
||||
|
||||
foreach Scale = [8, 16, 32, 64] in {
|
||||
foreach Scale = [8, 16, 32, 64, 128] in {
|
||||
def GPR64shiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64shifted", Scale, "GPR64">;
|
||||
def GPR64shifted # Scale : GPR64ExtendRegisterOperand<"GPR64shiftedAsmOpnd" # Scale, Scale, GPR64>;
|
||||
|
||||
def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">;
|
||||
def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>;
|
||||
}
|
||||
|
||||
// Accumulator array tiles.
|
||||
def ZAQ0 : AArch64Reg<0, "za0.q">;
|
||||
def ZAQ1 : AArch64Reg<1, "za1.q">;
|
||||
def ZAQ2 : AArch64Reg<2, "za2.q">;
|
||||
def ZAQ3 : AArch64Reg<3, "za3.q">;
|
||||
def ZAQ4 : AArch64Reg<4, "za4.q">;
|
||||
def ZAQ5 : AArch64Reg<5, "za5.q">;
|
||||
def ZAQ6 : AArch64Reg<6, "za6.q">;
|
||||
def ZAQ7 : AArch64Reg<7, "za7.q">;
|
||||
def ZAQ8 : AArch64Reg<8, "za8.q">;
|
||||
def ZAQ9 : AArch64Reg<9, "za9.q">;
|
||||
def ZAQ10 : AArch64Reg<10, "za10.q">;
|
||||
def ZAQ11 : AArch64Reg<11, "za11.q">;
|
||||
def ZAQ12 : AArch64Reg<12, "za12.q">;
|
||||
def ZAQ13 : AArch64Reg<13, "za13.q">;
|
||||
def ZAQ14 : AArch64Reg<14, "za14.q">;
|
||||
def ZAQ15 : AArch64Reg<15, "za15.q">;
|
||||
|
||||
let SubRegIndices = [zasubq0, zasubq1] in {
|
||||
def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>;
|
||||
def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>;
|
||||
def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>;
|
||||
def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>;
|
||||
def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>;
|
||||
def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>;
|
||||
def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>;
|
||||
def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [zasubd0, zasubd1] in {
|
||||
def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>;
|
||||
def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>;
|
||||
def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>;
|
||||
def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [zasubs0, zasubs1] in {
|
||||
def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>;
|
||||
def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [zasubh0, zasubh1] in {
|
||||
def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [zasubb] in {
|
||||
def ZA : AArch64Reg<0, "za", [ZAB0]>;
|
||||
}
|
||||
|
||||
// SME Register Classes
|
||||
|
||||
// Accumulator array
|
||||
def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
|
||||
let Size = 2048;
|
||||
}
|
||||
|
||||
// Accumulator array as single tiles
|
||||
def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
|
||||
let Size = 2048;
|
||||
}
|
||||
def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
|
||||
let Size = 1024;
|
||||
}
|
||||
def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> {
|
||||
let Size = 512;
|
||||
}
|
||||
def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> {
|
||||
let Size = 256;
|
||||
}
|
||||
def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> {
|
||||
let Size = 128;
|
||||
}
|
||||
|
||||
// SME Register Operands
|
||||
// There are three types of SME matrix register operands:
|
||||
// * Tiles:
|
||||
//
|
||||
// These tiles make up the larger accumulator matrix. The tile representation
|
||||
// has an element type suffix, e.g. za0.b or za15.q and can be any of the
|
||||
// registers:
|
||||
// ZAQ0..ZAQ15
|
||||
// ZAD0..ZAD7
|
||||
// ZAS0..ZAS3
|
||||
// ZAH0..ZAH1
|
||||
// or ZAB0
|
||||
//
|
||||
// * Tile vectors:
|
||||
//
|
||||
// Their representation is similar to regular tiles, but they have an extra
|
||||
// 'h' or 'v' to tell how the vector at [reg+offset] is layed out in the tile,
|
||||
// horizontally or vertically.
|
||||
//
|
||||
// e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and
|
||||
// ZAQ15, respectively. The horizontal/vertical is more a property of the
|
||||
// instruction, than a property of the asm-operand itself, or its register.
|
||||
// The distinction is required for the parsing/printing of the operand,
|
||||
// as from a compiler's perspective, the whole tile is read/written.
|
||||
//
|
||||
// * Accumulator matrix:
|
||||
//
|
||||
// This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
|
||||
// 'za'.
|
||||
|
||||
//
|
||||
// Tiles
|
||||
//
|
||||
|
||||
class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
|
||||
let Name = "MatrixTile" # EltSize;
|
||||
let DiagnosticType = "Invalid" # Name;
|
||||
let ParserMethod = "tryParseMatrixRegister";
|
||||
let RenderMethod = "addMatrixOperands";
|
||||
let PredicateMethod = "isMatrixRegOperand<"
|
||||
# "MatrixKind::Tile" # ", "
|
||||
# EltSize # ", AArch64::" # RC # "RegClassID>";
|
||||
}
|
||||
|
||||
class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
|
||||
: RegisterOperand<RC> {
|
||||
let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
|
||||
let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
|
||||
let PrintMethod = "printMatrixTile";
|
||||
}
|
||||
|
||||
def TileOp32 : MatrixTileOperand<32, 2, MPR32>;
|
||||
def TileOp64 : MatrixTileOperand<64, 3, MPR64>;
|
||||
|
||||
//
|
||||
// Tile vectors (horizontal and vertical)
|
||||
//
|
||||
|
||||
class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
|
||||
: AsmOperandClass {
|
||||
let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
|
||||
let DiagnosticType = "Invalid" # Name;
|
||||
let ParserMethod = "tryParseMatrixRegister";
|
||||
let RenderMethod = "addMatrixOperands";
|
||||
let PredicateMethod = "isMatrixRegOperand<"
|
||||
# "MatrixKind::"
|
||||
# !if(IsVertical, "Col", "Row") # ", "
|
||||
# EltSize # ", AArch64::" # RC # "RegClassID>";
|
||||
}
|
||||
|
||||
class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
|
||||
RegisterClass RC, int IsVertical>
|
||||
: RegisterOperand<RC> {
|
||||
let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
|
||||
IsVertical>;
|
||||
let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
|
||||
let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
|
||||
}
|
||||
|
||||
def TileVectorOpH8 : MatrixTileVectorOperand< 8, 0, MPR8, 0>;
|
||||
def TileVectorOpH16 : MatrixTileVectorOperand< 16, 1, MPR16, 0>;
|
||||
def TileVectorOpH32 : MatrixTileVectorOperand< 32, 2, MPR32, 0>;
|
||||
def TileVectorOpH64 : MatrixTileVectorOperand< 64, 3, MPR64, 0>;
|
||||
def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
|
||||
|
||||
def TileVectorOpV8 : MatrixTileVectorOperand< 8, 0, MPR8, 1>;
|
||||
def TileVectorOpV16 : MatrixTileVectorOperand< 16, 1, MPR16, 1>;
|
||||
def TileVectorOpV32 : MatrixTileVectorOperand< 32, 2, MPR32, 1>;
|
||||
def TileVectorOpV64 : MatrixTileVectorOperand< 64, 3, MPR64, 1>;
|
||||
def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
|
||||
|
||||
//
|
||||
// Accumulator matrix
|
||||
//
|
||||
|
||||
class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
|
||||
let Name = "Matrix";
|
||||
let DiagnosticType = "Invalid" # Name;
|
||||
let ParserMethod = "tryParseMatrixRegister";
|
||||
let RenderMethod = "addMatrixOperands";
|
||||
let PredicateMethod = "isMatrixRegOperand<"
|
||||
# "MatrixKind::Array" # ", "
|
||||
# EltSize # ", AArch64::" # RC # "RegClassID>";
|
||||
}
|
||||
|
||||
class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
|
||||
let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
|
||||
let PrintMethod = "printMatrix<" # EltSize # ">";
|
||||
}
|
||||
|
||||
def MatrixOp : MatrixOperand<MPR, 0>;
|
||||
|
||||
class MatrixTileListAsmOperand : AsmOperandClass {
|
||||
let Name = "MatrixTileList";
|
||||
let ParserMethod = "tryParseMatrixTileList";
|
||||
let RenderMethod = "addMatrixTileListOperands";
|
||||
let PredicateMethod = "isMatrixTileList";
|
||||
}
|
||||
|
||||
class MatrixTileListOperand : Operand<i8> {
|
||||
let ParserMatchClass = MatrixTileListAsmOperand<>;
|
||||
let DecoderMethod = "DecodeMatrixTileListRegisterClass";
|
||||
let EncoderMethod = "EncodeMatrixTileListRegisterClass";
|
||||
let PrintMethod = "printMatrixTileList";
|
||||
}
|
||||
|
||||
def MatrixTileList : MatrixTileListOperand<>;
|
||||
|
||||
def MatrixIndexGPR32_12_15 : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 12, 15)> {
|
||||
let DiagnosticType = "InvalidMatrixIndexGPR32_12_15";
|
||||
}
|
||||
def MatrixIndexGPR32Op12_15 : RegisterOperand<MatrixIndexGPR32_12_15> {
|
||||
let EncoderMethod = "encodeMatrixIndexGPR32";
|
||||
}
|
||||
|
||||
def SVCROperand : AsmOperandClass {
|
||||
let Name = "SVCR";
|
||||
let ParserMethod = "tryParseSVCR";
|
||||
let DiagnosticType = "Invalid" # Name;
|
||||
}
|
||||
|
||||
def svcr_op : Operand<i32> {
|
||||
let ParserMatchClass = SVCROperand;
|
||||
let PrintMethod = "printSVCROp";
|
||||
let DecoderMethod = "DecodeSVCROp";
|
||||
let MCOperandPredicate = [{
|
||||
if (!MCOp.isImm())
|
||||
return false;
|
||||
return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr;
|
||||
}];
|
||||
}
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Add vector elements horizontally or vertically to ZA tile.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
|
||||
def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSMEI64] in {
|
||||
def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
|
||||
def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Outer products
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa">;
|
||||
defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops">;
|
||||
|
||||
def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">;
|
||||
def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSMEF64] in {
|
||||
def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">;
|
||||
def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa">;
|
||||
defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops">;
|
||||
|
||||
def SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa">;
|
||||
def SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops">;
|
||||
def UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa">;
|
||||
def UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops">;
|
||||
def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa">;
|
||||
def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">;
|
||||
def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">;
|
||||
def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSMEI64] in {
|
||||
def SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa">;
|
||||
def SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops">;
|
||||
def UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa">;
|
||||
def UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops">;
|
||||
def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">;
|
||||
def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">;
|
||||
def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">;
|
||||
def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Loads and stores
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm LD1_MXIPXX : sme_mem_ld_ss<"ld1">;
|
||||
defm ST1_MXIPXX : sme_mem_st_ss<"st1">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Spill + fill
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm LDR_ZA : sme_fill<"ldr">;
|
||||
defm STR_ZA : sme_spill<"str">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Move instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm INSERT_MXIPZ : sme_vector_to_tile<"mova">;
|
||||
defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Zero instruction
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm ZERO_M : sme_zero<"zero">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Mode selection and state access instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
|
||||
// both fields:
|
||||
//
|
||||
// MSR SVCRSM, #<imm1>
|
||||
// MSR SVCRZA, #<imm1>
|
||||
// MSR SVCRSMZA, #<imm1>
|
||||
//
|
||||
// It's tricky to using the existing pstate operand defined in
|
||||
// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
|
||||
// when these fields are also encoded in CRm[3:1].
|
||||
class MSRpstatesvcrImm0_1
|
||||
: PstateWriteSimple<(ins svcr_op:$pstatefield, imm0_1:$imm), "msr",
|
||||
"\t$pstatefield, $imm">,
|
||||
Sched<[WriteSys]> {
|
||||
bits<3> pstatefield;
|
||||
bit imm;
|
||||
let Inst{18-16} = 0b011; // op1
|
||||
let Inst{11-9} = pstatefield;
|
||||
let Inst{8} = imm;
|
||||
let Inst{7-5} = 0b011; // op2
|
||||
}
|
||||
|
||||
def MSRpstatesvcrImm1 : MSRpstatesvcrImm0_1;
|
||||
def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>;
|
||||
def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
|
||||
def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
|
||||
|
||||
def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>;
|
||||
def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>;
|
||||
def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVE2 instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def REVD_ZPmZ : sve2_int_perm_revd<"revd">;
|
||||
|
||||
defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>;
|
||||
defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>;
|
||||
|
||||
defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">;
|
||||
|
||||
} // End let Predicates = [HasSME]
|
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -27,7 +26,9 @@ def CortexA53Model : SchedMachineModel {
|
|||
// v 1.0 Spreadsheet
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
}
|
||||
|
||||
|
||||
|
@ -126,7 +127,8 @@ def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
|
|||
def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }
|
||||
|
||||
// FP Mul, Div, Sqrt
|
||||
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
|
||||
|
@ -148,6 +150,7 @@ def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
|
|||
// No forwarding for these reads.
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
|
||||
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
|
||||
|
|
|
@ -0,0 +1,361 @@
|
|||
//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for the ARM Cortex-A55 processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// The following definitions describe the per-operand machine model.
|
||||
// This works with MachineScheduler. See MCSchedModel.h for details.
|
||||
|
||||
// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
|
||||
def CortexA55Model : SchedMachineModel {
|
||||
let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor
|
||||
let IssueWidth = 2; // It dual-issues under most circumstances
|
||||
let LoadLatency = 4; // Cycles for loads to access the cache. The
|
||||
// optimisation guide shows that most loads have
|
||||
// a latency of 3, but some have a latency of 4
|
||||
// or 5. Setting it 4 looked to be good trade-off.
|
||||
let MispredictPenalty = 8; // A branch direction mispredict.
|
||||
let PostRAScheduler = 1; // Enable PostRA scheduler pass.
|
||||
let CompleteModel = 0; // Covers instructions applicable to Cortex-A55.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define each kind of processor resource and number available.
|
||||
|
||||
// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the
|
||||
// Cortex-A55 is in-order.
|
||||
|
||||
def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
|
||||
def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bi wide
|
||||
def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
|
||||
def CortexA55UnitLd : ProcResource<1> { let BufferSize = 0; } // Load pipe
|
||||
def CortexA55UnitSt : ProcResource<1> { let BufferSize = 0; } // Store pipe
|
||||
def CortexA55UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
|
||||
|
||||
// The FP DIV/SQRT instructions execute totally differently from the FP ALU
|
||||
// instructions, which can mostly be dual-issued; that's why for now we model
|
||||
// them with 2 resources.
|
||||
def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
|
||||
def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC
|
||||
def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific SchedWrite types
|
||||
|
||||
let SchedModel = CortexA55Model in {
|
||||
|
||||
// These latencies are modeled without taking into account forwarding paths
|
||||
// (the software optimisation guide lists latencies taking into account
|
||||
// typical forwarding paths).
|
||||
def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; } // MOVN, MOVZ
|
||||
def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; } // ALU
|
||||
def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg
|
||||
def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg
|
||||
def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; } // EXTR from a reg pair
|
||||
def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; } // Shift/Scale
|
||||
|
||||
// MAC
|
||||
def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply
|
||||
def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply
|
||||
|
||||
// Div
|
||||
def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
|
||||
let Latency = 8; let ResourceCycles = [8];
|
||||
}
|
||||
def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
|
||||
let Latency = 8; let ResourceCycles = [8];
|
||||
}
|
||||
|
||||
// Load
|
||||
def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; }
|
||||
def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
|
||||
def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
|
||||
|
||||
// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd
|
||||
// below, choosing the median of 3 which makes the latency 6.
|
||||
// An extra cycle is needed to get the swizzling right.
|
||||
def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
|
||||
let ResourceCycles = [3]; }
|
||||
def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
|
||||
def CortexA55WriteVLD1SI : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; let SingleIssue = 1; }
|
||||
def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
|
||||
let ResourceCycles = [2]; }
|
||||
def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
|
||||
let ResourceCycles = [3]; }
|
||||
def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
|
||||
let ResourceCycles = [4]; }
|
||||
def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
|
||||
let ResourceCycles = [5]; }
|
||||
def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
|
||||
let ResourceCycles = [6]; }
|
||||
def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
|
||||
let ResourceCycles = [7]; }
|
||||
def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
|
||||
let ResourceCycles = [8]; }
|
||||
|
||||
def CortexA55WriteLDP1 : SchedWriteRes<[]> { let Latency = 4; }
|
||||
def CortexA55WriteLDP2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; }
|
||||
def CortexA55WriteLDP4 : SchedWriteRes<[CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd]> { let Latency = 6; }
|
||||
|
||||
// Pre/Post Indexing - Performed as part of address generation
|
||||
def : WriteRes<WriteAdr, []> { let Latency = 0; }
|
||||
|
||||
// Store
|
||||
let RetireOOO = 1 in {
|
||||
def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 1; }
|
||||
}
|
||||
def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
|
||||
|
||||
// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
|
||||
def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
|
||||
let ResourceCycles = [2];}
|
||||
def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
|
||||
def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
|
||||
let ResourceCycles = [2]; }
|
||||
def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
|
||||
let ResourceCycles = [3]; }
|
||||
def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
|
||||
let ResourceCycles = [4]; }
|
||||
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
|
||||
// Branch
|
||||
def : WriteRes<WriteBr, [CortexA55UnitB]>;
|
||||
def : WriteRes<WriteBrReg, [CortexA55UnitB]>;
|
||||
def : WriteRes<WriteSys, [CortexA55UnitB]>;
|
||||
def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
|
||||
def : WriteRes<WriteHint, [CortexA55UnitB]>;
|
||||
|
||||
// FP ALU
|
||||
// As WriteF result is produced in F5 and it can be mostly forwarded
|
||||
// to consumer at F1, the effectively latency is set as 4.
|
||||
def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
|
||||
|
||||
// FP ALU specific new schedwrite definitions
|
||||
def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
|
||||
def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
|
||||
def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
|
||||
def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
|
||||
|
||||
// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
|
||||
def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
|
||||
|
||||
let RetireOOO = 1 in {
|
||||
def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
|
||||
let ResourceCycles = [29]; }
|
||||
def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
|
||||
def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
|
||||
let ResourceCycles = [5]; }
|
||||
def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
|
||||
let ResourceCycles = [10]; }
|
||||
def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
|
||||
let ResourceCycles = [19]; }
|
||||
def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
|
||||
let ResourceCycles = [5]; }
|
||||
def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
|
||||
let ResourceCycles = [9]; }
|
||||
def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
|
||||
let ResourceCycles = [19]; }
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific SchedRead types.
|
||||
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 1>;
|
||||
def : ReadAdvance<ReadAdrBase, 1>;
|
||||
def : ReadAdvance<ReadST, 1>;
|
||||
|
||||
// ALU - ALU input operands are generally needed in EX1. An operand produced in
|
||||
// in say EX2 can be forwarded for consumption to ALU in EX1, thereby
|
||||
// allowing back-to-back ALU operations such as add. If an operand requires
|
||||
// a shift, it will, however, be required in ISS stage.
|
||||
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
// Shifted operand
|
||||
def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
def CortexA55ReadISReg : SchedReadVariant<[
|
||||
SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
|
||||
SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
|
||||
def : SchedAlias<ReadISReg, CortexA55ReadISReg>;
|
||||
|
||||
def CortexA55ReadIEReg : SchedReadVariant<[
|
||||
SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
|
||||
SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
|
||||
def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;
|
||||
|
||||
// MUL
|
||||
def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
|
||||
// Div
|
||||
def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific InstRWs.
|
||||
|
||||
//---
|
||||
// Miscellaneous
|
||||
//---
|
||||
def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?Wi")>;
|
||||
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPSi")>;
|
||||
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)i")>;
|
||||
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQi")>;
|
||||
def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>;
|
||||
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>;
|
||||
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>;
|
||||
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>;
|
||||
def : InstRW<[WriteI], (instrs COPY)>;
|
||||
//---
|
||||
// Vector Loads - 64-bit per cycle
|
||||
//---
|
||||
// 1-element structures
|
||||
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element
|
||||
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
|
||||
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
|
||||
def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
// 2-element structures
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
|
||||
|
||||
// 3-element structures
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
// 4-element structures
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
|
||||
def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
|
||||
def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
//---
|
||||
// Vector Stores
|
||||
//---
|
||||
def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
//---
|
||||
// Floating Point Conversions, MAC, DIV, SQRT
|
||||
//---
|
||||
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
|
||||
def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>;
|
||||
def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
|
||||
def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
|
||||
def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
|
||||
def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>;
|
||||
def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
|
||||
def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
|
||||
def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
|
||||
def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
|
||||
def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
|
||||
|
||||
}
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -32,7 +31,9 @@ def CortexA57Model : SchedMachineModel {
|
|||
let LoopMicroOpBufferSize = 16;
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -93,9 +94,10 @@ def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
|
|||
def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
|
||||
def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
|
||||
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
|
||||
def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
|
||||
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
|
||||
def : SchedAlias<WriteV, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVd, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVq, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
|
||||
def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
|
||||
|
||||
|
@ -115,6 +117,7 @@ def : ReadAdvance<ReadIM, 0>;
|
|||
def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
|
||||
def : ReadAdvance<ReadID, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
|
||||
|
@ -350,12 +353,16 @@ def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")
|
|||
// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
|
||||
// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
|
||||
|
||||
// Cortex A57 Software Optimization Guide Sec 3.14
|
||||
// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate
|
||||
def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>;
|
||||
|
||||
// ASIMD absolute diff accum, D-form
|
||||
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
|
||||
def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
|
||||
// ASIMD absolute diff accum, Q-form
|
||||
def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
|
||||
def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
|
||||
// ASIMD absolute diff accum long
|
||||
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>;
|
||||
def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>;
|
||||
|
||||
// ASIMD arith, reduce, 4H/4S
|
||||
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
|
||||
|
@ -372,32 +379,41 @@ def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>
|
|||
def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
|
||||
|
||||
// ASIMD multiply, D-form
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
|
||||
// MUL
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
|
||||
// PMUL, SQDMULH, SQRDMULH
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
|
||||
|
||||
// ASIMD multiply, Q-form
|
||||
def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
// MUL
|
||||
def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
// PMUL, SQDMULH, SQRDMULH
|
||||
def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
|
||||
// Cortex A57 Software Optimization Guide Sec 3.14
|
||||
def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
|
||||
def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
|
||||
|
||||
// ASIMD multiply accumulate, D-form
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
|
||||
// ASIMD multiply accumulate, Q-form
|
||||
def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
|
||||
// ASIMD multiply accumulate long
|
||||
// ASIMD multiply accumulate saturating long
|
||||
def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
|
||||
def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>;
|
||||
def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>;
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>;
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>;
|
||||
|
||||
// ASIMD multiply long
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>;
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>;
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>;
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>;
|
||||
def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>;
|
||||
|
||||
// ASIMD pairwise add and accumulate
|
||||
// ASIMD shift accumulate
|
||||
def A57WriteIVA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
|
||||
def A57ReadIVA3 : SchedReadAdvance<3, [A57WriteIVA]>;
|
||||
def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>;
|
||||
def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
|
||||
def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>;
|
||||
def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
|
||||
|
||||
// ASIMD shift by immed, complex
|
||||
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>;
|
||||
|
@ -474,17 +490,22 @@ def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i6
|
|||
def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>;
|
||||
|
||||
// ASIMD FP multiply, D-form, FZ
|
||||
def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
|
||||
def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
|
||||
// ASIMD FP multiply, Q-form, FZ
|
||||
def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
|
||||
def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
|
||||
|
||||
// ASIMD FP multiply accumulate, D-form, FZ
|
||||
// ASIMD FP multiply accumulate, Q-form, FZ
|
||||
def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
|
||||
def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; }
|
||||
def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>;
|
||||
|
||||
// Cortex A57 Software Optimization Guide Sec 3.15
|
||||
// Advances from FP mul and mul-accum to mul-accum
|
||||
def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
|
||||
def A57ReadFPVMA6 : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
|
||||
|
||||
def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
|
||||
def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
|
||||
def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
|
||||
|
||||
// ASIMD FP round, D-form
|
||||
def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>;
|
||||
|
@ -502,10 +523,10 @@ def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
|
|||
// Q form - v16i8, v8i16, v4i32, v2i64
|
||||
|
||||
// ASIMD bitwise insert, Q-form
|
||||
def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>;
|
||||
def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL|BSP)v16i8")>;
|
||||
|
||||
// ASIMD duplicate, gen reg, D-form and Q-form
|
||||
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>;
|
||||
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUP(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>;
|
||||
|
||||
// ASIMD move, saturating
|
||||
|
@ -547,8 +568,9 @@ def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v
|
|||
|
||||
def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>;
|
||||
|
||||
// Cortex A57 Software Optimization Guide Sec 3.10
|
||||
def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
|
||||
def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA]>;
|
||||
def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>;
|
||||
def A57ReadFPM : SchedReadAdvance<0>;
|
||||
def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
|
||||
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -14,6 +13,10 @@
|
|||
// Prefix: A57Write
|
||||
// Latency: #cyc
|
||||
// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
|
||||
// Postfix (optional): (XYZ)_Forward
|
||||
//
|
||||
// The postfix is added to differentiate SchedWriteRes that are used in
|
||||
// subsequent SchedReadAdvances.
|
||||
//
|
||||
// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
|
||||
// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
|
||||
|
@ -26,7 +29,9 @@
|
|||
def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1V_FP_Forward : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
|
||||
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
|
||||
def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
|
||||
let ResourceCycles = [17]; }
|
||||
|
@ -46,6 +51,7 @@ def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
|
|||
def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
|
||||
def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
|
||||
def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
|
||||
def A57Write_4cyc_1X_NonMul_Forward : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
|
||||
def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
|
||||
def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
|
||||
def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
|
||||
|
@ -94,6 +100,10 @@ def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
|
|||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_6cyc_2W_Mul_Forward : SchedWriteRes<[A57UnitW, A57UnitW]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
|
||||
A57UnitL]> {
|
||||
let Latency = 5;
|
||||
|
@ -103,10 +113,18 @@ def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
|
|||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_5cyc_2V_FP_Forward : SchedWriteRes<[A57UnitV, A57UnitV]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_5cyc_2X_NonMul_Forward : SchedWriteRes<[A57UnitX, A57UnitX]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
|
||||
A57UnitV]> {
|
||||
let Latency = 10;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -19,7 +18,9 @@ def CycloneModel : SchedMachineModel {
|
|||
let MispredictPenalty = 16; // 14-19 cycles are typical.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -257,6 +258,7 @@ def CyReadAdrBase : SchedReadVariant<[
|
|||
SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift.
|
||||
def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map AArch64->Cyclone type.
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
//---
|
||||
// 7.8.9,7.8.11. Load/Store, paired
|
||||
|
@ -302,7 +304,8 @@ def : WriteRes<WriteSys, []> {let Latency = -1;}
|
|||
// 7.9 Vector Unit Instructions
|
||||
|
||||
// Simple vector operations take 2 cycles.
|
||||
def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
|
||||
def : WriteRes<WriteVd, [CyUnitV]> {let Latency = 2;}
|
||||
def : WriteRes<WriteVq, [CyUnitV]> {let Latency = 2;}
|
||||
|
||||
// Define some longer latency vector op types for Cyclone.
|
||||
def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
|
||||
|
@ -333,7 +336,7 @@ def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
|
|||
// COPY is handled above in the WriteMov Variant.
|
||||
def WriteVMov : SchedWriteVariant<[
|
||||
SchedVar<WriteVMovPred, [WriteX]>,
|
||||
SchedVar<NoSchedPred, [WriteV]>]>;
|
||||
SchedVar<NoSchedPred, [WriteVq]>]>;
|
||||
def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
|
||||
|
||||
// FMOVSr,FMOVDr are WriteF.
|
||||
|
@ -353,7 +356,7 @@ def : WriteRes<WriteFCopy, [CyUnitLS]> {
|
|||
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
|
||||
|
||||
// INS V[x],R
|
||||
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
|
||||
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
|
||||
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
|
||||
|
||||
// SMOV,UMOV R,V[x]
|
||||
|
@ -495,7 +498,7 @@ def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>;
|
|||
// WriteV includes:
|
||||
// SHLL,SSHLL,USHLL
|
||||
// SLI,SRI
|
||||
// BIF,BIT,BSL
|
||||
// BIF,BIT,BSL,BSP
|
||||
// EXT
|
||||
// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
|
||||
// XTN2
|
||||
|
@ -569,7 +572,7 @@ def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
|
|||
//---
|
||||
|
||||
// FCVT lengthen f16/s32
|
||||
def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
|
||||
def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
|
||||
|
||||
// FCVT,FCVTN,FCVTXN
|
||||
// SCVTF,UCVTF V,V
|
||||
|
@ -679,61 +682,61 @@ def : InstRW<[WriteVLDShuffle],
|
|||
def : InstRW<[WriteVLDShuffle, WriteAdr],
|
||||
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
|
||||
(instregex "LD2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
|
||||
(instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
|
||||
(instregex "LD2i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
|
||||
(instregex "LD2i(8|16|32)_POST")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
|
||||
(instregex "LD2i64$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
|
||||
(instregex "LD2i64_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
|
||||
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3Threev(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD3Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
|
||||
(instregex "LD3i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
|
||||
(instregex "LD3i(8|16|32)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3i64$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3i64_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
|
||||
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD3Rv1d,LD3Rv2d)>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD4Fourv(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
|
||||
WriteVLDPairShuffle, WriteVLDPairShuffle],
|
||||
|
@ -742,25 +745,25 @@ def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
|
|||
WriteVLDPairShuffle, WriteVLDPairShuffle],
|
||||
(instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4i(8|16|32)_POST")>;
|
||||
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4i64)>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD4i64_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4Rv1d,LD4Rv2d)>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
|
||||
|
||||
//---
|
||||
|
|
|
@ -1,847 +0,0 @@
|
|||
//=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for the Samsung Exynos M1 to support
|
||||
// instruction scheduling and other instruction cost heuristics.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// The Exynos-M1 is a traditional superscalar microprocessor with a
|
||||
// 4-wide in-order stage for decode and dispatch and a wider issue stage.
|
||||
// The execution units and loads and stores are out-of-order.
|
||||
|
||||
def ExynosM1Model : SchedMachineModel {
|
||||
let IssueWidth = 4; // Up to 4 uops per cycle.
|
||||
let MicroOpBufferSize = 96; // ROB size.
|
||||
let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.
|
||||
let LoadLatency = 4; // Optimistic load cases.
|
||||
let MispredictPenalty = 14; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1; // Use the default model otherwise.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define each kind of processor resource and number available on the Exynos-M1,
|
||||
// which has 9 pipelines, each with its own queue with out-of-order dispatch.
|
||||
|
||||
let SchedModel = ExynosM1Model in {
|
||||
|
||||
def M1UnitA : ProcResource<2>; // Simple integer
|
||||
def M1UnitC : ProcResource<1>; // Simple and complex integer
|
||||
def M1UnitD : ProcResource<1>; // Integer division (inside C, serialized)
|
||||
def M1UnitB : ProcResource<2>; // Branch
|
||||
def M1UnitL : ProcResource<1>; // Load
|
||||
def M1UnitS : ProcResource<1>; // Store
|
||||
def M1PipeF0 : ProcResource<1>; // FP #0
|
||||
let Super = M1PipeF0 in {
|
||||
def M1UnitFMAC : ProcResource<1>; // FP multiplication
|
||||
def M1UnitNAL0 : ProcResource<1>; // Simple vector
|
||||
def M1UnitNMISC : ProcResource<1>; // Miscellanea
|
||||
def M1UnitFCVT : ProcResource<1>; // FP conversion
|
||||
def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
|
||||
}
|
||||
def M1PipeF1 : ProcResource<1>; // FP #1
|
||||
let Super = M1PipeF1 in {
|
||||
def M1UnitFADD : ProcResource<1>; // Simple FP
|
||||
def M1UnitNAL1 : ProcResource<1>; // Simple vector
|
||||
def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
|
||||
def M1UnitFST : ProcResource<1>; // FP store
|
||||
}
|
||||
|
||||
def M1UnitALU : ProcResGroup<[M1UnitA,
|
||||
M1UnitC]>; // All integer
|
||||
def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
|
||||
M1UnitNAL1]>; // All simple vector
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Predicates.
|
||||
|
||||
def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
|
||||
MI->getOperand(0).getReg() != AArch64::LR}]>;
|
||||
def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Coarse scheduling model.
|
||||
|
||||
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
|
||||
def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
|
||||
def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2;
|
||||
let ResourceCycles = [2]; }
|
||||
def M1WriteAB : SchedWriteRes<[M1UnitALU,
|
||||
M1UnitC]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteAC : SchedWriteRes<[M1UnitALU,
|
||||
M1UnitALU,
|
||||
M1UnitC]> { let Latency = 2;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteAD : SchedWriteRes<[M1UnitALU,
|
||||
M1UnitC]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAA]>]>;
|
||||
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
|
||||
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
|
||||
|
||||
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
|
||||
def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAC]>]>;
|
||||
|
||||
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
|
||||
def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
|
||||
def M1WriteLA : SchedWriteRes<[M1UnitL]> { let Latency = 6;
|
||||
let ResourceCycles = [2]; }
|
||||
def M1WriteLB : SchedWriteRes<[M1UnitL,
|
||||
M1UnitA]> { let Latency = 4;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteLC : SchedWriteRes<[M1UnitL,
|
||||
M1UnitA]> { let Latency = 5;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteLD : SchedWriteRes<[M1UnitL,
|
||||
M1UnitA]> { let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2, 1]; }
|
||||
def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
|
||||
let NumMicroOps = 0; }
|
||||
def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M1WriteLC]>]>;
|
||||
def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M1WriteLD]>]>;
|
||||
|
||||
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
|
||||
def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
|
||||
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
|
||||
def M1WriteSA : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSB : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 3;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSC : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 3;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteSD : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSE : SchedWriteRes<[M1UnitS,
|
||||
M1UnitA]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteSE]>]>;
|
||||
def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteSB]>]>;
|
||||
|
||||
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>;
|
||||
|
||||
// Branch instructions.
|
||||
def : WriteRes<WriteBr, []> { let Latency = 0; }
|
||||
def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
|
||||
|
||||
// Move instructions.
|
||||
def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
|
||||
|
||||
// Divide and multiply instructions.
|
||||
def : WriteRes<WriteID32, [M1UnitC,
|
||||
M1UnitD]> { let Latency = 13;
|
||||
let ResourceCycles = [1, 13]; }
|
||||
def : WriteRes<WriteID64, [M1UnitC,
|
||||
M1UnitD]> { let Latency = 21;
|
||||
let ResourceCycles = [1, 21]; }
|
||||
// TODO: Long multiplication take 5 cycles and also the ALU.
|
||||
def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
|
||||
def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4;
|
||||
let ResourceCycles = [2]; }
|
||||
|
||||
// Miscellaneous instructions.
|
||||
def : WriteRes<WriteExtr, [M1UnitALU,
|
||||
M1UnitALU]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
|
||||
// Addressing modes.
|
||||
def : WriteRes<WriteAdr, []> { let Latency = 1;
|
||||
let NumMicroOps = 0; }
|
||||
def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
|
||||
|
||||
// Load instructions.
|
||||
def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
|
||||
def : WriteRes<WriteLDHi, []> { let Latency = 4;
|
||||
let NumMicroOps = 0; }
|
||||
def : SchedAlias<WriteLDIdx, M1WriteLX>;
|
||||
|
||||
// Store instructions.
|
||||
def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
|
||||
def : SchedAlias<WriteSTIdx, M1WriteSX>;
|
||||
|
||||
// FP data instructions.
|
||||
def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
|
||||
def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15;
|
||||
let ResourceCycles = [15]; }
|
||||
def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
|
||||
|
||||
// FP miscellaneous instructions.
|
||||
def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
|
||||
|
||||
// FP load instructions.
|
||||
def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
|
||||
|
||||
// FP store instructions.
|
||||
def : WriteRes<WriteVST, [M1UnitS,
|
||||
M1UnitFST]> { let Latency = 1;
|
||||
let NumMicroOps = 1; }
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
|
||||
|
||||
// Other miscellaneous instructions.
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteHint, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteSys, []> { let Latency = 1; }
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Fast forwarding.
|
||||
|
||||
// TODO: Add FP register forwarding rules.
|
||||
def : ReadAdvance<ReadI, 0>;
|
||||
def : ReadAdvance<ReadISReg, 0>;
|
||||
def : ReadAdvance<ReadIEReg, 0>;
|
||||
def : ReadAdvance<ReadIM, 0>;
|
||||
// TODO: The forwarding for WriteIM32 saves actually 2 cycles.
|
||||
def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
|
||||
def : ReadAdvance<ReadID, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Finer scheduling model.
|
||||
|
||||
def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitFADD]> { let Latency = 9;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteNEONB : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitFST]> { let Latency = 5;
|
||||
let NumMicroOps = 2;}
|
||||
def M1WriteNEONC : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitFST]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEOND : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitFST,
|
||||
M1UnitL]> { let Latency = 10;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT,
|
||||
M1UnitFST]> { let Latency = 8;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT,
|
||||
M1UnitFST,
|
||||
M1UnitL]> { let Latency = 13;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC,
|
||||
M1UnitFST]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONH : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitFST]> { let Latency = 3;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONI : SchedWriteRes<[M1UnitFST,
|
||||
M1UnitL]> { let Latency = 9;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC,
|
||||
M1UnitFMAC]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC,
|
||||
M1UnitFMAC]> { let Latency = 7;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONL : SchedWriteRes<[M1UnitNALU]> { let Latency = 2;
|
||||
let ResourceCycles = [2]; }
|
||||
def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
|
||||
def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
|
||||
def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
|
||||
def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
|
||||
def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
|
||||
// TODO
|
||||
def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15;
|
||||
let ResourceCycles = [15]; }
|
||||
def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23;
|
||||
let ResourceCycles = [23]; }
|
||||
def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; }
|
||||
def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; }
|
||||
def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; }
|
||||
def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; }
|
||||
def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; }
|
||||
def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
|
||||
def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
|
||||
def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; }
|
||||
def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
|
||||
def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
|
||||
def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
|
||||
def M1WriteTB : SchedWriteRes<[M1UnitC,
|
||||
M1UnitALU]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteVLDA : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteVLDB : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL]> { let Latency = 7;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteVLDC : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL]> { let Latency = 8;
|
||||
let NumMicroOps = 4; }
|
||||
def M1WriteVLDD : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU]> { let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2, 1]; }
|
||||
def M1WriteVLDE : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteVLDF : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL]> { let Latency = 10;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1, 1]; }
|
||||
def M1WriteVLDG : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 7;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2, 1, 1]; }
|
||||
def M1WriteVLDH : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 6;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteVLDI : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL]> { let Latency = 12;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2, 2, 2]; }
|
||||
def M1WriteVLDJ : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 9;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [2, 1, 1, 1]; }
|
||||
def M1WriteVLDK : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 9;
|
||||
let NumMicroOps = 5;
|
||||
let ResourceCycles = [2, 1, 1, 1, 1]; }
|
||||
def M1WriteVLDL : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitL,
|
||||
M1UnitNALU]> { let Latency = 7;
|
||||
let NumMicroOps = 5;
|
||||
let ResourceCycles = [1, 1, 1, 1, 1]; }
|
||||
def M1WriteVLDM : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 7;
|
||||
let NumMicroOps = 6;
|
||||
let ResourceCycles = [1, 1, 1, 1, 1, 1]; }
|
||||
def M1WriteVLDN : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL]> { let Latency = 14;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [2, 1, 2, 1]; }
|
||||
def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
|
||||
def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
|
||||
def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
|
||||
def M1WriteVSTD : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [7, 1, 1]; }
|
||||
def M1WriteVSTE : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 8;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [7, 1, 1, 1, 1]; }
|
||||
def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 15;
|
||||
let NumMicroOps = 5;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1]; }
|
||||
def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 16;
|
||||
let NumMicroOps = 6;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1]; }
|
||||
def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 14;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1]; }
|
||||
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 17;
|
||||
let NumMicroOps = 7;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
|
||||
|
||||
// Branch instructions
|
||||
def : InstRW<[M1WriteB1], (instrs Bcc)>;
|
||||
def : InstRW<[M1WriteA1], (instrs BL)>;
|
||||
def : InstRW<[M1WriteBX], (instrs BLR)>;
|
||||
def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
|
||||
def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : InstRW<[M1WriteA1], (instrs COPY)>;
|
||||
def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
|
||||
|
||||
// Divide and multiply instructions.
|
||||
|
||||
// Miscellaneous instructions.
|
||||
|
||||
// Load instructions.
|
||||
def : InstRW<[M1WriteLB,
|
||||
WriteLDHi,
|
||||
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
|
||||
def : InstRW<[M1WriteLX,
|
||||
ReadAdrBase], (instregex "^PRFMro[WX]")>;
|
||||
|
||||
// Store instructions.
|
||||
|
||||
// FP data instructions.
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
|
||||
def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>;
|
||||
def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
|
||||
def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
|
||||
def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
|
||||
def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
|
||||
def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>;
|
||||
def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
|
||||
def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>;
|
||||
def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
|
||||
def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
|
||||
def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
|
||||
|
||||
// FP miscellaneous instructions.
|
||||
def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
|
||||
def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
|
||||
def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
|
||||
def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^FRECPXv1")>;
|
||||
def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)S(16|32|64)")>;
|
||||
def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
|
||||
def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
|
||||
|
||||
// FP load instructions.
|
||||
def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>;
|
||||
def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
|
||||
def : InstRW<[M1WriteLY,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M1WriteLD,
|
||||
ReadAdrBase], (instregex "^LDRQro[WX]")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
M1WriteLH], (instregex "^LDN?P[DS]i")>;
|
||||
def : InstRW<[M1WriteLA,
|
||||
M1WriteLH], (instregex "^LDN?PQi")>;
|
||||
def : InstRW<[M1WriteLC,
|
||||
M1WriteLH,
|
||||
WriteAdr], (instregex "^LDP[DS](post|pre)")>;
|
||||
def : InstRW<[M1WriteLD,
|
||||
M1WriteLH,
|
||||
WriteAdr], (instregex "^LDPQ(post|pre)")>;
|
||||
|
||||
// FP store instructions.
|
||||
def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
|
||||
def : InstRW<[M1WriteSY,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M1WriteSB,
|
||||
ReadAdrBase], (instregex "^STRQro[WX]")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STP[DS](post|pre)")>;
|
||||
def : InstRW<[M1WriteSC,
|
||||
WriteAdr], (instregex "^STPQ(post|pre)")>;
|
||||
|
||||
// ASIMD instructions.
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
|
||||
def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^S[RS]I[dv]")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>;
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
|
||||
def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
|
||||
def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
|
||||
def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>;
|
||||
def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>;
|
||||
def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>;
|
||||
def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
|
||||
def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
|
||||
def : InstRW<[M1WriteNEONJ], (instregex "^FMULX?v.i")>;
|
||||
def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v.f")>;
|
||||
def : InstRW<[M1WriteNEONK], (instregex "^FML[AS]v.i")>;
|
||||
def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v.f")>;
|
||||
def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;
|
||||
|
||||
// ASIMD miscellaneous instructions.
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
|
||||
def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
|
||||
def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^EXTv8")>;
|
||||
def : InstRW<[M1WriteNEONL], (instregex "^EXTv16")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^MOVI[Dv]")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^FMOVv")>;
|
||||
def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>;
|
||||
def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
|
||||
def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
|
||||
(instregex "^TB[LX]v8i8Two")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
|
||||
(instregex "^TB[LX]v8i8Three")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
|
||||
(instregex "^TB[LX]v8i8Four")>;
|
||||
def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
|
||||
(instregex "^TB[LX]v16i8Two")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
|
||||
(instregex "^TB[LX]v16i8Three")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
|
||||
(instregex "^TB[LX]v16i8Four")>;
|
||||
def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
|
||||
def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
|
||||
def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
|
||||
|
||||
// ASIMD load instructions.
|
||||
def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>;
|
||||
def : InstRW<[M1WriteVLDD,
|
||||
WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDE,
|
||||
WriteAdr], (instregex "LD1i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVLDG,
|
||||
WriteAdr], (instregex "LD2i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>;
|
||||
def : InstRW<[M1WriteVLDG,
|
||||
WriteAdr], (instregex "LD2i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDH,
|
||||
WriteAdr], (instregex "LD2i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVLDJ,
|
||||
WriteAdr], (instregex "LD3i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>;
|
||||
def : InstRW<[M1WriteVLDJ,
|
||||
WriteAdr], (instregex "LD3i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDL,
|
||||
WriteAdr], (instregex "LD3i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVLDK,
|
||||
WriteAdr], (instregex "LD4i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>;
|
||||
def : InstRW<[M1WriteVLDK,
|
||||
WriteAdr], (instregex "LD4i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDM,
|
||||
WriteAdr], (instregex "LD4i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
|
||||
|
||||
// ASIMD store instructions.
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST1i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVSTA,
|
||||
WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVSTA,
|
||||
WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVSTB,
|
||||
WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVSTB,
|
||||
WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVSTC,
|
||||
WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVSTC,
|
||||
WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST2i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVSTE,
|
||||
WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>;
|
||||
def : InstRW<[M1WriteVSTE,
|
||||
WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVSTH,
|
||||
WriteAdr], (instregex "ST3i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>;
|
||||
def : InstRW<[M1WriteVSTH,
|
||||
WriteAdr], (instregex "ST3i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>;
|
||||
def : InstRW<[M1WriteVSTF,
|
||||
WriteAdr], (instregex "ST3i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVSTF,
|
||||
WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVSTG,
|
||||
WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>;
|
||||
def : InstRW<[M1WriteVSTG,
|
||||
WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVSTH,
|
||||
WriteAdr], (instregex "ST4i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>;
|
||||
def : InstRW<[M1WriteVSTH,
|
||||
WriteAdr], (instregex "ST4i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>;
|
||||
def : InstRW<[M1WriteVSTF,
|
||||
WriteAdr], (instregex "ST4i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVSTF,
|
||||
WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVSTI,
|
||||
WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>;
|
||||
def : InstRW<[M1WriteVSTI,
|
||||
WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
|
||||
|
||||
// Cryptography instructions.
|
||||
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
|
||||
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
|
||||
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
|
||||
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
|
||||
|
||||
def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
|
||||
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
|
||||
def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
|
||||
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
|
||||
def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
|
||||
|
||||
// CRC instructions.
|
||||
def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
|
||||
|
||||
} // SchedModel = ExynosM1Model
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -25,10 +24,9 @@ def ExynosM3Model : SchedMachineModel {
|
|||
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1; // Use the default model otherwise.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -106,24 +104,13 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
|
|||
M3UnitNSHF1,
|
||||
M3UnitNSHF2]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Predicates.
|
||||
|
||||
def M3BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
|
||||
MI->getOperand(0).isReg() &&
|
||||
MI->getOperand(0).getReg() != AArch64::LR}]>;
|
||||
def M3ResetFastPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
|
||||
def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
|
||||
MI->getOpcode() == AArch64::EXTRXrri) &&
|
||||
MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
|
||||
MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
|
||||
def M3ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Coarse scheduling model.
|
||||
|
||||
def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0;
|
||||
let NumMicroOps = 1; }
|
||||
def M3WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
|
||||
let NumMicroOps = 0; }
|
||||
|
||||
def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; }
|
||||
def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2;
|
||||
|
@ -140,15 +127,25 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU,
|
|||
let NumMicroOps = 2; }
|
||||
def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
|
||||
def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
|
||||
def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
|
||||
SchedVar<M3ShiftLeftFastPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotateRightFastPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<ExynosArithPred, [M3WriteA1]>,
|
||||
SchedVar<ExynosLogicPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<ExynosArithPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<ExynosLogicPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
|
||||
SchedVar<ExynosLogicPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
|
||||
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
|
||||
def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkFastPred, [M3WriteAB]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAC]>]>;
|
||||
def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAB]>]>;
|
||||
|
||||
def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
|
||||
def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
|
||||
|
@ -165,44 +162,46 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
|
|||
def M3WriteLD : SchedWriteRes<[M3UnitA,
|
||||
M3UnitL]> { let Latency = 4;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteLE : SchedWriteRes<[M3UnitA,
|
||||
M3UnitL]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
|
||||
let NumMicroOps = 0; }
|
||||
|
||||
def M3WriteLX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M3WriteLB]>]>;
|
||||
def M3WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M3WriteL4]>]>;
|
||||
def M3WriteLY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteLE]>,
|
||||
SchedVar<NoSchedPred, [M3WriteL5]>]>;
|
||||
|
||||
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
|
||||
def M3WriteSA : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS,
|
||||
M3UnitFST]> { let Latency = 2;
|
||||
M3UnitFST]> { let Latency = 3;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSB : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSC : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSC : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS,
|
||||
M3UnitFST]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteSA]>,
|
||||
SchedVar<NoSchedPred, [WriteVST]>]>;
|
||||
|
||||
def M3WriteSX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteSB]>]>;
|
||||
def M3WriteSY : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteSC]>]>;
|
||||
|
||||
def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>;
|
||||
def M3ReadAdrBase : SchedReadVariant<[SchedVar<ExynosScaledIdxPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>;
|
||||
|
||||
// Branch instructions.
|
||||
def : SchedAlias<WriteBr, M3WriteZ0>;
|
||||
def : WriteRes<WriteBrReg, [M3UnitC]> { let Latency = 1; }
|
||||
def : SchedAlias<WriteBrReg, M3WriteC1>;
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : WriteRes<WriteI, [M3UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteISReg, [M3UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIEReg, [M3UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIS, [M3UnitALU]> { let Latency = 1; }
|
||||
def : SchedAlias<WriteI, M3WriteA1>;
|
||||
def : SchedAlias<WriteISReg, M3WriteA1>;
|
||||
def : SchedAlias<WriteIEReg, M3WriteA1>;
|
||||
def : SchedAlias<WriteIS, M3WriteA1>;
|
||||
|
||||
// Move instructions.
|
||||
def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; }
|
||||
def : SchedAlias<WriteImm, M3WriteA1>;
|
||||
|
||||
// Divide and multiply instructions.
|
||||
def : WriteRes<WriteID32, [M3UnitC,
|
||||
|
@ -216,26 +215,23 @@ def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
|
|||
let ResourceCycles = [2]; }
|
||||
|
||||
// Miscellaneous instructions.
|
||||
def : WriteRes<WriteExtr, [M3UnitALU,
|
||||
M3UnitALU]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def : SchedAlias<WriteExtr, M3WriteAY>;
|
||||
|
||||
// Addressing modes.
|
||||
def : WriteRes<WriteAdr, []> { let Latency = 1;
|
||||
let NumMicroOps = 0; }
|
||||
def : SchedAlias<WriteAdr, M3WriteZ1>;
|
||||
def : SchedAlias<ReadAdrBase, M3ReadAdrBase>;
|
||||
|
||||
// Load instructions.
|
||||
def : SchedAlias<WriteLD, M3WriteL4>;
|
||||
def : WriteRes<WriteLDHi, []> { let Latency = 4;
|
||||
let NumMicroOps = 0; }
|
||||
def : SchedAlias<WriteLDIdx, M3WriteLX>;
|
||||
def : SchedAlias<WriteLDIdx, M3WriteLB>;
|
||||
|
||||
// Store instructions.
|
||||
def : SchedAlias<WriteST, M3WriteS1>;
|
||||
def : SchedAlias<WriteSTP, M3WriteS1>;
|
||||
def : SchedAlias<WriteSTX, M3WriteS1>;
|
||||
def : SchedAlias<WriteSTIdx, M3WriteSX>;
|
||||
def : SchedAlias<WriteSTIdx, M3WriteSB>;
|
||||
|
||||
// FP data instructions.
|
||||
def : WriteRes<WriteF, [M3UnitFADD]> { let Latency = 2; }
|
||||
|
@ -245,7 +241,6 @@ def : WriteRes<WriteFDiv, [M3UnitFDIV]> { let Latency = 12;
|
|||
def : WriteRes<WriteFMul, [M3UnitFMAC]> { let Latency = 4; }
|
||||
|
||||
// FP miscellaneous instructions.
|
||||
// TODO: Conversion between register files is much different.
|
||||
def : WriteRes<WriteFCvt, [M3UnitFCVT]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFImm, [M3UnitNALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteFCopy, [M3UnitNALU]> { let Latency = 1; }
|
||||
|
@ -259,7 +254,8 @@ def : WriteRes<WriteVST, [M3UnitS,
|
|||
let NumMicroOps = 1; }
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVd, [M3UnitNALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVq, [M3UnitNALU]> { let Latency = 3; }
|
||||
|
||||
// Other miscellaneous instructions.
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
|
@ -282,6 +278,7 @@ def : ReadAdvance<ReadID, 0>;
|
|||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Finer scheduling model.
|
||||
|
@ -481,11 +478,15 @@ def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
|
|||
|
||||
// Special cases.
|
||||
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
|
||||
def M3WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteZ0]>]>;
|
||||
def M3WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
|
||||
|
||||
// Fast forwarding.
|
||||
def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
|
||||
def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
|
||||
M3WriteFMAC5]>;
|
||||
def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
|
||||
SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
|
||||
def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
|
||||
|
||||
// Branch instructions
|
||||
|
@ -496,29 +497,40 @@ def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
|
|||
def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : InstRW<[M3WriteA1], (instrs COPY)>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
|
||||
def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
|
||||
def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
|
||||
def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;
|
||||
|
||||
// Move instructions.
|
||||
def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
|
||||
def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
|
||||
def : InstRW<[M3WriteCOPY], (instrs COPY)>;
|
||||
def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
|
||||
def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
|
||||
|
||||
// Divide and multiply instructions.
|
||||
|
||||
// Miscellaneous instructions.
|
||||
def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
|
||||
|
||||
// Load instructions.
|
||||
def : InstRW<[M3WriteLD,
|
||||
WriteLDHi,
|
||||
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
|
||||
def : InstRW<[M3WriteLB,
|
||||
ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
|
||||
def : InstRW<[M3WriteLX,
|
||||
ReadAdrBase], (instregex "^PRFMro[WX]")>;
|
||||
ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
|
||||
def : InstRW<[M3WriteLB,
|
||||
ReadAdrBase], (instrs PRFMroW)>;
|
||||
def : InstRW<[M3WriteLX,
|
||||
ReadAdrBase], (instrs PRFMroX)>;
|
||||
|
||||
// Store instructions.
|
||||
def : InstRW<[M3WriteSB,
|
||||
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
|
||||
def : InstRW<[WriteST,
|
||||
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
|
||||
|
||||
// FP data instructions.
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
|
||||
|
@ -555,9 +567,11 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
|
|||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
|
||||
def : InstRW<[M3WriteLX,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M3WriteLB,
|
||||
def : InstRW<[M3WriteLE,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
|
||||
def : InstRW<[M3WriteLY,
|
||||
ReadAdrBase], (instregex "^LDRQro[WX]")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
M3WriteLH], (instregex "^LDN?P[DS]i")>;
|
||||
|
@ -575,20 +589,24 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
|
|||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
|
||||
def : InstRW<[M3WriteSY,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M3WriteSA,
|
||||
ReadAdrBase], (instregex "^STRQro[WX]")>;
|
||||
ReadAdrBase], (instregex "^STR[BDHS]roW")>;
|
||||
def : InstRW<[M3WriteSA,
|
||||
ReadAdrBase], (instregex "^STRQroW")>;
|
||||
def : InstRW<[WriteVST,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]roX")>;
|
||||
def : InstRW<[M3WriteSY,
|
||||
ReadAdrBase], (instregex "^STRQroX")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STP[DS](post|pre)")>;
|
||||
def : InstRW<[M3WriteSA,
|
||||
def : InstRW<[M3WriteSC,
|
||||
WriteAdr], (instregex "^STPQ(post|pre)")>;
|
||||
|
||||
// ASIMD instructions.
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>;
|
||||
def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>;
|
||||
def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>;
|
||||
def : InstRW<[M3WriteNMSC1], (instregex "^((SQ)?ABS|SQNEG)v")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>;
|
||||
|
@ -597,7 +615,6 @@ def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>;
|
|||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>;
|
||||
def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
|
||||
|
@ -647,12 +664,12 @@ def : InstRW<[M3WriteNEONY], (instrs FSQRTv2f64)>;
|
|||
|
||||
// ASIMD miscellaneous instructions.
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^RBITv")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>;
|
||||
def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^CPY")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>;
|
||||
def : InstRW<[M3WriteMOVI], (instregex "^MOVI")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>;
|
||||
|
@ -668,108 +685,108 @@ def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
|
|||
// ASIMD load instructions.
|
||||
def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDA,
|
||||
WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDA,
|
||||
WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDB,
|
||||
WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDB,
|
||||
WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDC,
|
||||
WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDC,
|
||||
WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>;
|
||||
def : InstRW<[M3WriteVLDD,
|
||||
WriteAdr], (instregex "LD1i(8|16|32)_POST")>;
|
||||
M3WriteA1], (instregex "LD1i(8|16|32)_POST")>;
|
||||
def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>;
|
||||
def : InstRW<[M3WriteVLDE,
|
||||
WriteAdr], (instregex "LD1i(64)_POST")>;
|
||||
M3WriteA1], (instregex "LD1i(64)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[M3WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST")>;
|
||||
M3WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>;
|
||||
def : InstRW<[M3WriteVLDG,
|
||||
WriteAdr], (instregex "LD2i(8|16|32)_POST")>;
|
||||
M3WriteA1], (instregex "LD2i(8|16|32)_POST")>;
|
||||
def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>;
|
||||
def : InstRW<[M3WriteVLDH,
|
||||
WriteAdr], (instregex "LD2i(64)_POST")>;
|
||||
M3WriteA1], (instregex "LD2i(64)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[M3WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST")>;
|
||||
M3WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
|
||||
def : InstRW<[M3WriteVLDJ,
|
||||
WriteAdr], (instregex "LD3i(8|16|32)_POST")>;
|
||||
M3WriteA1], (instregex "LD3i(8|16|32)_POST")>;
|
||||
def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>;
|
||||
def : InstRW<[M3WriteVLDL,
|
||||
WriteAdr], (instregex "LD3i(64)_POST")>;
|
||||
M3WriteA1], (instregex "LD3i(64)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M3WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
|
||||
M3WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>;
|
||||
def : InstRW<[M3WriteVLDK,
|
||||
WriteAdr], (instregex "LD4i(8|16|32)_POST")>;
|
||||
M3WriteA1], (instregex "LD4i(8|16|32)_POST")>;
|
||||
def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>;
|
||||
def : InstRW<[M3WriteVLDM,
|
||||
WriteAdr], (instregex "LD4i(64)_POST")>;
|
||||
M3WriteA1], (instregex "LD4i(64)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
// ASIMD store instructions.
|
||||
def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -24,8 +23,9 @@ def FalkorModel : SchedMachineModel {
|
|||
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -92,7 +92,8 @@ def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
|
|||
def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteV, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVd, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVq, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVST, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteSys, []> { let Unsupported = 1; }
|
||||
|
@ -111,6 +112,7 @@ def : ReadAdvance<ReadID, 0>;
|
|||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
// Detailed Refinements
|
||||
// -----------------------------------------------------------------------------
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -909,10 +908,10 @@ def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
|
|||
// -----------------------------------------------------------------------------
|
||||
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
|
||||
def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
|
||||
|
@ -936,7 +935,7 @@ def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
|
|||
def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
|
||||
(instregex "^INSv(i32|i64)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>;
|
||||
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v16i8$")>;
|
||||
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
|
||||
def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
|
||||
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -28,8 +27,9 @@ def KryoModel : SchedMachineModel {
|
|||
let LoopMicroOpBufferSize = 16;
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -95,7 +95,8 @@ def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>
|
|||
{ let Latency = 6; let NumMicroOps = 2; }
|
||||
def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>
|
||||
{ let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
|
||||
def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }
|
||||
|
||||
|
@ -117,6 +118,7 @@ def : ReadAdvance<ReadID, 0>;
|
|||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -463,13 +462,13 @@ def KryoWrite_1cyc_X_noRSV_74ln :
|
|||
let Latency = 1; let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[KryoWrite_1cyc_X_noRSV_74ln],
|
||||
(instrs BIFv8i8, BITv8i8, BSLv8i8)>;
|
||||
(instrs BIFv8i8, BITv8i8, BSLv8i8, BSPv8i8)>;
|
||||
def KryoWrite_1cyc_X_X_75ln :
|
||||
SchedWriteRes<[KryoUnitX, KryoUnitX]> {
|
||||
let Latency = 1; let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[KryoWrite_1cyc_X_X_75ln],
|
||||
(instrs BIFv16i8, BITv16i8, BSLv16i8)>;
|
||||
(instrs BIFv16i8, BITv16i8, BSLv16i8, BSPv16i8)>;
|
||||
def KryoWrite_0cyc_noRSV_11ln :
|
||||
SchedWriteRes<[]> {
|
||||
let Latency = 0; let NumMicroOps = 1;
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
//===- AArch64SchedPredAmpere.td - AArch64 Sched Preds -----*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines scheduling predicate definitions that are used by the
|
||||
// AArch64 Ampere Computing processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Auxiliary predicates.
|
||||
|
||||
// Check for a LSL shift <= 4
|
||||
def AmpereCheapLSL : MCSchedPredicate<
|
||||
CheckAny<[CheckShiftBy0,
|
||||
CheckAll<
|
||||
[CheckShiftLSL,
|
||||
CheckAny<
|
||||
[CheckShiftBy1,
|
||||
CheckShiftBy2,
|
||||
CheckShiftBy3,
|
||||
CheckShiftBy4]>]>]>>;
|
|
@ -0,0 +1,157 @@
|
|||
//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines scheduling predicate definitions that are used by the
|
||||
// AArch64 Exynos processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Auxiliary predicates.
|
||||
|
||||
// Check the shift in arithmetic and logic instructions.
|
||||
def ExynosCheckShift : CheckAny<[CheckShiftBy0,
|
||||
CheckAll<
|
||||
[CheckShiftLSL,
|
||||
CheckAny<
|
||||
[CheckShiftBy1,
|
||||
CheckShiftBy2,
|
||||
CheckShiftBy3]>]>]>;
|
||||
|
||||
// Exynos predicates.
|
||||
|
||||
// Identify BLR specifying the LR register as the indirect target register.
|
||||
def ExynosBranchLinkLRPred : MCSchedPredicate<
|
||||
CheckAll<[CheckOpcode<[BLR]>,
|
||||
CheckRegOperand<0, LR>]>>;
|
||||
|
||||
// Identify arithmetic instructions without or with limited extension or shift.
|
||||
def ExynosArithFn : TIIPredicate<
|
||||
"isExynosArithFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithExtOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<[CheckExtBy0,
|
||||
CheckAll<
|
||||
[CheckAny<
|
||||
[CheckExtUXTW,
|
||||
CheckExtUXTX]>,
|
||||
CheckAny<
|
||||
[CheckExtBy1,
|
||||
CheckExtBy2,
|
||||
CheckExtBy3]>]>]>>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsArithShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<ExynosCheckShift>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsArithUnshiftOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsArithImmOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ExynosArithPred : MCSchedPredicate<ExynosArithFn>;
|
||||
|
||||
// Identify logic instructions with limited shift.
|
||||
def ExynosLogicFn : TIIPredicate<
|
||||
"isExynosLogicFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsLogicShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<ExynosCheckShift>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsLogicUnshiftOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsLogicImmOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ExynosLogicPred : MCSchedPredicate<ExynosLogicFn>;
|
||||
|
||||
// Identify more logic instructions with limited shift.
|
||||
def ExynosLogicExFn : TIIPredicate<
|
||||
"isExynosLogicExFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsLogicShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<
|
||||
[ExynosCheckShift,
|
||||
CheckAll<
|
||||
[CheckShiftLSL,
|
||||
CheckShiftBy8]>]>>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsLogicUnshiftOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsLogicImmOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ExynosLogicExPred : MCSchedPredicate<ExynosLogicExFn>;
|
||||
|
||||
// Identify a load or store using the register offset addressing mode
|
||||
// with a scaled non-extended register.
|
||||
def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsLoadStoreRegOffsetOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<
|
||||
[CheckMemExtSXTW,
|
||||
CheckMemExtUXTW,
|
||||
CheckMemScaled]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
|
||||
|
||||
// Identify FP instructions.
|
||||
def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckHForm,
|
||||
CheckSForm,
|
||||
CheckDForm,
|
||||
CheckQForm]>>;
|
||||
|
||||
// Identify 128-bit NEON instructions.
|
||||
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
|
||||
|
||||
// Identify instructions that reset a register efficiently.
|
||||
def ExynosResetFn : TIIPredicate<
|
||||
"isExynosResetFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
[ADR, ADRP,
|
||||
MOVNWi, MOVNXi,
|
||||
MOVZWi, MOVZXi],
|
||||
MCReturnStatement<TruePred>>,
|
||||
MCOpcodeSwitchCase<
|
||||
[ORRWri, ORRXri],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckIsRegOperand<1>,
|
||||
CheckAny<
|
||||
[CheckRegOperand<1, WZR>,
|
||||
CheckRegOperand<1, XZR>]>]>>>],
|
||||
MCReturnStatement<
|
||||
CheckAny<
|
||||
[IsCopyIdiomFn,
|
||||
IsZeroFPIdiomFn]>>>>;
|
||||
def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
|
||||
|
||||
// Identify EXTR as the alias for ROR (immediate).
|
||||
def ExynosRotateRightImmPred : MCSchedPredicate<
|
||||
CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
|
||||
CheckSameRegOperand<1, 2>]>>;
|
||||
|
||||
// Identify cheap arithmetic and logic immediate instructions.
|
||||
def ExynosCheapFn : TIIPredicate<
|
||||
"isExynosCheapAsMove",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithLogicImmOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>],
|
||||
MCReturnStatement<
|
||||
CheckAny<
|
||||
[ExynosArithFn, ExynosResetFn, ExynosLogicFn]>>>>;
|
|
@ -0,0 +1,441 @@
|
|||
//===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines scheduling predicate definitions that are used by the
|
||||
// AArch64 subtargets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Function mappers.
|
||||
|
||||
// Check the extension type in arithmetic instructions.
|
||||
let FunctionMapper = "AArch64_AM::getArithExtendType" in {
|
||||
def CheckExtUXTB : CheckImmOperand_s<3, "AArch64_AM::UXTB">;
|
||||
def CheckExtUXTH : CheckImmOperand_s<3, "AArch64_AM::UXTH">;
|
||||
def CheckExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
|
||||
def CheckExtUXTX : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
|
||||
def CheckExtSXTB : CheckImmOperand_s<3, "AArch64_AM::SXTB">;
|
||||
def CheckExtSXTH : CheckImmOperand_s<3, "AArch64_AM::SXTH">;
|
||||
def CheckExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
|
||||
def CheckExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
|
||||
}
|
||||
|
||||
// Check for shifting in extended arithmetic instructions.
|
||||
foreach I = {0-3} in {
|
||||
let FunctionMapper = "AArch64_AM::getArithShiftValue" in
|
||||
def CheckExtBy#I : CheckImmOperand<3, I>;
|
||||
}
|
||||
|
||||
// Check the extension type in the register offset addressing mode.
|
||||
let FunctionMapper = "AArch64_AM::getMemExtendType" in {
|
||||
def CheckMemExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
|
||||
def CheckMemExtLSL : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
|
||||
def CheckMemExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
|
||||
def CheckMemExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
|
||||
}
|
||||
|
||||
// Check for scaling in the register offset addressing mode.
|
||||
let FunctionMapper = "AArch64_AM::getMemDoShift" in
|
||||
def CheckMemScaled : CheckImmOperandSimple<4>;
|
||||
|
||||
// Check the shifting type in arithmetic and logic instructions.
|
||||
let FunctionMapper = "AArch64_AM::getShiftType" in {
|
||||
def CheckShiftLSL : CheckImmOperand_s<3, "AArch64_AM::LSL">;
|
||||
def CheckShiftLSR : CheckImmOperand_s<3, "AArch64_AM::LSR">;
|
||||
def CheckShiftASR : CheckImmOperand_s<3, "AArch64_AM::ASR">;
|
||||
def CheckShiftROR : CheckImmOperand_s<3, "AArch64_AM::ROR">;
|
||||
def CheckShiftMSL : CheckImmOperand_s<3, "AArch64_AM::MSL">;
|
||||
}
|
||||
|
||||
// Check for shifting in arithmetic and logic instructions.
|
||||
foreach I = {0-4, 8} in {
|
||||
let FunctionMapper = "AArch64_AM::getShiftValue" in
|
||||
def CheckShiftBy#I : CheckImmOperand<3, I>;
|
||||
}
|
||||
|
||||
// Generic predicates.
|
||||
|
||||
// Identify whether an instruction is the 16-bit NEON form based on its result.
|
||||
def CheckHForm : CheckAll<[CheckIsRegOperand<0>,
|
||||
CheckAny<[CheckRegOperand<0, H0>,
|
||||
CheckRegOperand<0, H1>,
|
||||
CheckRegOperand<0, H2>,
|
||||
CheckRegOperand<0, H3>,
|
||||
CheckRegOperand<0, H4>,
|
||||
CheckRegOperand<0, H5>,
|
||||
CheckRegOperand<0, H6>,
|
||||
CheckRegOperand<0, H7>,
|
||||
CheckRegOperand<0, H8>,
|
||||
CheckRegOperand<0, H9>,
|
||||
CheckRegOperand<0, H10>,
|
||||
CheckRegOperand<0, H11>,
|
||||
CheckRegOperand<0, H12>,
|
||||
CheckRegOperand<0, H13>,
|
||||
CheckRegOperand<0, H14>,
|
||||
CheckRegOperand<0, H15>,
|
||||
CheckRegOperand<0, H16>,
|
||||
CheckRegOperand<0, H17>,
|
||||
CheckRegOperand<0, H18>,
|
||||
CheckRegOperand<0, H19>,
|
||||
CheckRegOperand<0, H20>,
|
||||
CheckRegOperand<0, H21>,
|
||||
CheckRegOperand<0, H22>,
|
||||
CheckRegOperand<0, H23>,
|
||||
CheckRegOperand<0, H24>,
|
||||
CheckRegOperand<0, H25>,
|
||||
CheckRegOperand<0, H26>,
|
||||
CheckRegOperand<0, H27>,
|
||||
CheckRegOperand<0, H28>,
|
||||
CheckRegOperand<0, H29>,
|
||||
CheckRegOperand<0, H30>,
|
||||
CheckRegOperand<0, H31>]>]>;
|
||||
|
||||
// Identify whether an instruction is the 32-bit NEON form based on its result.
|
||||
def CheckSForm : CheckAll<[CheckIsRegOperand<0>,
|
||||
CheckAny<[CheckRegOperand<0, S0>,
|
||||
CheckRegOperand<0, S1>,
|
||||
CheckRegOperand<0, S2>,
|
||||
CheckRegOperand<0, S3>,
|
||||
CheckRegOperand<0, S4>,
|
||||
CheckRegOperand<0, S5>,
|
||||
CheckRegOperand<0, S6>,
|
||||
CheckRegOperand<0, S7>,
|
||||
CheckRegOperand<0, S8>,
|
||||
CheckRegOperand<0, S9>,
|
||||
CheckRegOperand<0, S10>,
|
||||
CheckRegOperand<0, S11>,
|
||||
CheckRegOperand<0, S12>,
|
||||
CheckRegOperand<0, S13>,
|
||||
CheckRegOperand<0, S14>,
|
||||
CheckRegOperand<0, S15>,
|
||||
CheckRegOperand<0, S16>,
|
||||
CheckRegOperand<0, S17>,
|
||||
CheckRegOperand<0, S18>,
|
||||
CheckRegOperand<0, S19>,
|
||||
CheckRegOperand<0, S20>,
|
||||
CheckRegOperand<0, S21>,
|
||||
CheckRegOperand<0, S22>,
|
||||
CheckRegOperand<0, S23>,
|
||||
CheckRegOperand<0, S24>,
|
||||
CheckRegOperand<0, S25>,
|
||||
CheckRegOperand<0, S26>,
|
||||
CheckRegOperand<0, S27>,
|
||||
CheckRegOperand<0, S28>,
|
||||
CheckRegOperand<0, S29>,
|
||||
CheckRegOperand<0, S30>,
|
||||
CheckRegOperand<0, S31>]>]>;
|
||||
|
||||
// Identify whether an instruction is the 64-bit NEON form based on its result.
|
||||
def CheckDForm : CheckAll<[CheckIsRegOperand<0>,
|
||||
CheckAny<[CheckRegOperand<0, D0>,
|
||||
CheckRegOperand<0, D1>,
|
||||
CheckRegOperand<0, D2>,
|
||||
CheckRegOperand<0, D3>,
|
||||
CheckRegOperand<0, D4>,
|
||||
CheckRegOperand<0, D5>,
|
||||
CheckRegOperand<0, D6>,
|
||||
CheckRegOperand<0, D7>,
|
||||
CheckRegOperand<0, D8>,
|
||||
CheckRegOperand<0, D9>,
|
||||
CheckRegOperand<0, D10>,
|
||||
CheckRegOperand<0, D11>,
|
||||
CheckRegOperand<0, D12>,
|
||||
CheckRegOperand<0, D13>,
|
||||
CheckRegOperand<0, D14>,
|
||||
CheckRegOperand<0, D15>,
|
||||
CheckRegOperand<0, D16>,
|
||||
CheckRegOperand<0, D17>,
|
||||
CheckRegOperand<0, D18>,
|
||||
CheckRegOperand<0, D19>,
|
||||
CheckRegOperand<0, D20>,
|
||||
CheckRegOperand<0, D21>,
|
||||
CheckRegOperand<0, D22>,
|
||||
CheckRegOperand<0, D23>,
|
||||
CheckRegOperand<0, D24>,
|
||||
CheckRegOperand<0, D25>,
|
||||
CheckRegOperand<0, D26>,
|
||||
CheckRegOperand<0, D27>,
|
||||
CheckRegOperand<0, D28>,
|
||||
CheckRegOperand<0, D29>,
|
||||
CheckRegOperand<0, D30>,
|
||||
CheckRegOperand<0, D31>]>]>;
|
||||
|
||||
// Identify whether an instruction is the 128-bit NEON form based on its result.
|
||||
def CheckQForm : CheckAll<[CheckIsRegOperand<0>,
|
||||
CheckAny<[CheckRegOperand<0, Q0>,
|
||||
CheckRegOperand<0, Q1>,
|
||||
CheckRegOperand<0, Q2>,
|
||||
CheckRegOperand<0, Q3>,
|
||||
CheckRegOperand<0, Q4>,
|
||||
CheckRegOperand<0, Q5>,
|
||||
CheckRegOperand<0, Q6>,
|
||||
CheckRegOperand<0, Q7>,
|
||||
CheckRegOperand<0, Q8>,
|
||||
CheckRegOperand<0, Q9>,
|
||||
CheckRegOperand<0, Q10>,
|
||||
CheckRegOperand<0, Q11>,
|
||||
CheckRegOperand<0, Q12>,
|
||||
CheckRegOperand<0, Q13>,
|
||||
CheckRegOperand<0, Q14>,
|
||||
CheckRegOperand<0, Q15>,
|
||||
CheckRegOperand<0, Q16>,
|
||||
CheckRegOperand<0, Q17>,
|
||||
CheckRegOperand<0, Q18>,
|
||||
CheckRegOperand<0, Q19>,
|
||||
CheckRegOperand<0, Q20>,
|
||||
CheckRegOperand<0, Q21>,
|
||||
CheckRegOperand<0, Q22>,
|
||||
CheckRegOperand<0, Q23>,
|
||||
CheckRegOperand<0, Q24>,
|
||||
CheckRegOperand<0, Q25>,
|
||||
CheckRegOperand<0, Q26>,
|
||||
CheckRegOperand<0, Q27>,
|
||||
CheckRegOperand<0, Q28>,
|
||||
CheckRegOperand<0, Q29>,
|
||||
CheckRegOperand<0, Q30>,
|
||||
CheckRegOperand<0, Q31>]>]>;
|
||||
|
||||
// Identify arithmetic instructions with extend.
|
||||
def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
|
||||
SUBWrx, SUBXrx, SUBSWrx, SUBSXrx,
|
||||
ADDXrx64, ADDSXrx64,
|
||||
SUBXrx64, SUBSXrx64]>;
|
||||
|
||||
// Identify arithmetic immediate instructions.
|
||||
def IsArithImmOp : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri,
|
||||
SUBWri, SUBXri, SUBSWri, SUBSXri]>;
|
||||
|
||||
// Identify arithmetic instructions with shift.
|
||||
def IsArithShiftOp : CheckOpcode<[ADDWrs, ADDXrs, ADDSWrs, ADDSXrs,
|
||||
SUBWrs, SUBXrs, SUBSWrs, SUBSXrs]>;
|
||||
|
||||
// Identify arithmetic instructions without shift.
|
||||
def IsArithUnshiftOp : CheckOpcode<[ADDWrr, ADDXrr, ADDSWrr, ADDSXrr,
|
||||
SUBWrr, SUBXrr, SUBSWrr, SUBSXrr]>;
|
||||
|
||||
// Identify logic immediate instructions.
|
||||
def IsLogicImmOp : CheckOpcode<[ANDWri, ANDXri,
|
||||
EORWri, EORXri,
|
||||
ORRWri, ORRXri]>;
|
||||
|
||||
// Identify logic instructions with shift.
|
||||
def IsLogicShiftOp : CheckOpcode<[ANDWrs, ANDXrs, ANDSWrs, ANDSXrs,
|
||||
BICWrs, BICXrs, BICSWrs, BICSXrs,
|
||||
EONWrs, EONXrs,
|
||||
EORWrs, EORXrs,
|
||||
ORNWrs, ORNXrs,
|
||||
ORRWrs, ORRXrs]>;
|
||||
|
||||
// Identify logic instructions without shift.
|
||||
def IsLogicUnshiftOp : CheckOpcode<[ANDWrr, ANDXrr, ANDSWrr, ANDSXrr,
|
||||
BICWrr, BICXrr, BICSWrr, BICSXrr,
|
||||
EONWrr, EONXrr,
|
||||
EORWrr, EORXrr,
|
||||
ORNWrr, ORNXrr,
|
||||
ORRWrr, ORRXrr]>;
|
||||
|
||||
// Identify arithmetic and logic immediate instructions.
|
||||
def IsArithLogicImmOp : CheckOpcode<!listconcat(IsArithImmOp.ValidOpcodes,
|
||||
IsLogicImmOp.ValidOpcodes)>;
|
||||
|
||||
// Identify arithmetic and logic instructions with shift.
|
||||
def IsArithLogicShiftOp : CheckOpcode<!listconcat(IsArithShiftOp.ValidOpcodes,
|
||||
IsLogicShiftOp.ValidOpcodes)>;
|
||||
|
||||
// Identify arithmetic and logic instructions without shift.
|
||||
def IsArithLogicUnshiftOp : CheckOpcode<!listconcat(IsArithUnshiftOp.ValidOpcodes,
|
||||
IsLogicUnshiftOp.ValidOpcodes)>;
|
||||
|
||||
// Identify whether an instruction is an ASIMD
|
||||
// load using the post index addressing mode.
|
||||
def IsLoadASIMDPostOp : CheckOpcode<[LD1Onev8b_POST, LD1Onev4h_POST, LD1Onev2s_POST, LD1Onev1d_POST,
|
||||
LD1Onev16b_POST, LD1Onev8h_POST, LD1Onev4s_POST, LD1Onev2d_POST,
|
||||
LD1Twov8b_POST, LD1Twov4h_POST, LD1Twov2s_POST, LD1Twov1d_POST,
|
||||
LD1Twov16b_POST, LD1Twov8h_POST, LD1Twov4s_POST, LD1Twov2d_POST,
|
||||
LD1Threev8b_POST, LD1Threev4h_POST, LD1Threev2s_POST, LD1Threev1d_POST,
|
||||
LD1Threev16b_POST, LD1Threev8h_POST, LD1Threev4s_POST, LD1Threev2d_POST,
|
||||
LD1Fourv8b_POST, LD1Fourv4h_POST, LD1Fourv2s_POST, LD1Fourv1d_POST,
|
||||
LD1Fourv16b_POST, LD1Fourv8h_POST, LD1Fourv4s_POST, LD1Fourv2d_POST,
|
||||
LD1i8_POST, LD1i16_POST, LD1i32_POST, LD1i64_POST,
|
||||
LD1Rv8b_POST, LD1Rv4h_POST, LD1Rv2s_POST, LD1Rv1d_POST,
|
||||
LD1Rv16b_POST, LD1Rv8h_POST, LD1Rv4s_POST, LD1Rv2d_POST,
|
||||
LD2Twov8b_POST, LD2Twov4h_POST, LD2Twov2s_POST,
|
||||
LD2Twov16b_POST, LD2Twov8h_POST, LD2Twov4s_POST, LD2Twov2d_POST,
|
||||
LD2i8_POST, LD2i16_POST, LD2i32_POST, LD2i64_POST,
|
||||
LD2Rv8b_POST, LD2Rv4h_POST, LD2Rv2s_POST, LD2Rv1d_POST,
|
||||
LD2Rv16b_POST, LD2Rv8h_POST, LD2Rv4s_POST, LD2Rv2d_POST,
|
||||
LD3Threev8b_POST, LD3Threev4h_POST, LD3Threev2s_POST,
|
||||
LD3Threev16b_POST, LD3Threev8h_POST, LD3Threev4s_POST, LD3Threev2d_POST,
|
||||
LD3i8_POST, LD3i16_POST, LD3i32_POST, LD3i64_POST,
|
||||
LD3Rv8b_POST, LD3Rv4h_POST, LD3Rv2s_POST, LD3Rv1d_POST,
|
||||
LD3Rv16b_POST, LD3Rv8h_POST, LD3Rv4s_POST, LD3Rv2d_POST,
|
||||
LD4Fourv8b_POST, LD4Fourv4h_POST, LD4Fourv2s_POST,
|
||||
LD4Fourv16b_POST, LD4Fourv8h_POST, LD4Fourv4s_POST, LD4Fourv2d_POST,
|
||||
LD4i8_POST, LD4i16_POST, LD4i32_POST, LD4i64_POST,
|
||||
LD4Rv8b_POST, LD4Rv4h_POST, LD4Rv2s_POST, LD4Rv1d_POST,
|
||||
LD4Rv16b_POST, LD4Rv8h_POST, LD4Rv4s_POST, LD4Rv2d_POST]>;
|
||||
|
||||
// Identify whether an instruction is an ASIMD
|
||||
// store using the post index addressing mode.
|
||||
def IsStoreASIMDPostOp : CheckOpcode<[ST1Onev8b_POST, ST1Onev4h_POST, ST1Onev2s_POST, ST1Onev1d_POST,
|
||||
ST1Onev16b_POST, ST1Onev8h_POST, ST1Onev4s_POST, ST1Onev2d_POST,
|
||||
ST1Twov8b_POST, ST1Twov4h_POST, ST1Twov2s_POST, ST1Twov1d_POST,
|
||||
ST1Twov16b_POST, ST1Twov8h_POST, ST1Twov4s_POST, ST1Twov2d_POST,
|
||||
ST1Threev8b_POST, ST1Threev4h_POST, ST1Threev2s_POST, ST1Threev1d_POST,
|
||||
ST1Threev16b_POST, ST1Threev8h_POST, ST1Threev4s_POST, ST1Threev2d_POST,
|
||||
ST1Fourv8b_POST, ST1Fourv4h_POST, ST1Fourv2s_POST, ST1Fourv1d_POST,
|
||||
ST1Fourv16b_POST, ST1Fourv8h_POST, ST1Fourv4s_POST, ST1Fourv2d_POST,
|
||||
ST1i8_POST, ST1i16_POST, ST1i32_POST, ST1i64_POST,
|
||||
ST2Twov8b_POST, ST2Twov4h_POST, ST2Twov2s_POST,
|
||||
ST2Twov16b_POST, ST2Twov8h_POST, ST2Twov4s_POST, ST2Twov2d_POST,
|
||||
ST2i8_POST, ST2i16_POST, ST2i32_POST, ST2i64_POST,
|
||||
ST3Threev8b_POST, ST3Threev4h_POST, ST3Threev2s_POST,
|
||||
ST3Threev16b_POST, ST3Threev8h_POST, ST3Threev4s_POST, ST3Threev2d_POST,
|
||||
ST3i8_POST, ST3i16_POST, ST3i32_POST, ST3i64_POST,
|
||||
ST4Fourv8b_POST, ST4Fourv4h_POST, ST4Fourv2s_POST,
|
||||
ST4Fourv16b_POST, ST4Fourv8h_POST, ST4Fourv4s_POST, ST4Fourv2d_POST,
|
||||
ST4i8_POST, ST4i16_POST, ST4i32_POST, ST4i64_POST]>;
|
||||
|
||||
// Identify whether an instruction is an ASIMD load
|
||||
// or store using the post index addressing mode.
|
||||
def IsLoadStoreASIMDPostOp : CheckOpcode<!listconcat(IsLoadASIMDPostOp.ValidOpcodes,
|
||||
IsStoreASIMDPostOp.ValidOpcodes)>;
|
||||
|
||||
// Identify whether an instruction is a load
|
||||
// using the register offset addressing mode.
|
||||
def IsLoadRegOffsetOp : CheckOpcode<[PRFMroW, PRFMroX,
|
||||
LDRBBroW, LDRBBroX,
|
||||
LDRSBWroW, LDRSBWroX, LDRSBXroW, LDRSBXroX,
|
||||
LDRHHroW, LDRHHroX,
|
||||
LDRSHWroW, LDRSHWroX, LDRSHXroW, LDRSHXroX,
|
||||
LDRWroW, LDRWroX,
|
||||
LDRSWroW, LDRSWroX,
|
||||
LDRXroW, LDRXroX,
|
||||
LDRBroW, LDRBroX,
|
||||
LDRHroW, LDRHroX,
|
||||
LDRSroW, LDRSroX,
|
||||
LDRDroW, LDRDroX,
|
||||
LDRQroW, LDRQroX]>;
|
||||
|
||||
// Identify whether an instruction is a store
|
||||
// using the register offset addressing mode.
|
||||
def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
|
||||
STRHHroW, STRHHroX,
|
||||
STRWroW, STRWroX,
|
||||
STRXroW, STRXroX,
|
||||
STRBroW, STRBroX,
|
||||
STRHroW, STRHroX,
|
||||
STRSroW, STRSroX,
|
||||
STRDroW, STRDroX,
|
||||
STRQroW, STRQroX]>;
|
||||
|
||||
// Identify whether an instruction is a load or
|
||||
// store using the register offset addressing mode.
|
||||
def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
|
||||
IsStoreRegOffsetOp.ValidOpcodes)>;
|
||||
|
||||
// Target predicates.
|
||||
|
||||
// Identify an instruction that effectively transfers a register to another.
|
||||
def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom",
|
||||
MCOpcodeSwitchStatement<
|
||||
[// MOV {Rd, SP}, {SP, Rn} =>
|
||||
// ADD {Rd, SP}, {SP, Rn}, #0
|
||||
MCOpcodeSwitchCase<
|
||||
[ADDWri, ADDXri],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckIsRegOperand<0>,
|
||||
CheckIsRegOperand<1>,
|
||||
CheckAny<
|
||||
[CheckRegOperand<0, WSP>,
|
||||
CheckRegOperand<0, SP>,
|
||||
CheckRegOperand<1, WSP>,
|
||||
CheckRegOperand<1, SP>]>,
|
||||
CheckZeroOperand<2>]>>>,
|
||||
// MOV Rd, Rm =>
|
||||
// ORR Rd, ZR, Rm, LSL #0
|
||||
MCOpcodeSwitchCase<
|
||||
[ORRWrs, ORRXrs],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckIsRegOperand<1>,
|
||||
CheckIsRegOperand<2>,
|
||||
CheckAny<
|
||||
[CheckRegOperand<1, WZR>,
|
||||
CheckRegOperand<1, XZR>]>,
|
||||
CheckShiftBy0]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def IsCopyIdiomPred : MCSchedPredicate<IsCopyIdiomFn>;
|
||||
|
||||
// Identify arithmetic instructions with an extended register.
|
||||
def RegExtendedFn : TIIPredicate<"hasExtendedReg",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithExtOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckNot<CheckZeroOperand<3>>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def RegExtendedPred : MCSchedPredicate<RegExtendedFn>;
|
||||
|
||||
// Identify arithmetic and logic instructions with a shifted register.
|
||||
def RegShiftedFn : TIIPredicate<"hasShiftedReg",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithLogicShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckNot<CheckZeroOperand<3>>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def RegShiftedPred : MCSchedPredicate<RegShiftedFn>;
|
||||
|
||||
// Identify a load or store using the register offset addressing mode
|
||||
// with an extended or scaled register.
|
||||
def ScaledIdxFn : TIIPredicate<"isScaledAddr",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsLoadStoreRegOffsetOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<[CheckNot<CheckMemExtLSL>,
|
||||
CheckMemScaled]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ScaledIdxPred : MCSchedPredicate<ScaledIdxFn>;
|
||||
|
||||
// Identify an instruction that effectively resets a FP register to zero.
|
||||
def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom",
|
||||
MCOpcodeSwitchStatement<
|
||||
[// MOVI Vd, #0
|
||||
MCOpcodeSwitchCase<
|
||||
[MOVIv8b_ns, MOVIv16b_ns,
|
||||
MOVID, MOVIv2d_ns],
|
||||
MCReturnStatement<CheckZeroOperand<1>>>,
|
||||
// MOVI Vd, #0, LSL #0
|
||||
MCOpcodeSwitchCase<
|
||||
[MOVIv4i16, MOVIv8i16,
|
||||
MOVIv2i32, MOVIv4i32],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckZeroOperand<1>,
|
||||
CheckZeroOperand<2>]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def IsZeroFPIdiomPred : MCSchedPredicate<IsZeroFPIdiomFn>;
|
||||
|
||||
// Identify an instruction that effectively resets a GP register to zero.
|
||||
def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom",
|
||||
MCOpcodeSwitchStatement<
|
||||
[// ORR Rd, ZR, #0
|
||||
MCOpcodeSwitchCase<
|
||||
[ORRWri, ORRXri],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckIsRegOperand<1>,
|
||||
CheckAny<
|
||||
[CheckRegOperand<1, WZR>,
|
||||
CheckRegOperand<1, XZR>]>,
|
||||
CheckZeroOperand<2>]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def IsZeroIdiomPred : MCSchedPredicate<IsZeroIdiomFn>;
|
|
@ -0,0 +1,747 @@
|
|||
//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for Huawei TSV110 to support
|
||||
// instruction scheduling and other instruction cost heuristics.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// The following definitions describe the simpler per-operand machine model.
|
||||
// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
|
||||
|
||||
// Huawei TSV110 scheduling machine model.
|
||||
def TSV110Model : SchedMachineModel {
|
||||
let IssueWidth = 4; // 4 micro-ops dispatched per cycle.
|
||||
let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
|
||||
let LoopMicroOpBufferSize = 16;
|
||||
let LoadLatency = 4; // Optimistic load latency.
|
||||
let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F);
|
||||
}
|
||||
|
||||
// Define each kind of processor resource and number available on the TSV110,
|
||||
// which has 8 pipelines, each with its own queue where micro-ops wait for
|
||||
// their operands and issue out-of-order to one of eight execution pipelines.
|
||||
let SchedModel = TSV110Model in {
|
||||
def TSV110UnitALU : ProcResource<1>; // Int ALU
|
||||
def TSV110UnitAB : ProcResource<2>; // Int ALU/BRU
|
||||
def TSV110UnitMDU : ProcResource<1>; // Multi-Cycle
|
||||
def TSV110UnitFSU1 : ProcResource<1>; // FP/ASIMD
|
||||
def TSV110UnitFSU2 : ProcResource<1>; // FP/ASIMD
|
||||
def TSV110UnitLdSt : ProcResource<2>; // Load/Store
|
||||
|
||||
def TSV110UnitF : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>;
|
||||
def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>;
|
||||
def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>;
|
||||
}
|
||||
|
||||
let SchedModel = TSV110Model in {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Map the target-defined scheduler read/write resources and latency for
|
||||
// TSV110
|
||||
|
||||
// Integer ALU
|
||||
def : WriteRes<WriteImm, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteI, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteISReg, [TSV110UnitMDU]> { let Latency = 2; }
|
||||
def : WriteRes<WriteIEReg, [TSV110UnitMDU]> { let Latency = 2; }
|
||||
def : WriteRes<WriteExtr, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIS, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
|
||||
// Integer Mul/MAC/Div
|
||||
def : WriteRes<WriteID32, [TSV110UnitMDU]> { let Latency = 12;
|
||||
let ResourceCycles = [12]; }
|
||||
def : WriteRes<WriteID64, [TSV110UnitMDU]> { let Latency = 20;
|
||||
let ResourceCycles = [20]; }
|
||||
def : WriteRes<WriteIM32, [TSV110UnitMDU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteIM64, [TSV110UnitMDU]> { let Latency = 4; }
|
||||
|
||||
// Load
|
||||
def : WriteRes<WriteLD, [TSV110UnitLdSt]> { let Latency = 4; }
|
||||
def : WriteRes<WriteLDIdx, [TSV110UnitLdSt]> { let Latency = 4; }
|
||||
def : WriteRes<WriteLDHi, []> { let Latency = 4; }
|
||||
|
||||
// Pre/Post Indexing
|
||||
def : WriteRes<WriteAdr, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
|
||||
// Store
|
||||
def : WriteRes<WriteST, [TSV110UnitLdSt]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTP, [TSV110UnitLdSt]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTIdx, [TSV110UnitLdSt]> { let Latency = 1; }
|
||||
|
||||
// FP
|
||||
def : WriteRes<WriteF, [TSV110UnitF]> { let Latency = 2; }
|
||||
def : WriteRes<WriteFCmp, [TSV110UnitF]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFCvt, [TSV110UnitF]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFCopy, [TSV110UnitF]> { let Latency = 2; }
|
||||
def : WriteRes<WriteFImm, [TSV110UnitF]> { let Latency = 2; }
|
||||
def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
|
||||
|
||||
// FP Div, Sqrt
|
||||
def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
|
||||
|
||||
def : WriteRes<WriteVd, [TSV110UnitF]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVq, [TSV110UnitF]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
|
||||
def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }
|
||||
|
||||
// Branch
|
||||
def : WriteRes<WriteBr, [TSV110UnitAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteBrReg, [TSV110UnitAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSys, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteHint, []> { let Latency = 1; }
|
||||
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
|
||||
// Forwarding logic is modeled only for multiply and accumulate.
|
||||
def : ReadAdvance<ReadI, 0>;
|
||||
def : ReadAdvance<ReadISReg, 0>;
|
||||
def : ReadAdvance<ReadIEReg, 0>;
|
||||
def : ReadAdvance<ReadIM, 0>;
|
||||
def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
|
||||
def : ReadAdvance<ReadID, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
def : InstRW<[WriteI], (instrs COPY)>;
|
||||
|
||||
// Detailed Refinements
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Contains all of the TSV110 specific SchedWriteRes types. The approach
|
||||
// below is to define a generic SchedWriteRes for every combination of
|
||||
// latency and microOps. The naming conventions is to use a prefix, one field
|
||||
// for latency, and one or more microOp count/type designators.
|
||||
// Prefix: TSV110Wr
|
||||
// Latency: #cyc
|
||||
// MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt)
|
||||
//
|
||||
// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt means the total latency is 6 and there are
|
||||
// 1 micro-ops to be issued down one ALU pipe, six MDU pipes and four LdSt pipes.
|
||||
//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 1 micro-op types
|
||||
|
||||
def TSV110Wr_1cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 1; }
|
||||
def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; }
|
||||
def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; }
|
||||
def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 1; }
|
||||
|
||||
def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; }
|
||||
|
||||
def TSV110Wr_3cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 3; }
|
||||
def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 3; }
|
||||
def TSV110Wr_3cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 3; }
|
||||
|
||||
def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 4; }
|
||||
def TSV110Wr_4cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 4; }
|
||||
def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 4; }
|
||||
def TSV110Wr_4cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 4; }
|
||||
|
||||
def TSV110Wr_5cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 5; }
|
||||
def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 5; }
|
||||
def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 5; }
|
||||
def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 5; }
|
||||
|
||||
def TSV110Wr_6cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 6; }
|
||||
|
||||
def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; }
|
||||
|
||||
def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; }
|
||||
|
||||
def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; }
|
||||
|
||||
def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; }
|
||||
|
||||
def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; }
|
||||
|
||||
def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; }
|
||||
|
||||
def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; }
|
||||
|
||||
def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; }
|
||||
|
||||
def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; }
|
||||
|
||||
def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; }
|
||||
|
||||
def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; }
|
||||
|
||||
def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; }
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 2 micro-op types
|
||||
|
||||
def TSV110Wr_1cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitALUAB]> {
|
||||
let Latency = 1;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_2cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitALUAB]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_2cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_2cyc_2F : SchedWriteRes<[TSV110UnitF,
|
||||
TSV110UnitF]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_2cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
|
||||
TSV110UnitFSU2]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_4cyc_2F : SchedWriteRes<[TSV110UnitF,
|
||||
TSV110UnitF]> {
|
||||
let Latency = 4;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_4cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
|
||||
TSV110UnitFSU2]> {
|
||||
let Latency = 4;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_4cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitALUAB]> {
|
||||
let Latency = 4;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_5cyc_1ALU_1F : SchedWriteRes<[TSV110UnitALU,
|
||||
TSV110UnitF]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_6cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_6cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitALUAB]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_7cyc_1F_1LdSt : SchedWriteRes<[TSV110UnitF,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_8cyc_2FSU1 : SchedWriteRes<[TSV110UnitFSU1,
|
||||
TSV110UnitFSU1]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
|
||||
def TSV110Wr_8cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
|
||||
TSV110UnitFSU2]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 3 micro-op types
|
||||
|
||||
def TSV110Wr_6cyc_3F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitF]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
def TSV110Wr_6cyc_3LdSt : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
def TSV110Wr_7cyc_2F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 4 micro-op types
|
||||
|
||||
def TSV110Wr_8cyc_4F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitF, TSV110UnitF]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 4;
|
||||
}
|
||||
|
||||
def TSV110Wr_8cyc_3F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitF, TSV110UnitLdSt]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 4;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 5 micro-op types
|
||||
|
||||
def TSV110Wr_8cyc_3F_2LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitLdSt, TSV110UnitLdSt]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 5;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 8 micro-op types
|
||||
|
||||
def TSV110Wr_10cyc_4F_4LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitLdSt, TSV110UnitLdSt,
|
||||
TSV110UnitLdSt, TSV110UnitLdSt]> {
|
||||
let Latency = 10;
|
||||
let NumMicroOps = 8;
|
||||
}
|
||||
|
||||
|
||||
// Branch Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B)>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BL)>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BLR)>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>;
|
||||
|
||||
|
||||
// Cryptography Extensions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AES[DE]")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AESI?MC")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_2F], (instregex "^SHA1(H|SU0)")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA1[CMP]")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA256SU0")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^SHA256SU1")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA256(H|H2)")>;
|
||||
def TSV110ReadCRC: SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC], (instregex "^CRC32.*$")>;
|
||||
|
||||
|
||||
// Arithmetic and Logical Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(BIC|EON|ORN)[WX]rr")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(BIC)S[WX]rr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(ADC|SBC|BIC)[WX]r$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(ADC|SBC)S[WX]r$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(ADD|SUB)[WX]r(s|x|x64)$")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(ADD|SUB)S[WX]r(s|x|x64)$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
|
||||
|
||||
|
||||
// Move and Shift Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^MOV[NZK][WX]i")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(LSLV|LSRV|ASRV|RORV)(W|X)r")>;
|
||||
|
||||
|
||||
// Divide and Multiply Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_12cyc_1MDU], (instregex "^(S|U)DIVWr$")>;
|
||||
def : InstRW<[TSV110Wr_20cyc_1MDU], (instregex "^(S|U)DIVXr$")>;
|
||||
|
||||
def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>;
|
||||
def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>;
|
||||
|
||||
|
||||
// Miscellaneous Data-Processing Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^EXTR(W|X)rri$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(S|U)?BFM(W|X)ri$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>;
|
||||
|
||||
|
||||
// Load Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMui$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>;
|
||||
|
||||
|
||||
// Store Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
|
||||
|
||||
|
||||
// FP Data Processing Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCCMP(E)?(S|D)rr$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCSEL(S|D)rrr$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>;
|
||||
def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>;
|
||||
def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>;
|
||||
def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Hrr")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Drr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULSrr$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULDrr$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>;
|
||||
|
||||
|
||||
// FP Miscellaneous Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_5cyc_1ALU_1F], (instregex "^[SU]CVTF[SU][WX][SD]ri")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT[HSD][HSD]r")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>;
|
||||
|
||||
|
||||
// FP Load Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>;
|
||||
|
||||
|
||||
// FP Store Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR[BHSDQ]i")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>;
|
||||
|
||||
|
||||
// ASIMD Integer Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Reference for forms in this group
|
||||
// D form - v8i8, v4i16, v2i32
|
||||
// Q form - v16i8, v8i16, v4i32
|
||||
// D form - v1i8, v1i16, v1i32, v1i64
|
||||
// Q form - v16i8, v8i16, v4i32, v2i64
|
||||
// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
|
||||
// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
|
||||
|
||||
// ASIMD simple arithmetic
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(ABS|ADD(P)?|NEG|SUB)v")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](ADD(L|LP|W)|SUB(L|W))v")>;
|
||||
|
||||
// ASIMD complex arithmetic
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]H(ADD|SUB)v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^R?(ADD|SUB)HN2?v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]Q(ADD|SUB)v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^(SU|US)QADDv")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]RHADDv")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABAL?v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABDL?v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ADALPv")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^((SQ)(ABS|NEG))v")>;
|
||||
|
||||
// ASIMD compare
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v")>;
|
||||
|
||||
// ASIMD max/min
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)P?v")>;
|
||||
|
||||
// ASIMD logical
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>;
|
||||
|
||||
// ASIMD multiply accumulate, D-form
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>;
|
||||
// ASIMD multiply accumulate, Q-form
|
||||
def : InstRW<[TSV110Wr_8cyc_2FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>;
|
||||
|
||||
// ASIMD multiply accumulate long
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v8i8|v16i8)")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v1i64|v2i64)")>;
|
||||
|
||||
// ASIMD shift
|
||||
// ASIMD shift accumulate
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>;
|
||||
// ASIMD shift by immed, basic
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1],
|
||||
(instregex "SHLv","SLIv","SRIv","SHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
|
||||
// ASIMD shift by immed, complex
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]?(Q|R){1,2}SHR")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^SQSHLU")>;
|
||||
// ASIMD shift by register, basic, Q-form
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
|
||||
// ASIMD shift by register, complex, D-form
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
|
||||
// ASIMD shift by register, complex, Q-form
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
|
||||
|
||||
// ASIMD reduction
|
||||
// ASIMD arith, reduce, 4H/4S
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
|
||||
// ASIMD arith, reduce, 8B/8H
|
||||
def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
|
||||
// ASIMD arith, reduce, 16B
|
||||
def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>;
|
||||
|
||||
// ASIMD max/min, reduce, 4H/4S
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
|
||||
// ASIMD max/min, reduce, 8B/8H
|
||||
def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
|
||||
// ASIMD max/min, reduce, 16B
|
||||
def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
|
||||
|
||||
|
||||
// Vector - Floating Point
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Reference for forms in this group
|
||||
// D form - v2f32
|
||||
// Q form - v4f32, v2f64
|
||||
// D form - 32, 64
|
||||
// D form - v1i32, v1i64
|
||||
// D form - v2i32
|
||||
// Q form - v4i32, v2i64
|
||||
|
||||
// ASIMD FP sign manipulation
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FABSv")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FNEGv")>;
|
||||
|
||||
// ASIMD FP compare
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>;
|
||||
|
||||
// ASIMD FP convert
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FCVT[AMNPZ][SU]v")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT(L)v")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FCVT(N|XN)v")>;
|
||||
|
||||
// ASIMD FP divide, D-form, F32
|
||||
def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>;
|
||||
// ASIMD FP divide, Q-form, F32
|
||||
def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>;
|
||||
// ASIMD FP divide, Q-form, F64
|
||||
def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>;
|
||||
|
||||
// ASIMD FP SQRT
|
||||
def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>;
|
||||
def : InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>;
|
||||
def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>;
|
||||
|
||||
// ASIMD FP max,min
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?v")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?Pv")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(MAX|MIN)(NM)?Vv")>;
|
||||
|
||||
// ASIMD FP add
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|ADDP|SUB)v")>;
|
||||
|
||||
// ASIMD FP multiply
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FMULX?v")>;
|
||||
|
||||
|
||||
// ASIMD Miscellaneous Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^RBITv")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>;
|
||||
|
||||
// ASIMD table lookup, D-form
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>;
|
||||
// ASIMD table lookup, Q-form
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v16i8Two")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
|
||||
|
||||
|
||||
// ASIMD Load Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
|
||||
// ASIMD Store Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
} // SchedModel = TSV110Model
|
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -26,8 +25,9 @@ def ThunderXT8XModel : SchedMachineModel {
|
|||
let PostRAScheduler = 1; // Use PostRA scheduler.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -154,7 +154,8 @@ def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
|
|||
def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
|
||||
// FP Mul, Div, Sqrt
|
||||
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
|
||||
|
@ -192,6 +193,7 @@ def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
|
|||
def : ReadAdvance<ReadExtrHi, 1>;
|
||||
def : ReadAdvance<ReadAdrBase, 2>;
|
||||
def : ReadAdvance<ReadVLD, 2>;
|
||||
def : ReadAdvance<ReadST, 2>;
|
||||
|
||||
// FIXME: This needs more targeted benchmarking.
|
||||
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -26,8 +25,9 @@ def ThunderX2T99Model : SchedMachineModel {
|
|||
let PostRAScheduler = 1; // Using PostRA sched.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -362,6 +362,7 @@ def : ReadAdvance<ReadID, 0>;
|
|||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 3. Instruction Tables.
|
||||
|
@ -1249,7 +1250,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
|
|||
// ASIMD shift by register, basic, Q-form
|
||||
// ASIMD shift by register, complex, D-form
|
||||
// ASIMD shift by register, complex, Q-form
|
||||
def : WriteRes<WriteV, [THX2T99F01]> {
|
||||
def : WriteRes<WriteVd, [THX2T99F01]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [4];
|
||||
}
|
||||
def : WriteRes<WriteVq, [THX2T99F01]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [4];
|
||||
|
@ -1483,7 +1489,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
|
|||
// ASIMD bitwise insert, D-form
|
||||
// ASIMD bitwise insert, Q-form
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01],
|
||||
(instregex "^BIFv", "^BITv", "^BSLv")>;
|
||||
(instregex "^BIFv", "^BITv", "^BSLv", "^BSPv")>;
|
||||
|
||||
// ASIMD count, D-form
|
||||
// ASIMD count, Q-form
|
||||
|
@ -1493,7 +1499,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
|
|||
// ASIMD duplicate, gen reg
|
||||
// ASIMD duplicate, element
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>;
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUP(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>;
|
||||
|
||||
// ASIMD extract
|
||||
|
@ -1518,25 +1524,6 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv")>;
|
|||
// ASIMD move, FP immed
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
|
||||
|
||||
// ASIMD table lookup, D-form
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>;
|
||||
|
||||
// ASIMD table lookup, Q-form
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>;
|
||||
|
||||
// ASIMD transpose
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>;
|
||||
|
||||
// ASIMD unzip/zip
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01],
|
||||
(instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
|
||||
|
||||
// ASIMD reciprocal estimate, D-form
|
||||
// ASIMD reciprocal estimate, Q-form
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01],
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
@ -48,19 +47,9 @@ def WriteAdr : SchedWrite; // Address pre/post increment.
|
|||
|
||||
def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
|
||||
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
|
||||
def ReadST : SchedRead; // Read the stored value.
|
||||
def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST.
|
||||
|
||||
// Predicate for determining when a shiftable register is shifted.
|
||||
def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>;
|
||||
|
||||
// Predicate for determining when a extendedable register is extended.
|
||||
def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>;
|
||||
|
||||
// ScaledIdxPred is true if a WriteLDIdx operand will be
|
||||
// scaled. Subtargets can use this to dynamically select resources and
|
||||
// latency for WriteLDIdx and ReadAdrBase.
|
||||
def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>;
|
||||
|
||||
// Serialized two-level address load.
|
||||
// EXAMPLE: LOADGot
|
||||
def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
|
||||
|
@ -88,7 +77,8 @@ def WriteFImm : SchedWrite; // Floating-point immediate.
|
|||
def WriteFMul : SchedWrite; // Floating-point multiply.
|
||||
def WriteFDiv : SchedWrite; // Floating-point division.
|
||||
|
||||
def WriteV : SchedWrite; // Vector ops.
|
||||
def WriteVd : SchedWrite; // 64bit Vector D ops.
|
||||
def WriteVq : SchedWrite; // 128bit Vector Q ops.
|
||||
def WriteVLD : SchedWrite; // Vector loads.
|
||||
def WriteVST : SchedWrite; // Vector stores.
|
||||
|
||||
|
@ -98,9 +88,9 @@ def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
|
|||
def ReadVLD : SchedRead;
|
||||
|
||||
// Sequential vector load and shuffle.
|
||||
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
|
||||
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
|
||||
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteVq]>;
|
||||
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>;
|
||||
|
||||
// Store a shuffled vector.
|
||||
def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
|
||||
def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
|
||||
def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>;
|
||||
def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>;
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -14,6 +13,30 @@
|
|||
|
||||
include "llvm/TableGen/SearchableTable.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Features that, for the compiler, only enable system operands and PStates
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def HasCCPP : Predicate<"Subtarget->hasCCPP()">,
|
||||
AssemblerPredicate<(all_of FeatureCCPP), "ccpp">;
|
||||
|
||||
def HasPAN : Predicate<"Subtarget->hasPAN()">,
|
||||
AssemblerPredicate<(all_of FeaturePAN),
|
||||
"ARM v8.1 Privileged Access-Never extension">;
|
||||
|
||||
def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">,
|
||||
AssemblerPredicate<(all_of FeaturePsUAO),
|
||||
"ARM v8.2 UAO PState extension (psuao)">;
|
||||
|
||||
def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
|
||||
AssemblerPredicate<(all_of FeaturePAN_RWV),
|
||||
"ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
|
||||
|
||||
def HasCONTEXTIDREL2
|
||||
: Predicate<"Subtarget->hasCONTEXTIDREL2()">,
|
||||
AssemblerPredicate<(all_of FeatureCONTEXTIDREL2),
|
||||
"Target contains CONTEXTIDR_EL2 RW operand">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AT (address translate) instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -45,7 +68,7 @@ def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
|
|||
def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
|
||||
def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
|
||||
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }] in {
|
||||
let Requires = [{ {AArch64::FeaturePAN_RWV} }] in {
|
||||
def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
|
||||
def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
|
||||
}
|
||||
|
@ -75,6 +98,21 @@ def : DB<"ld", 0xd>;
|
|||
def : DB<"st", 0xe>;
|
||||
def : DB<"sy", 0xf>;
|
||||
|
||||
class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable {
|
||||
let SearchableFields = ["Name", "Encoding", "ImmValue"];
|
||||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
bits<4> Encoding = encoding;
|
||||
bits<5> ImmValue = immValue;
|
||||
code Requires = [{ {AArch64::FeatureXS} }];
|
||||
}
|
||||
|
||||
def : DBnXS<"oshnxs", 0x3, 0x10>;
|
||||
def : DBnXS<"nshnxs", 0x7, 0x14>;
|
||||
def : DBnXS<"ishnxs", 0xb, 0x18>;
|
||||
def : DBnXS<"synxs", 0xf, 0x1c>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// DC (data cache maintenance) instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -102,9 +140,33 @@ def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
|
|||
def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
|
||||
def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
|
||||
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }] in
|
||||
let Requires = [{ {AArch64::FeatureCCPP} }] in
|
||||
def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
|
||||
|
||||
let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in
|
||||
def : DC<"CVADP", 0b011, 0b0111, 0b1101, 0b001>;
|
||||
|
||||
let Requires = [{ {AArch64::FeatureMTE} }] in {
|
||||
def : DC<"IGVAC", 0b000, 0b0111, 0b0110, 0b011>;
|
||||
def : DC<"IGSW", 0b000, 0b0111, 0b0110, 0b100>;
|
||||
def : DC<"CGSW", 0b000, 0b0111, 0b1010, 0b100>;
|
||||
def : DC<"CIGSW", 0b000, 0b0111, 0b1110, 0b100>;
|
||||
def : DC<"CGVAC", 0b011, 0b0111, 0b1010, 0b011>;
|
||||
def : DC<"CGVAP", 0b011, 0b0111, 0b1100, 0b011>;
|
||||
def : DC<"CGVADP", 0b011, 0b0111, 0b1101, 0b011>;
|
||||
def : DC<"CIGVAC", 0b011, 0b0111, 0b1110, 0b011>;
|
||||
def : DC<"GVA", 0b011, 0b0111, 0b0100, 0b011>;
|
||||
def : DC<"IGDVAC", 0b000, 0b0111, 0b0110, 0b101>;
|
||||
def : DC<"IGDSW", 0b000, 0b0111, 0b0110, 0b110>;
|
||||
def : DC<"CGDSW", 0b000, 0b0111, 0b1010, 0b110>;
|
||||
def : DC<"CIGDSW", 0b000, 0b0111, 0b1110, 0b110>;
|
||||
def : DC<"CGDVAC", 0b011, 0b0111, 0b1010, 0b101>;
|
||||
def : DC<"CGDVAP", 0b011, 0b0111, 0b1100, 0b101>;
|
||||
def : DC<"CGDVADP", 0b011, 0b0111, 0b1101, 0b101>;
|
||||
def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>;
|
||||
def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// IC (instruction cache maintenance) instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -154,7 +216,7 @@ class TSB<string name, bits<4> encoding> : SearchableTable{
|
|||
bits<4> Encoding;
|
||||
let Encoding = encoding;
|
||||
|
||||
code Requires = [{ {AArch64::HasV8_4aOps} }];
|
||||
code Requires = [{ {AArch64::FeatureTRACEV8_4} }];
|
||||
}
|
||||
|
||||
def : TSB<"csync", 0>;
|
||||
|
@ -290,14 +352,41 @@ def : PState<"SPSel", 0b00101>;
|
|||
def : PState<"DAIFSet", 0b11110>;
|
||||
def : PState<"DAIFClr", 0b11111>;
|
||||
// v8.1a "Privileged Access Never" extension-specific PStates
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in
|
||||
let Requires = [{ {AArch64::FeaturePAN} }] in
|
||||
def : PState<"PAN", 0b00100>;
|
||||
|
||||
// v8.2a "User Access Override" extension-specific PStates
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }] in
|
||||
let Requires = [{ {AArch64::FeaturePsUAO} }] in
|
||||
def : PState<"UAO", 0b00011>;
|
||||
// v8.4a timining insensitivity of data processing instructions
|
||||
let Requires = [{ {AArch64::HasV8_4aOps} }] in
|
||||
// v8.4a timing insensitivity of data processing instructions
|
||||
let Requires = [{ {AArch64::FeatureDIT} }] in
|
||||
def : PState<"DIT", 0b11010>;
|
||||
// v8.5a Spectre Mitigation
|
||||
let Requires = [{ {AArch64::FeatureSSBS} }] in
|
||||
def : PState<"SSBS", 0b11001>;
|
||||
// v8.5a Memory Tagging Extension
|
||||
let Requires = [{ {AArch64::FeatureMTE} }] in
|
||||
def : PState<"TCO", 0b11100>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVCR instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class SVCR<string name, bits<3> encoding> : SearchableTable {
|
||||
let SearchableFields = ["Name", "Encoding"];
|
||||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
bits<3> Encoding;
|
||||
let Encoding = encoding;
|
||||
code Requires = [{ {} }];
|
||||
}
|
||||
|
||||
let Requires = [{ {AArch64::FeatureSME} }] in {
|
||||
def : SVCR<"SVCRSM", 0b001>;
|
||||
def : SVCR<"SVCRZA", 0b010>;
|
||||
def : SVCR<"SVCRSMZA", 0b011>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PSB instruction options.
|
||||
|
@ -315,14 +404,28 @@ class PSB<string name, bits<5> encoding> : SearchableTable {
|
|||
def : PSB<"csync", 0x11>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TLBI (translation lookaside buffer invalidate) instruction options.
|
||||
// BTI instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
|
||||
bits<3> op2, bit needsreg = 1> : SearchableTable {
|
||||
class BTI<string name, bits<3> encoding> : SearchableTable {
|
||||
let SearchableFields = ["Name", "Encoding"];
|
||||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
bits<3> Encoding;
|
||||
let Encoding = encoding;
|
||||
}
|
||||
|
||||
def : BTI<"c", 0b010>;
|
||||
def : BTI<"j", 0b100>;
|
||||
def : BTI<"jc", 0b110>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TLBI (translation lookaside buffer invalidate) instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
|
||||
bits<3> op2, bit needsreg> {
|
||||
string Name = name;
|
||||
bits<14> Encoding;
|
||||
let Encoding{13-11} = op1;
|
||||
|
@ -330,94 +433,147 @@ class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
|
|||
let Encoding{6-3} = crm;
|
||||
let Encoding{2-0} = op2;
|
||||
bit NeedsReg = needsreg;
|
||||
code Requires = [{ {} }];
|
||||
list<string> Requires = [];
|
||||
list<string> ExtraRequires = [];
|
||||
code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }];
|
||||
}
|
||||
|
||||
def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
|
||||
def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
|
||||
def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
|
||||
def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
|
||||
def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
|
||||
def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
|
||||
def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
|
||||
def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
|
||||
def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
|
||||
def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
|
||||
def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
|
||||
def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
|
||||
def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
|
||||
def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
|
||||
def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
|
||||
def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
|
||||
def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
|
||||
def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
|
||||
def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
|
||||
def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
|
||||
def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
|
||||
def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
|
||||
def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
|
||||
def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
|
||||
def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
|
||||
def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
|
||||
def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
|
||||
def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
|
||||
def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
|
||||
def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
|
||||
def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
|
||||
def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
|
||||
def TLBITable : GenericTable {
|
||||
let FilterClass = "TLBIEntry";
|
||||
let CppTypeName = "TLBI";
|
||||
let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
|
||||
}
|
||||
|
||||
def lookupTLBIByName : SearchIndex {
|
||||
let Table = TLBITable;
|
||||
let Key = ["Name"];
|
||||
}
|
||||
|
||||
def lookupTLBIByEncoding : SearchIndex {
|
||||
let Table = TLBITable;
|
||||
let Key = ["Encoding"];
|
||||
}
|
||||
|
||||
multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
|
||||
bits<3> op2, bit needsreg = 1> {
|
||||
def : TLBIEntry<name, op1, crn, crm, op2, needsreg>;
|
||||
def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
|
||||
let Encoding{7} = 1;
|
||||
let ExtraRequires = ["AArch64::FeatureXS"];
|
||||
}
|
||||
}
|
||||
|
||||
defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
|
||||
defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
|
||||
defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
|
||||
defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
|
||||
defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
|
||||
defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
|
||||
defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
|
||||
defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
|
||||
defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
|
||||
defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
|
||||
defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
|
||||
defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
|
||||
defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
|
||||
defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
|
||||
defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
|
||||
defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
|
||||
defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
|
||||
defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
|
||||
defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
|
||||
defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
|
||||
defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
|
||||
defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
|
||||
defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
|
||||
defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
|
||||
defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
|
||||
defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
|
||||
defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
|
||||
defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
|
||||
defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
|
||||
defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
|
||||
defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
|
||||
defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
|
||||
|
||||
// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
|
||||
let Requires = ["AArch64::FeatureTLB_RMI"] in {
|
||||
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
|
||||
let Requires = [{ {AArch64::HasV8_4aOps} }] in {
|
||||
// op1 CRn CRm op2
|
||||
def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
|
||||
def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
|
||||
def : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
|
||||
def : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
|
||||
def : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
|
||||
def : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
|
||||
def : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
|
||||
def : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
|
||||
def : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
|
||||
def : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
|
||||
def : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
|
||||
def : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
|
||||
def : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
|
||||
def : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
|
||||
def : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
|
||||
def : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
|
||||
defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
|
||||
defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
|
||||
defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
|
||||
defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
|
||||
defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
|
||||
defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
|
||||
defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
|
||||
defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
|
||||
defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
|
||||
defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
|
||||
defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
|
||||
defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
|
||||
defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
|
||||
defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
|
||||
defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
|
||||
defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
|
||||
|
||||
// Armv8.4-A TLB Range Maintenance instructions:
|
||||
// op1 CRn CRm op2
|
||||
def : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
|
||||
def : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
|
||||
def : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
|
||||
def : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
|
||||
def : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
|
||||
def : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
|
||||
def : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
|
||||
def : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
|
||||
def : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
|
||||
def : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
|
||||
def : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
|
||||
def : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
|
||||
def : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
|
||||
def : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
|
||||
def : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
|
||||
def : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
|
||||
def : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
|
||||
def : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
|
||||
def : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
|
||||
def : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
|
||||
def : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
|
||||
def : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
|
||||
def : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
|
||||
def : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
|
||||
def : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
|
||||
def : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
|
||||
def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
|
||||
def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
|
||||
def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
|
||||
def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
|
||||
defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
|
||||
defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
|
||||
defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
|
||||
defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
|
||||
defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
|
||||
defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
|
||||
defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
|
||||
defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
|
||||
defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
|
||||
defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
|
||||
defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
|
||||
defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
|
||||
defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
|
||||
defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
|
||||
defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
|
||||
defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
|
||||
defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
|
||||
defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
|
||||
defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
|
||||
defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
|
||||
defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
|
||||
defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
|
||||
defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
|
||||
defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
|
||||
defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
|
||||
defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
|
||||
defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
|
||||
defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
|
||||
defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
|
||||
defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
|
||||
} //FeatureTLB_RMI
|
||||
|
||||
// Armv9-A Realm Management Extention TLBI Instructions
|
||||
let Requires = ["AArch64::FeatureRME"] in {
|
||||
defm : TLBI<"RPAOS", 0b110, 0b1000, 0b0100, 0b011>;
|
||||
defm : TLBI<"RPALOS", 0b110, 0b1000, 0b0100, 0b111>;
|
||||
defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>;
|
||||
defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>;
|
||||
}
|
||||
|
||||
// Armv8.5-A Prediction Restriction by Context instruction options:
|
||||
class PRCTX<string name, bits<4> crm> : SearchableTable {
|
||||
let SearchableFields = ["Name", "Encoding"];
|
||||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
bits<11> Encoding;
|
||||
let Encoding{10-4} = 0b0110111;
|
||||
let Encoding{3-0} = crm;
|
||||
bit NeedsReg = 1;
|
||||
code Requires = [{ {} }];
|
||||
}
|
||||
|
||||
let Requires = [{ {AArch64::FeaturePredRes} }] in {
|
||||
def : PRCTX<"RCTX", 0b0011>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -430,6 +586,7 @@ class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
|
|||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
string AltName = name;
|
||||
bits<16> Encoding;
|
||||
let Encoding{15-14} = op0;
|
||||
let Encoding{13-11} = op1;
|
||||
|
@ -476,8 +633,10 @@ def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
|
|||
def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
|
||||
def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
|
||||
def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
|
||||
|
||||
//v8.3 CCIDX - extending the CCsIDr number of sets
|
||||
def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> {
|
||||
let Requires = [{ {AArch64::HasV8_3aOps} }];
|
||||
let Requires = [{ {AArch64::FeatureCCIDX} }];
|
||||
}
|
||||
def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>;
|
||||
def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>;
|
||||
|
@ -487,6 +646,9 @@ def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>;
|
|||
def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>;
|
||||
def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>;
|
||||
def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>;
|
||||
def : ROSysReg<"ID_PFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b100> {
|
||||
let Requires = [{ {AArch64::FeatureSpecRestrict} }];
|
||||
}
|
||||
def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>;
|
||||
def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>;
|
||||
def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>;
|
||||
|
@ -510,11 +672,10 @@ def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>;
|
|||
def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>;
|
||||
def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
|
||||
def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
|
||||
def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>;
|
||||
def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
|
||||
def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
|
||||
def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> {
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }];
|
||||
}
|
||||
def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>;
|
||||
def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>;
|
||||
def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>;
|
||||
def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>;
|
||||
|
@ -525,6 +686,7 @@ def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>;
|
|||
def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>;
|
||||
def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>;
|
||||
def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>;
|
||||
def : ROSysReg<"ID_MMFR5_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b110>;
|
||||
|
||||
// Trace registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
|
@ -584,7 +746,7 @@ def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>;
|
|||
|
||||
// v8.1a "Limited Ordering Regions" extension-specific system register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in
|
||||
let Requires = [{ {AArch64::FeatureLOR} }] in
|
||||
def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>;
|
||||
|
||||
// v8.2a "RAS extension" registers
|
||||
|
@ -594,6 +756,35 @@ def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>;
|
|||
def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>;
|
||||
}
|
||||
|
||||
// v8.5a "random number" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureRandGen} }] in {
|
||||
def : ROSysReg<"RNDR", 0b11, 0b011, 0b0010, 0b0100, 0b000>;
|
||||
def : ROSysReg<"RNDRRS", 0b11, 0b011, 0b0010, 0b0100, 0b001>;
|
||||
}
|
||||
|
||||
// v8.5a Software Context Number registers
|
||||
let Requires = [{ {AArch64::FeatureSpecRestrict} }] in {
|
||||
def : RWSysReg<"SCXTNUM_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b111>;
|
||||
def : RWSysReg<"SCXTNUM_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b111>;
|
||||
def : RWSysReg<"SCXTNUM_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b111>;
|
||||
def : RWSysReg<"SCXTNUM_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b111>;
|
||||
def : RWSysReg<"SCXTNUM_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b111>;
|
||||
}
|
||||
|
||||
// v9a Realm Management Extension registers
|
||||
let Requires = [{ {AArch64::FeatureRME} }] in {
|
||||
def : RWSysReg<"MFAR_EL3", 0b11, 0b110, 0b0110, 0b0000, 0b101>;
|
||||
def : RWSysReg<"GPCCR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b110>;
|
||||
def : RWSysReg<"GPTBR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b100>;
|
||||
}
|
||||
|
||||
// v9-a Scalable Matrix Extension (SME) registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureSME} }] in {
|
||||
def : ROSysReg<"ID_AA64SMFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b101>;
|
||||
}
|
||||
|
||||
//===----------------------
|
||||
// Write-only regs
|
||||
//===----------------------
|
||||
|
@ -710,6 +901,9 @@ def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>;
|
|||
def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>;
|
||||
def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>;
|
||||
def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>;
|
||||
def : RWSysReg<"HCRX_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b010> {
|
||||
let Requires = [{ {AArch64::FeatureHCX} }];
|
||||
}
|
||||
def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>;
|
||||
def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>;
|
||||
def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>;
|
||||
|
@ -719,13 +913,19 @@ def : RWSysReg<"HSTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b011>;
|
|||
def : RWSysReg<"HACR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b111>;
|
||||
def : RWSysReg<"MDCR_EL3", 0b11, 0b110, 0b0001, 0b0011, 0b001>;
|
||||
def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>;
|
||||
def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>;
|
||||
def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>;
|
||||
|
||||
let Requires = [{ {AArch64::FeatureEL2VMSA} }] in {
|
||||
def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
|
||||
let AltName = "VSCTLR_EL2";
|
||||
}
|
||||
def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
|
||||
}
|
||||
|
||||
def : RWSysReg<"TTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b001>;
|
||||
def : RWSysReg<"TCR_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b010>;
|
||||
def : RWSysReg<"TCR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b010>;
|
||||
def : RWSysReg<"TCR_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b010>;
|
||||
def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
|
||||
def : RWSysReg<"VTCR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b010>;
|
||||
def : RWSysReg<"DACR32_EL2", 0b11, 0b100, 0b0011, 0b0000, 0b000>;
|
||||
def : RWSysReg<"SPSR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b000>;
|
||||
|
@ -740,7 +940,7 @@ def : RWSysReg<"SP_EL2", 0b11, 0b110, 0b0100, 0b0001, 0b000>;
|
|||
def : RWSysReg<"SPSel", 0b11, 0b000, 0b0100, 0b0010, 0b000>;
|
||||
def : RWSysReg<"NZCV", 0b11, 0b011, 0b0100, 0b0010, 0b000>;
|
||||
def : RWSysReg<"DAIF", 0b11, 0b011, 0b0100, 0b0010, 0b001>;
|
||||
def : RWSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>;
|
||||
def : ROSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>;
|
||||
def : RWSysReg<"SPSR_irq", 0b11, 0b100, 0b0100, 0b0011, 0b000>;
|
||||
def : RWSysReg<"SPSR_abt", 0b11, 0b100, 0b0100, 0b0011, 0b001>;
|
||||
def : RWSysReg<"SPSR_und", 0b11, 0b100, 0b0100, 0b0011, 0b010>;
|
||||
|
@ -777,6 +977,7 @@ def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>;
|
|||
def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>;
|
||||
def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>;
|
||||
def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>;
|
||||
def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
|
||||
def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>;
|
||||
def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>;
|
||||
def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>;
|
||||
|
@ -1063,7 +1264,6 @@ def : RWSysReg<"ICC_SRE_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b101>;
|
|||
def : RWSysReg<"ICC_IGRPEN0_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b110>;
|
||||
def : RWSysReg<"ICC_IGRPEN1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b111>;
|
||||
def : RWSysReg<"ICC_IGRPEN1_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b111>;
|
||||
def : RWSysReg<"ICC_SEIEN_EL1", 0b11, 0b000, 0b1100, 0b1101, 0b000>;
|
||||
def : RWSysReg<"ICC_AP0R0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b100>;
|
||||
def : RWSysReg<"ICC_AP0R1_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b101>;
|
||||
def : RWSysReg<"ICC_AP0R2_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b110>;
|
||||
|
@ -1081,9 +1281,8 @@ def : RWSysReg<"ICH_AP1R1_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b001>;
|
|||
def : RWSysReg<"ICH_AP1R2_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b010>;
|
||||
def : RWSysReg<"ICH_AP1R3_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b011>;
|
||||
def : RWSysReg<"ICH_HCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b000>;
|
||||
def : RWSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>;
|
||||
def : ROSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>;
|
||||
def : RWSysReg<"ICH_VMCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b111>;
|
||||
def : RWSysReg<"ICH_VSEIR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b100>;
|
||||
def : RWSysReg<"ICH_LR0_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b000>;
|
||||
def : RWSysReg<"ICH_LR1_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b001>;
|
||||
def : RWSysReg<"ICH_LR2_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b010>;
|
||||
|
@ -1101,24 +1300,74 @@ def : RWSysReg<"ICH_LR13_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b101>;
|
|||
def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>;
|
||||
def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
|
||||
|
||||
// v8r system registers
|
||||
let Requires = [{ {AArch64::HasV8_0rOps} }] in {
|
||||
//Virtualization System Control Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"VSCTLR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
|
||||
let AltName = "TTBR0_EL2";
|
||||
}
|
||||
|
||||
//MPU Type Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"MPUIR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b100>;
|
||||
def : RWSysReg<"MPUIR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b100>;
|
||||
|
||||
//Protection Region Enable Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"PRENR_EL1", 0b11, 0b000, 0b0110, 0b0001, 0b001>;
|
||||
def : RWSysReg<"PRENR_EL2", 0b11, 0b100, 0b0110, 0b0001, 0b001>;
|
||||
|
||||
//Protection Region Selection Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"PRSELR_EL1", 0b11, 0b000, 0b0110, 0b0010, 0b001>;
|
||||
def : RWSysReg<"PRSELR_EL2", 0b11, 0b100, 0b0110, 0b0010, 0b001>;
|
||||
|
||||
//Protection Region Base Address Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"PRBAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b000>;
|
||||
def : RWSysReg<"PRBAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b000>;
|
||||
|
||||
//Protection Region Limit Address Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"PRLAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b001>;
|
||||
def : RWSysReg<"PRLAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b001>;
|
||||
|
||||
foreach n = 1-15 in {
|
||||
foreach x = 1-2 in {
|
||||
//Direct acces to Protection Region Base Address Register for n th MPU region
|
||||
def : RWSysReg<!strconcat("PRBAR"#n, "_EL"#x),
|
||||
0b11, 0b000, 0b0110, 0b1000, 0b000>{
|
||||
let Encoding{5-2} = n;
|
||||
let Encoding{13} = !add(x,-1);
|
||||
}
|
||||
|
||||
def : RWSysReg<!strconcat("PRLAR"#n, "_EL"#x),
|
||||
0b11, 0b000, 0b0110, 0b1000, 0b001>{
|
||||
let Encoding{5-2} = n;
|
||||
let Encoding{13} = !add(x,-1);
|
||||
}
|
||||
} //foreach x = 1-2 in
|
||||
} //foreach n = 1-15 in
|
||||
} //let Requires = [{ {AArch64::HasV8_0rOps} }] in
|
||||
|
||||
// v8.1a "Privileged Access Never" extension-specific system registers
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in
|
||||
let Requires = [{ {AArch64::FeaturePAN} }] in
|
||||
def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
|
||||
|
||||
// v8.1a "Limited Ordering Regions" extension-specific system registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in {
|
||||
let Requires = [{ {AArch64::FeatureLOR} }] in {
|
||||
def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>;
|
||||
def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>;
|
||||
def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>;
|
||||
def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>;
|
||||
}
|
||||
|
||||
// v8.1a "Virtualization hos extensions" system registers
|
||||
// v8.1a "Virtualization Host extensions" system registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in {
|
||||
let Requires = [{ {AArch64::FeatureVH} }] in {
|
||||
def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
|
||||
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
|
||||
def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>;
|
||||
def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>;
|
||||
def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>;
|
||||
|
@ -1144,10 +1393,13 @@ def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>;
|
|||
def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>;
|
||||
def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>;
|
||||
def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
|
||||
let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in {
|
||||
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
|
||||
}
|
||||
}
|
||||
// v8.2a registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }] in
|
||||
let Requires = [{ {AArch64::FeaturePsUAO} }] in
|
||||
def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>;
|
||||
|
||||
// v8.2a "Statistical Profiling extension" registers
|
||||
|
@ -1156,7 +1408,7 @@ let Requires = [{ {AArch64::FeatureSPE} }] in {
|
|||
def : RWSysReg<"PMBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b000>;
|
||||
def : RWSysReg<"PMBPTR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b001>;
|
||||
def : RWSysReg<"PMBSR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b011>;
|
||||
def : RWSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>;
|
||||
def : ROSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>;
|
||||
def : RWSysReg<"PMSCR_EL2", 0b11, 0b100, 0b1001, 0b1001, 0b000>;
|
||||
def : RWSysReg<"PMSCR_EL12", 0b11, 0b101, 0b1001, 0b1001, 0b000>;
|
||||
def : RWSysReg<"PMSCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b000>;
|
||||
|
@ -1165,7 +1417,7 @@ def : RWSysReg<"PMSIRR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b011>;
|
|||
def : RWSysReg<"PMSFCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b100>;
|
||||
def : RWSysReg<"PMSEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b101>;
|
||||
def : RWSysReg<"PMSLATFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b110>;
|
||||
def : RWSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>;
|
||||
def : ROSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>;
|
||||
}
|
||||
|
||||
// v8.2a "RAS extension" registers
|
||||
|
@ -1184,7 +1436,7 @@ def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
|
|||
|
||||
// v8.3a "Pointer authentication extension" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_3aOps} }] in {
|
||||
let Requires = [{ {AArch64::FeaturePAuth} }] in {
|
||||
def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
|
||||
def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
|
||||
def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
|
||||
|
@ -1197,12 +1449,14 @@ def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
|
|||
def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
|
||||
}
|
||||
|
||||
let Requires = [{ {AArch64::HasV8_4aOps} }] in {
|
||||
|
||||
// v8.4 "Secure Exception Level 2 extension"
|
||||
let Requires = [{ {AArch64::FeatureSEL2} }] in {
|
||||
// v8.4a "Virtualization secure second stage translation" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
|
||||
def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000>;
|
||||
def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000> {
|
||||
let Requires = [{ {AArch64::HasV8_0aOps} }];
|
||||
}
|
||||
|
||||
// v8.4a "Virtualization timer" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
|
@ -1216,18 +1470,19 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
|
|||
// v8.4a "Virtualization debug state" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
|
||||
} // FeatureSEL2
|
||||
|
||||
// v8.4a RAS registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
|
||||
def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
|
||||
def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>;
|
||||
def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>;
|
||||
def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>;
|
||||
def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
|
||||
|
||||
// v8.4a MPAM registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureMPAM} }] in {
|
||||
def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
|
||||
def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
|
||||
def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
|
||||
|
@ -1244,9 +1499,11 @@ def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
|
|||
def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
|
||||
def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
|
||||
def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
|
||||
} //FeatureMPAM
|
||||
|
||||
// v8.4a Activitiy monitor registers
|
||||
// v8.4a Activity Monitor registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureAM} }] in {
|
||||
def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>;
|
||||
def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>;
|
||||
def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>;
|
||||
|
@ -1295,6 +1552,7 @@ def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>;
|
|||
def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>;
|
||||
def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>;
|
||||
def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
|
||||
} //FeatureAM
|
||||
|
||||
// v8.4a Trace Extension registers
|
||||
//
|
||||
|
@ -1303,19 +1561,24 @@ def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
|
|||
// but they are already defined above.
|
||||
//
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in {
|
||||
def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>;
|
||||
def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>;
|
||||
def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>;
|
||||
} //FeatureTRACEV8_4
|
||||
|
||||
// v8.4a Timining insensitivity of data processing instructions
|
||||
// v8.4a Timing insensitivity of data processing instructions
|
||||
// DIT: Data Independent Timing instructions
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureDIT} }] in {
|
||||
def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>;
|
||||
} //FeatureDIT
|
||||
|
||||
// v8.4a Enhanced Support for Nested Virtualization
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureNV} }] in {
|
||||
def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>;
|
||||
|
||||
} // HasV8_4aOps
|
||||
} //FeatureNV
|
||||
|
||||
// SVE control registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
|
@ -1326,7 +1589,131 @@ def : RWSysReg<"ZCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b000>;
|
|||
def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>;
|
||||
}
|
||||
|
||||
// V8.5a Spectre mitigation SSBS register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureSSBS} }] in
|
||||
def : RWSysReg<"SSBS", 0b11, 0b011, 0b0100, 0b0010, 0b110>;
|
||||
|
||||
// v8.5a Memory Tagging Extension
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureMTE} }] in {
|
||||
def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>;
|
||||
def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>;
|
||||
def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>;
|
||||
def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b000>;
|
||||
def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0101, 0b0110, 0b000>;
|
||||
def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0101, 0b0110, 0b000>;
|
||||
def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0101, 0b0110, 0b000>;
|
||||
def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b001>;
|
||||
def : ROSysReg<"GMID_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b100>;
|
||||
} // HasMTE
|
||||
|
||||
// Embedded Trace Extension R/W System registers
|
||||
let Requires = [{ {AArch64::FeatureETE} }] in {
|
||||
// Name Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"TRCRSR", 0b10, 0b001, 0b0000, 0b1010, 0b000>;
|
||||
// TRCEXTINSELR0 has the same encoding as ETM TRCEXTINSELR
|
||||
def : RWSysReg<"TRCEXTINSELR0", 0b10, 0b001, 0b0000, 0b1000, 0b100>;
|
||||
def : RWSysReg<"TRCEXTINSELR1", 0b10, 0b001, 0b0000, 0b1001, 0b100>;
|
||||
def : RWSysReg<"TRCEXTINSELR2", 0b10, 0b001, 0b0000, 0b1010, 0b100>;
|
||||
def : RWSysReg<"TRCEXTINSELR3", 0b10, 0b001, 0b0000, 0b1011, 0b100>;
|
||||
} // FeatureETE
|
||||
|
||||
// Trace Buffer Extension System registers
|
||||
let Requires = [{ {AArch64::FeatureTRBE} }] in {
|
||||
// Name Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"TRBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b000>;
|
||||
def : RWSysReg<"TRBPTR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b001>;
|
||||
def : RWSysReg<"TRBBASER_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b010>;
|
||||
def : RWSysReg<"TRBSR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b011>;
|
||||
def : RWSysReg<"TRBMAR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b100>;
|
||||
def : RWSysReg<"TRBTRG_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b110>;
|
||||
def : ROSysReg<"TRBIDR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b111>;
|
||||
} // FeatureTRBE
|
||||
|
||||
|
||||
// v8.6a Activity Monitors Virtualization Support
|
||||
let Requires = [{ {AArch64::FeatureAMVS} }] in {
|
||||
foreach n = 0-15 in {
|
||||
foreach x = 0-1 in {
|
||||
def : RWSysReg<"AMEVCNTVOFF"#x#n#"_EL2",
|
||||
0b11, 0b100, 0b1101, 0b1000, 0b000>{
|
||||
let Encoding{4} = x;
|
||||
let Encoding{3-0} = n;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// v8.6a Fine Grained Virtualization Traps
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureFineGrainedTraps} }] in {
|
||||
def : RWSysReg<"HFGRTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b100>;
|
||||
def : RWSysReg<"HFGWTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b101>;
|
||||
def : RWSysReg<"HFGITR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b110>;
|
||||
def : RWSysReg<"HDFGRTR_EL2", 0b11, 0b100, 0b0011, 0b0001, 0b100>;
|
||||
def : RWSysReg<"HDFGWTR_EL2", 0b11, 0b100, 0b0011, 0b0001, 0b101>;
|
||||
}
|
||||
|
||||
// v8.6a Enhanced Counter Virtualization
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureEnhancedCounterVirtualization} }] in {
|
||||
def : RWSysReg<"CNTSCALE_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b100>;
|
||||
def : RWSysReg<"CNTISCALE_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b101>;
|
||||
def : RWSysReg<"CNTPOFF_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b110>;
|
||||
def : RWSysReg<"CNTVFRQ_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b111>;
|
||||
def : RWSysReg<"CNTPCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b101>;
|
||||
def : RWSysReg<"CNTVCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b110>;
|
||||
}
|
||||
|
||||
// v8.7a LD64B/ST64B Accelerator Extension system register
|
||||
let Requires = [{ {AArch64::FeatureLS64} }] in
|
||||
def : RWSysReg<"ACCDATA_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b101>;
|
||||
|
||||
// Branch Record Buffer system registers
|
||||
let Requires = [{ {AArch64::FeatureBRBE} }] in {
|
||||
def : RWSysReg<"BRBCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b000>;
|
||||
def : RWSysReg<"BRBCR_EL12", 0b10, 0b101, 0b1001, 0b0000, 0b000>;
|
||||
def : RWSysReg<"BRBCR_EL2", 0b10, 0b100, 0b1001, 0b0000, 0b000>;
|
||||
def : RWSysReg<"BRBFCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b001>;
|
||||
def : ROSysReg<"BRBIDR0_EL1", 0b10, 0b001, 0b1001, 0b0010, 0b000>;
|
||||
def : RWSysReg<"BRBINFINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b000>;
|
||||
def : RWSysReg<"BRBSRCINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b001>;
|
||||
def : RWSysReg<"BRBTGTINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b010>;
|
||||
def : RWSysReg<"BRBTS_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b010>;
|
||||
foreach n = 0-31 in {
|
||||
defvar nb = !cast<bits<5>>(n);
|
||||
def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>;
|
||||
def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>;
|
||||
def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>;
|
||||
}
|
||||
}
|
||||
|
||||
// Statistical Profiling Extension system register
|
||||
let Requires = [{ {AArch64::FeatureSPE_EEF} }] in
|
||||
def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
|
||||
|
||||
// Cyclone specific system registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::ProcCyclone} }] in
|
||||
let Requires = [{ {AArch64::FeatureAppleA7SysReg} }] in
|
||||
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
|
||||
|
||||
// Scalable Matrix Extension (SME)
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureSME} }] in {
|
||||
def : RWSysReg<"SMCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b110>;
|
||||
def : RWSysReg<"SMCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b110>;
|
||||
def : RWSysReg<"SMCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b110>;
|
||||
def : RWSysReg<"SMCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b110>;
|
||||
def : RWSysReg<"SVCR", 0b11, 0b011, 0b0100, 0b0010, 0b010>;
|
||||
def : RWSysReg<"SMPRI_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b100>;
|
||||
def : RWSysReg<"SMPRIMAP_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b101>;
|
||||
def : ROSysReg<"SMIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b110>;
|
||||
def : RWSysReg<"TPIDR2_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b101>;
|
||||
} // HasSME
|
||||
|
||||
// v8.4a MPAM and SME registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureMPAM, AArch64::FeatureSME} }] in {
|
||||
def : RWSysReg<"MPAMSM_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b011>;
|
||||
} // HasMPAM, HasSME
|
||||
|
|
|
@ -0,0 +1,726 @@
|
|||
//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SME Outer Products
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
|
||||
ZPRRegOp zpr_ty, string mnemonic>
|
||||
: I<(outs za_ty:$ZAda),
|
||||
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
|
||||
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
|
||||
"", []>,
|
||||
Sched<[]> {
|
||||
bits<5> Zm;
|
||||
bits<3> Pm;
|
||||
bits<3> Pn;
|
||||
bits<5> Zn;
|
||||
let Inst{31-23} = 0b100000001;
|
||||
let Inst{22} = sz;
|
||||
let Inst{21} = 0b0;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = Pm;
|
||||
let Inst{12-10} = Pn;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4} = S;
|
||||
let Inst{3} = 0b0;
|
||||
}
|
||||
|
||||
class sme_outer_product_fp32<bit S, string mnemonic>
|
||||
: sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
|
||||
bits<2> ZAda;
|
||||
let Inst{1-0} = ZAda;
|
||||
let Inst{2} = 0b0;
|
||||
}
|
||||
|
||||
class sme_outer_product_fp64<bit S, string mnemonic>
|
||||
: sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
|
||||
bits<3> ZAda;
|
||||
let Inst{2-0} = ZAda;
|
||||
}
|
||||
|
||||
class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
|
||||
MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
|
||||
string mnemonic>
|
||||
: I<(outs za_ty:$ZAda),
|
||||
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
|
||||
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
|
||||
"", []>,
|
||||
Sched<[]> {
|
||||
bits<5> Zm;
|
||||
bits<3> Pm;
|
||||
bits<3> Pn;
|
||||
bits<5> Zn;
|
||||
let Inst{31-25} = 0b1010000;
|
||||
let Inst{24} = u0;
|
||||
let Inst{23} = 0b1;
|
||||
let Inst{22} = sz;
|
||||
let Inst{21} = u1;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = Pm;
|
||||
let Inst{12-10} = Pn;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4} = S;
|
||||
let Inst{3} = 0b0;
|
||||
}
|
||||
|
||||
class sme_int_outer_product_i32<bits<3> opc, string mnemonic>
|
||||
: sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32, ZPR8,
|
||||
mnemonic> {
|
||||
bits<2> ZAda;
|
||||
let Inst{1-0} = ZAda;
|
||||
let Inst{2} = 0b0;
|
||||
}
|
||||
|
||||
class sme_int_outer_product_i64<bits<3> opc, string mnemonic>
|
||||
: sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64, ZPR16,
|
||||
mnemonic> {
|
||||
bits<3> ZAda;
|
||||
let Inst{2-0} = ZAda;
|
||||
}
|
||||
|
||||
class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
|
||||
: I<(outs TileOp32:$ZAda),
|
||||
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
|
||||
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
|
||||
"", []>,
|
||||
Sched<[]> {
|
||||
bits<5> Zm;
|
||||
bits<3> Pm;
|
||||
bits<3> Pn;
|
||||
bits<5> Zn;
|
||||
bits<2> ZAda;
|
||||
let Inst{31-22} = 0b1000000110;
|
||||
let Inst{21} = op;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = Pm;
|
||||
let Inst{12-10} = Pn;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4} = S;
|
||||
let Inst{3-2} = 0b00;
|
||||
let Inst{1-0} = ZAda;
|
||||
}
|
||||
|
||||
multiclass sme_bf16_outer_product<bit S, string mnemonic> {
|
||||
def : sme_outer_product_widening_inst<0b0, S, mnemonic>;
|
||||
}
|
||||
|
||||
multiclass sme_f16_outer_product<bit S, string mnemonic> {
|
||||
def : sme_outer_product_widening_inst<0b1, S, mnemonic>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SME Add Vector to Tile
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
|
||||
ZPRRegOp zpr_ty, string mnemonic>
|
||||
: I<(outs tile_ty:$ZAda),
|
||||
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
|
||||
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
|
||||
"", []>, Sched<[]> {
|
||||
bits<3> Pm;
|
||||
bits<3> Pn;
|
||||
bits<5> Zn;
|
||||
let Inst{31-23} = 0b110000001;
|
||||
let Inst{22} = op;
|
||||
let Inst{21-17} = 0b01000;
|
||||
let Inst{16} = V;
|
||||
let Inst{15-13} = Pm;
|
||||
let Inst{12-10} = Pn;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-3} = 0b00;
|
||||
}
|
||||
|
||||
class sme_add_vector_to_tile_u32<bit V, string mnemonic>
|
||||
: sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
|
||||
bits<2> ZAda;
|
||||
let Inst{2} = 0b0;
|
||||
let Inst{1-0} = ZAda;
|
||||
}
|
||||
|
||||
class sme_add_vector_to_tile_u64<bit V, string mnemonic>
|
||||
: sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
|
||||
bits<3> ZAda;
|
||||
let Inst{2-0} = ZAda;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SME Contiguous Loads
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
|
||||
string mnemonic, string argstr>
|
||||
: I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
|
||||
bits<5> Rm;
|
||||
bits<2> Rv;
|
||||
bits<3> Pg;
|
||||
bits<5> Rn;
|
||||
let Inst{31-25} = 0b1110000;
|
||||
let Inst{24} = Q;
|
||||
let Inst{23-22} = msz;
|
||||
let Inst{21} = 0b0;
|
||||
let Inst{20-16} = Rm;
|
||||
let Inst{15} = V;
|
||||
let Inst{14-13} = Rv;
|
||||
let Inst{12-10} = Pg;
|
||||
let Inst{9-5} = Rn;
|
||||
let Inst{4} = 0b0;
|
||||
|
||||
let mayLoad = 1;
|
||||
}
|
||||
|
||||
class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
|
||||
MatrixTileVectorOperand tile_ty, bit is_col,
|
||||
Operand imm_ty, RegisterOperand gpr_ty>
|
||||
: sme_mem_ld_ss_base<
|
||||
Q, is_col, msz, (outs tile_ty:$ZAt),
|
||||
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
|
||||
gpr_ty:$Rm),
|
||||
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
|
||||
|
||||
multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
|
||||
MatrixTileVectorOperand tile_ty,
|
||||
Operand imm_ty, RegisterOperand gpr_ty,
|
||||
string pg_suffix=""> {
|
||||
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
|
||||
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
|
||||
// Default XZR offset aliases
|
||||
def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
|
||||
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
|
||||
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
|
||||
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
|
||||
}
|
||||
|
||||
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
|
||||
string pg_suffix=""> {
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
|
||||
!if(is_col, TileVectorOpV8, TileVectorOpH8),
|
||||
sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
|
||||
!if(is_col, TileVectorOpV16, TileVectorOpH16),
|
||||
sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
|
||||
!if(is_col, TileVectorOpV32, TileVectorOpH32),
|
||||
sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
|
||||
!if(is_col, TileVectorOpV64, TileVectorOpH64),
|
||||
sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
|
||||
!if(is_col, TileVectorOpV128, TileVectorOpH128),
|
||||
sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
|
||||
}
|
||||
|
||||
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
|
||||
defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
|
||||
}
|
||||
|
||||
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
|
||||
def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
|
||||
!if(is_col, TileVectorOpV8, TileVectorOpH8),
|
||||
is_col, sme_elm_idx0_15, GPR64shifted8> {
|
||||
bits<4> imm;
|
||||
let Inst{3-0} = imm;
|
||||
}
|
||||
def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
|
||||
!if(is_col, TileVectorOpV16, TileVectorOpH16),
|
||||
is_col, sme_elm_idx0_7, GPR64shifted16> {
|
||||
bits<1> ZAt;
|
||||
bits<3> imm;
|
||||
let Inst{3} = ZAt;
|
||||
let Inst{2-0} = imm;
|
||||
}
|
||||
def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
|
||||
!if(is_col, TileVectorOpV32, TileVectorOpH32),
|
||||
is_col, sme_elm_idx0_3, GPR64shifted32> {
|
||||
bits<2> ZAt;
|
||||
bits<2> imm;
|
||||
let Inst{3-2} = ZAt;
|
||||
let Inst{1-0} = imm;
|
||||
}
|
||||
def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
|
||||
!if(is_col, TileVectorOpV64, TileVectorOpH64),
|
||||
is_col, sme_elm_idx0_1, GPR64shifted64> {
|
||||
bits<3> ZAt;
|
||||
bits<1> imm;
|
||||
let Inst{3-1} = ZAt;
|
||||
let Inst{0} = imm;
|
||||
}
|
||||
def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
|
||||
!if(is_col, TileVectorOpV128, TileVectorOpH128),
|
||||
is_col, sme_elm_idx0_0, GPR64shifted128> {
|
||||
bits<4> ZAt;
|
||||
let Inst{3-0} = ZAt;
|
||||
}
|
||||
|
||||
defm : sme_mem_ld_ss_aliases<NAME, is_col>;
|
||||
}
|
||||
|
||||
multiclass sme_mem_ld_ss<string mnemonic> {
|
||||
defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
|
||||
defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// SME Contiguous Stores
//===----------------------------------------------------------------------===//

// Base encoding for SME ST1 (scalar+scalar addressing). Fixes the shared
// fields; subclasses encode the tile/immediate split in bits {3-0}.
class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
                         string mnemonic, string argstr>
    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;                    // offset register ([Rn, Rm])
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<3> Pg;                    // governing predicate (p0-p7)
  bits<5> Rn;                    // base address register
  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;           // set only for the 128-bit (q) form
  let Inst{23-22} = msz;         // memory element size
  let Inst{21}    = 0b1;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;           // 0 = horizontal slice, 1 = vertical slice
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;

  let mayStore = 1;
  let hasSideEffects = 1;
}
|
||||
|
||||
// Concrete ST1 instruction: binds the operand list (tile slice, slice index,
// predicate, base and offset registers) and the assembly string to the base
// encoding above.
class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_st_ss_base<
        Q, is_col, msz,
        (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
             GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
|
||||
|
||||
// Assembly aliases for the SME ST1 stores. Unlike loads, stores take no
// "/z" predicate qualifier, so the default (empty) suffix is used.
multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}
|
||||
|
||||
// One horizontal or vertical SME ST1 variant per element size (b/h/w/d/q).
// Mirrors sme_mem_ld_v_ss: bits {3-0} are split between the tile index
// (ZAt) and the slice immediate (imm) according to element size.
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
  def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
                              is_col, sme_elm_idx0_15, GPR64shifted8> {
    bits<4> imm;
    let Inst{3-0} = imm;       // single byte tile: all 4 bits are the index
  }
  def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
                              is_col, sme_elm_idx0_7, GPR64shifted16> {
    bits<1> ZAt;
    bits<3> imm;
    let Inst{3}   = ZAt;       // 2 tiles, index 0-7
    let Inst{2-0} = imm;
  }
  def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
                              is_col, sme_elm_idx0_3, GPR64shifted32> {
    bits<2> ZAt;
    bits<2> imm;
    let Inst{3-2} = ZAt;       // 4 tiles, index 0-3
    let Inst{1-0} = imm;
  }
  def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
                              is_col, sme_elm_idx0_1, GPR64shifted64> {
    bits<3> ZAt;
    bits<1> imm;
    let Inst{3-1} = ZAt;       // 8 tiles, index 0-1
    let Inst{0}   = imm;
  }
  def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
                              is_col, sme_elm_idx0_0, GPR64shifted128> {
    bits<4> ZAt;
    let Inst{3-0} = ZAt;       // 16 tiles; immediate is always 0 (no bits)
  }

  defm : sme_mem_st_ss_aliases<NAME, is_col>;
}
|
||||
|
||||
// Entry point for the SME contiguous stores: instantiates both the
// horizontal (_H) and vertical (_V) slice forms.
multiclass sme_mem_st_ss<string mnemonic> {
  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// SME Save and Restore Array
//===----------------------------------------------------------------------===//

// ZA array spill/fill (STR/LDR ZA). A single bit selects store vs. load;
// the memory flags are derived from it so the two directions cannot get
// out of sync.
class sme_spill_fill_inst<bit isStore, dag outs, dag ins, string opcodestr>
    : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
        []>,
      Sched<[]> {
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<5> Rn;                    // base address register
  bits<4> imm4;                  // vector-select immediate (0-15)
  let Inst{31-22} = 0b1110000100;
  let Inst{21}    = isStore;
  let Inst{20-15} = 0b000000;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = imm4;

  let mayLoad = !not(isStore);
  let mayStore = isStore;
}
|
||||
|
||||
// Defines the spill/fill instruction plus a no-offset alias:
// "op za[wv, #i], [xn]" expands with an implicit #0 vector offset.
multiclass sme_spill_fill<bit isStore, dag outs, dag ins, string opcodestr> {
  def NAME : sme_spill_fill_inst<isStore, outs, ins, opcodestr>;

  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
}
|
||||
|
||||
// STR ZA: store form of the spill/fill pair (isStore = 1, ZA is an input).
multiclass sme_spill<string opcodestr> {
  defm NAME : sme_spill_fill<0b1, (outs),
                             (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
                                  sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
                                  imm0_15:$offset),
                             opcodestr>;
}
|
||||
|
||||
// LDR ZA: load form of the spill/fill pair (isStore = 0, ZA is the output).
multiclass sme_fill<string opcodestr> {
  defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt),
                             (ins MatrixIndexGPR32Op12_15:$Rv,
                                  sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
                                  imm0_15:$offset),
                             opcodestr>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//

// Base encoding for MOVA (SVE vector to ZA tile slice). Subclasses encode
// the tile/immediate split in bits {3-0}.
class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<3> Pg;                    // governing predicate (p0-p7)
  bits<5> Zn;                    // source SVE vector
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;          // element size
  let Inst{21-17} = 0b00000;
  let Inst{16}    = Q;           // set only for the 128-bit (q) form
  let Inst{15}    = V;           // 0 = horizontal slice, 1 = vertical slice
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4}     = 0b0;
}
|
||||
|
||||
// Concrete vector-to-tile MOVA: binds the operand list and assembly string
// ("ZAd[Rv, imm], Pg/m, Zn") to the base encoding.
class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
                              bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
                              string mnemonic>
    : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
        mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">;
|
||||
|
||||
// "mov" alias for the vector-to-tile MOVA form.
multiclass sme_vector_to_tile_aliases<Instruction inst,
                                      MatrixTileVectorOperand tile_ty,
                                      ZPRRegOp zpr_ty, Operand imm_ty> {
  def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
                  (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
}
|
||||
|
||||
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
|
||||
def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
|
||||
TileVectorOpH8),
|
||||
is_col, sme_elm_idx0_15, ZPR8, mnemonic> {
|
||||
bits<4> imm;
|
||||
let Inst{3-0} = imm;
|
||||
}
|
||||
def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
|
||||
TileVectorOpH16),
|
||||
is_col, sme_elm_idx0_7, ZPR16, mnemonic> {
|
||||
bits<1> ZAd;
|
||||
bits<3> imm;
|
||||
let Inst{3} = ZAd;
|
||||
let Inst{2-0} = imm;
|
||||
}
|
||||
def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
|
||||
TileVectorOpH32),
|
||||
is_col, sme_elm_idx0_3, ZPR32, mnemonic> {
|
||||
bits<2> ZAd;
|
||||
bits<2> imm;
|
||||
let Inst{3-2} = ZAd;
|
||||
let Inst{1-0} = imm;
|
||||
}
|
||||
def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
|
||||
TileVectorOpH64),
|
||||
is_col, sme_elm_idx0_1, ZPR64, mnemonic> {
|
||||
bits<3> ZAd;
|
||||
bits<1> imm;
|
||||
let Inst{3-1} = ZAd;
|
||||
let Inst{0} = imm;
|
||||
}
|
||||
def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
|
||||
TileVectorOpH128),
|
||||
is_col, sme_elm_idx0_0, ZPR128, mnemonic> {
|
||||
bits<4> ZAd;
|
||||
bits<1> imm;
|
||||
let Inst{3-0} = ZAd;
|
||||
}
|
||||
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
|
||||
!if(is_col, TileVectorOpV8,
|
||||
TileVectorOpH8),
|
||||
ZPR8, sme_elm_idx0_15>;
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
|
||||
!if(is_col, TileVectorOpV16,
|
||||
TileVectorOpH16),
|
||||
ZPR16, sme_elm_idx0_7>;
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
|
||||
!if(is_col, TileVectorOpV32,
|
||||
TileVectorOpH32),
|
||||
ZPR32, sme_elm_idx0_3>;
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
|
||||
!if(is_col, TileVectorOpV64,
|
||||
TileVectorOpH64),
|
||||
ZPR64, sme_elm_idx0_1>;
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
|
||||
!if(is_col, TileVectorOpV128,
|
||||
TileVectorOpH128),
|
||||
ZPR128, sme_elm_idx0_0>;
|
||||
}
|
||||
|
||||
// Entry point for vector-to-tile moves: horizontal (_H) and vertical (_V).
multiclass sme_vector_to_tile<string mnemonic> {
  defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
}
|
||||
|
||||
// Base encoding for MOVA (ZA tile slice to SVE vector). Distinguished from
// the vector-to-tile direction by bits {21-17} = 0b00001; subclasses encode
// the tile/immediate split in bits {8-5}.
class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<3> Pg;                    // governing predicate (p0-p7)
  bits<5> Zd;                    // destination SVE vector
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;          // element size
  let Inst{21-17} = 0b00001;
  let Inst{16}    = Q;           // set only for the 128-bit (q) form
  let Inst{15}    = V;           // 0 = horizontal slice, 1 = vertical slice
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9}     = 0b0;
  let Inst{4-0}   = Zd;
}
|
||||
|
||||
// Concrete tile-to-vector MOVA: binds the operand list and assembly string
// ("Zd, Pg/m, ZAn[Rv, imm]") to the base encoding.
class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
                              MatrixTileVectorOperand tile_ty,
                              bit is_col, Operand imm_ty, string mnemonic>
    : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
        (ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]">;
|
||||
|
||||
// "mov" alias for the tile-to-vector MOVA form.
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
                                      MatrixTileVectorOperand tile_ty,
                                      Operand imm_ty> {
  def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
                  (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
}
|
||||
|
||||
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
|
||||
def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
|
||||
TileVectorOpH8),
|
||||
is_col, sme_elm_idx0_15, mnemonic> {
|
||||
bits<4> imm;
|
||||
let Inst{8-5} = imm;
|
||||
}
|
||||
def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
|
||||
TileVectorOpH16),
|
||||
is_col, sme_elm_idx0_7, mnemonic> {
|
||||
bits<1> ZAn;
|
||||
bits<3> imm;
|
||||
let Inst{8} = ZAn;
|
||||
let Inst{7-5} = imm;
|
||||
}
|
||||
def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
|
||||
TileVectorOpH32),
|
||||
is_col, sme_elm_idx0_3, mnemonic> {
|
||||
bits<2> ZAn;
|
||||
bits<2> imm;
|
||||
let Inst{8-7} = ZAn;
|
||||
let Inst{6-5} = imm;
|
||||
}
|
||||
def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
|
||||
TileVectorOpH64),
|
||||
is_col, sme_elm_idx0_1, mnemonic> {
|
||||
bits<3> ZAn;
|
||||
bits<1> imm;
|
||||
let Inst{8-6} = ZAn;
|
||||
let Inst{5} = imm;
|
||||
}
|
||||
def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
|
||||
TileVectorOpH128),
|
||||
is_col, sme_elm_idx0_0, mnemonic> {
|
||||
bits<4> ZAn;
|
||||
let Inst{8-5} = ZAn;
|
||||
}
|
||||
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
|
||||
!if(is_col, TileVectorOpV8,
|
||||
TileVectorOpH8), sme_elm_idx0_15>;
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
|
||||
!if(is_col, TileVectorOpV16,
|
||||
TileVectorOpH16), sme_elm_idx0_7>;
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
|
||||
!if(is_col, TileVectorOpV32,
|
||||
TileVectorOpH32), sme_elm_idx0_3>;
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
|
||||
!if(is_col, TileVectorOpV64,
|
||||
TileVectorOpH64), sme_elm_idx0_1>;
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
|
||||
!if(is_col, TileVectorOpV128,
|
||||
TileVectorOpH128), sme_elm_idx0_0>;
|
||||
}
|
||||
|
||||
// Entry point for tile-to-vector moves: horizontal (_H) and vertical (_V).
multiclass sme_tile_to_vector<string mnemonic> {
  defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// SME Zero
//===----------------------------------------------------------------------===//

// ZERO instruction: an 8-bit mask operand selects which ZA tiles to zero.
class sme_zero_inst<string mnemonic>
    : I<(outs MatrixTileList:$imm), (ins),
        mnemonic, "\t$imm", "", []>, Sched<[]> {
  bits<8> imm;                   // tile mask
  let Inst{31-8} = 0b110000000000100000000000;
  let Inst{7-0}  = imm;
}
|
||||
|
||||
// ZERO plus the preferred-disassembly aliases: each named tile-list form
// maps to the 8-bit mask covering the byte-tile slices it occupies.
multiclass sme_zero<string mnemonic> {
  def NAME : sme_zero_inst<mnemonic>;

  def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
  def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
  def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
  def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
  def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
  def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
  def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
  def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// SVE2 Instructions
//===----------------------------------------------------------------------===//

// REVD: predicated, destructive reversal over 128-bit (q) elements —
// destination is tied to $_Zd and merged under $Pg.
class sve2_int_perm_revd<string asm>
    : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
      Sched<[]> {
  bits<5> Zd;
  bits<3> Pg;
  bits<5> Zn;
  let Inst{31-24} = 0b00000101;
  let Inst{23-22} = 0b00; // size
  let Inst{21-13} = 0b101110100;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveUnary;
  let ElementSize = ZPR128.ElementSize;
}
|
||||
|
||||
// SCLAMP/UCLAMP: destructive three-operand clamp; U selects the unsigned
// form, and the destination is tied to $_Zd.
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
        asm, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<5> Zn;
  bits<5> Zd;
  let Inst{31-24} = 0b01000100;
  let Inst{23-22} = sz;          // element size
  let Inst{21}    = 0b0;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b11000;
  let Inst{10}    = U;           // 0 = signed, 1 = unsigned
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveOther;
  let ElementSize = zpr_ty.ElementSize;
}
|
||||
|
||||
// Clamp variants for every element size (b/h/s/d).
multiclass sve2_clamp<string asm, bit U> {
  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
}
|
||||
|
||||
// PSEL: predicate select indexed by a slice register plus immediate.
// The immediate's encoding (bits 23-22 and 20-18) is element-size
// dependent and supplied by the subclasses in the multiclass below.
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
    : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm,
                            MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
      Sched<[]> {
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<4> Pn;
  bits<4> Pm;
  bits<4> Pd;
  let Inst{31-24} = 0b00100101;
  let Inst{21}    = 0b1;
  let Inst{17-16} = Rv;
  let Inst{15-14} = 0b01;
  let Inst{13-10} = Pn;
  let Inst{9}     = 0b0;
  let Inst{8-5}   = Pm;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = Pd;
}
|
||||
|
||||
// PSEL variants per element size. The immediate is scattered across
// bits {23-22} and {20-18}, with a size marker occupying the bits the
// immediate does not use (a size-encoded field, not a plain immediate).
multiclass sve2_int_perm_sel_p<string asm> {
  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
    bits<4> imm;
    let Inst{23-22} = imm{3-2};
    let Inst{20-19} = imm{1-0};
    let Inst{18}    = 0b1;
  }
  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
    bits<3> imm;
    let Inst{23-22} = imm{2-1};
    let Inst{20}    = imm{0};
    let Inst{19-18} = 0b10;
  }
  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
    bits<2> imm;
    let Inst{23-22} = imm{1-0};
    let Inst{20-18} = 0b100;
  }
  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
    bits<1> imm;
    let Inst{23}    = imm;
    let Inst{22}    = 0b1;
    let Inst{20-18} = 0b000;
  }
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue