Updated synctools/tablegen/AArch64 files to LLVM 14.0.5.
This commit is contained in:
parent
62fee65c54
commit
e385fc5267
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -11,17 +10,19 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// CCIfAlign - Match of the original alignment of the arg
|
||||
class CCIfAlign<string Align, CCAction A> :
|
||||
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
|
||||
/// CCIfBigEndian - Match only if we're in big endian mode.
|
||||
class CCIfBigEndian<CCAction A> :
|
||||
CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
|
||||
|
||||
class CCIfILP32<CCAction A> :
|
||||
CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM AAPCS64 Calling Convention
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_AAPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
|
@ -29,13 +30,29 @@ def CC_AArch64_AAPCS : CallingConv<[
|
|||
|
||||
// Big endian vectors must be passed as if they were 1-element vectors so that
|
||||
// their lanes are in a consistent order.
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v4bf16, v8i8],
|
||||
CCBitConvertToType<f64>>>,
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
|
||||
CCBitConvertToType<f128>>>,
|
||||
|
||||
// An SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
|
||||
// In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
// However, on windows, in some circumstances, the SRet is passed in X0 or X1
|
||||
// instead. The presence of the inreg attribute indicates that SRet is
|
||||
// passed in the alternative register (X0 or X1), not X8:
|
||||
// - X0 for non-instance methods.
|
||||
// - X1 for instance methods.
|
||||
|
||||
// The "sret" attribute identifies indirect returns.
|
||||
// The "inreg" attribute identifies non-aggregate types.
|
||||
// The position of the "sret" attribute identifies instance/non-instance
|
||||
// methods.
|
||||
// "sret" on argument 0 means non-instance methods.
|
||||
// "sret" on argument 1 means instance methods.
|
||||
|
||||
CCIfInReg<CCIfType<[i64],
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToReg<[X0, X1]>>>>>,
|
||||
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
|
||||
|
||||
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
|
||||
// slot is 64-bit.
|
||||
|
@ -47,18 +64,33 @@ def CC_AArch64_AAPCS : CallingConv<[
|
|||
CCIfNest<CCAssignToReg<[X18]>>,
|
||||
|
||||
// Pass SwiftSelf in a callee saved register.
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
|
||||
|
||||
// A SwiftError is passed in X21.
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
|
||||
|
||||
// Pass SwiftAsync in an otherwise callee saved register so that it will be
|
||||
// preserved for normal function calls.
|
||||
CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
|
||||
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
|
||||
|
||||
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
|
||||
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
|
||||
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
|
||||
CCPassIndirect<i64>>,
|
||||
|
||||
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
|
||||
CCAssignToReg<[P0, P1, P2, P3]>>,
|
||||
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
|
||||
CCPassIndirect<i64>>,
|
||||
|
||||
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
|
||||
// up to eight each of GPR and FPR.
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
// i128 is split to two i64s, we can't fit half to register X7.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
|
||||
[X0, X1, X3, X5]>>>,
|
||||
|
@ -66,129 +98,145 @@ def CC_AArch64_AAPCS : CallingConv<[
|
|||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
|
||||
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
// If more than will fit in registers, pass them on the stack instead.
|
||||
CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i1, i8, i16, f16, bf16], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
let Entry = 1 in
|
||||
def RetCC_AArch64_AAPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
|
||||
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
|
||||
|
||||
// Big endian vectors must be passed as if they were 1-element vectors so that
|
||||
// their lanes are in a consistent order.
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
|
||||
CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v4bf16, v8i8],
|
||||
CCBitConvertToType<f64>>>,
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
|
||||
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
|
||||
CCBitConvertToType<f128>>>,
|
||||
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
|
||||
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
|
||||
|
||||
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
|
||||
CCAssignToReg<[P0, P1, P2, P3]>>
|
||||
]>;
|
||||
|
||||
// Vararg functions on windows pass floats in integer registers
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_Win64_VarArg : CallingConv<[
|
||||
CCIfType<[f16, f32], CCPromoteToType<f64>>,
|
||||
CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
|
||||
CCIfType<[f32], CCBitConvertToType<i32>>,
|
||||
CCIfType<[f64], CCBitConvertToType<i64>>,
|
||||
CCDelegateTo<CC_AArch64_AAPCS>
|
||||
]>;
|
||||
|
||||
// Windows Control Flow Guard checks take a single argument (the target function
|
||||
// address) and have no return value.
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_Win64_CFGuard_Check : CallingConv<[
|
||||
CCIfType<[i64], CCAssignToReg<[X15]>>
|
||||
]>;
|
||||
|
||||
|
||||
// Darwin uses a calling convention which differs in only two ways
|
||||
// from the standard one at this level:
|
||||
// + i128s (i.e. split i64s) don't need even registers.
|
||||
// + Stack slots are sized as needed rather than being at least 64-bit.
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_DarwinPCS : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// An SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
|
||||
|
||||
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
|
||||
// slot is 64-bit.
|
||||
CCIfByVal<CCPassByVal<8, 8>>,
|
||||
|
||||
// Pass SwiftSelf in a callee saved register.
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
|
||||
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
|
||||
|
||||
// A SwiftError is passed in X21.
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
|
||||
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
|
||||
|
||||
// Pass SwiftAsync in an otherwise callee saved register so that it will be
|
||||
// preserved for normal function calls.
|
||||
CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
|
||||
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
|
||||
|
||||
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
|
||||
// up to eight each of GPR and FPR.
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
// i128 is split to two i64s, we can't fit half to register X7.
|
||||
CCIfType<[i64],
|
||||
CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
|
||||
[W0, W1, W2, W3, W4, W5, W6]>>>,
|
||||
CCIfSplit<CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6]>>>,
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
|
||||
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
// If more than will fit in registers, pass them on the stack instead.
|
||||
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
|
||||
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
|
||||
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16 || ValVT == MVT::bf16",
|
||||
CCAssignToStack<2, 2>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
|
||||
|
||||
// Re-demote pointers to 32-bits so we don't end up storing 64-bit
|
||||
// values and clobbering neighbouring stack locations. Not very pretty.
|
||||
CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
|
||||
CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>,
|
||||
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
|
@ -198,41 +246,62 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
|
|||
|
||||
// Handle all scalar types as either i64 or f64.
|
||||
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
|
||||
CCIfType<[f16, f32], CCPromoteToType<f64>>,
|
||||
CCIfType<[f16, bf16, f32], CCPromoteToType<f64>>,
|
||||
|
||||
// Everything is on the stack.
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
|
||||
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
|
||||
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the
|
||||
// same as the normal Darwin VarArgs handling.
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// Handle all scalar types as either i32 or f32.
|
||||
CCIfType<[i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[f16, bf16], CCPromoteToType<f32>>,
|
||||
|
||||
// Everything is on the stack.
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
|
||||
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
|
||||
// The WebKit_JS calling convention only passes the first argument (the callee)
|
||||
// in register and the remaining arguments on stack. We allow 32bit stack slots,
|
||||
// so that WebKit can write partial values in the stack and define the other
|
||||
// 32bit quantity as undef.
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_WebKit_JS : CallingConv<[
|
||||
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
|
||||
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
|
||||
CCIfType<[i32], CCAssignToReg<[W0]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[X0]>>,
|
||||
|
||||
// Pass the remaining arguments on the stack instead.
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
|
||||
]>;
|
||||
|
||||
let Entry = 1 in
|
||||
def RetCC_AArch64_WebKit_JS : CallingConv<[
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -257,6 +326,7 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
|
|||
// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
|
||||
// register mapping".
|
||||
|
||||
let Entry = 1 in
|
||||
def CC_AArch64_GHC : CallingConv<[
|
||||
CCIfType<[iPTR], CCBitConvertToType<i64>>,
|
||||
|
||||
|
@ -275,6 +345,12 @@ def CC_AArch64_GHC : CallingConv<[
|
|||
CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
|
||||
]>;
|
||||
|
||||
// The order of the callee-saves in this file is important, because the
|
||||
// FrameLowering code will use this order to determine the layout the
|
||||
// callee-save area in the stack frame. As can be observed below, Darwin
|
||||
// requires the frame-record (LR, FP) to be at the top the callee-save area,
|
||||
// whereas for other platforms they are at the bottom.
|
||||
|
||||
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
|
||||
// presumably a callee to someone. External functions may not do so, but this
|
||||
// is currently safe since BL has LR as an implicit-def and what happens after a
|
||||
|
@ -283,11 +359,45 @@ def CC_AArch64_GHC : CallingConv<[
|
|||
// It would be better to model its preservation semantics properly (create a
|
||||
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
|
||||
// end up saving LR as part of a call frame). Watch this space...
|
||||
def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
|
||||
X23, X24, X25, X26, X27, X28,
|
||||
def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
|
||||
X25, X26, X27, X28, LR, FP,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
// A variant for treating X18 as callee saved, when interfacing with
|
||||
// code that needs X18 to be preserved.
|
||||
def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>;
|
||||
|
||||
// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
|
||||
// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
|
||||
// and not (LR,FP) pairs.
|
||||
def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
|
||||
X25, X26, X27, X28, FP, LR,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
// The Control Flow Guard check call uses a custom calling convention that also
|
||||
// preserves X0-X8 and Q0-Q7.
|
||||
def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS,
|
||||
(sequence "X%u", 0, 8),
|
||||
(sequence "Q%u", 0, 7))>;
|
||||
|
||||
// AArch64 PCS for vector functions (VPCS)
|
||||
// must (additionally) preserve full Q8-Q23 registers
|
||||
def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
|
||||
X25, X26, X27, X28, LR, FP,
|
||||
(sequence "Q%u", 8, 23))>;
|
||||
|
||||
// Functions taking SVE arguments or returning an SVE type
|
||||
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
|
||||
def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
|
||||
(sequence "P%u", 4, 15),
|
||||
X19, X20, X21, X22, X23, X24,
|
||||
X25, X26, X27, X28, LR, FP)>;
|
||||
|
||||
def CSR_AArch64_AAPCS_SwiftTail
|
||||
: CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X20, X22)>;
|
||||
|
||||
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
|
||||
// 'this' and the pointer return value are both passed in X0 in these cases,
|
||||
// this can be partially modelled by treating X0 as a callee-saved register;
|
||||
|
@ -301,32 +411,6 @@ def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
|
|||
def CSR_AArch64_AAPCS_SwiftError
|
||||
: CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
|
||||
|
||||
// The function used by Darwin to obtain the address of a thread-local variable
|
||||
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
|
||||
// fast path for calculation, but other registers except X0 (argument/return)
|
||||
// and LR (it is a call, after all) are preserved.
|
||||
def CSR_AArch64_TLS_Darwin
|
||||
: CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
|
||||
FP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
// We can only handle a register pair with adjacent registers, the register pair
|
||||
// should belong to the same class as well. Since the access function on the
|
||||
// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
|
||||
// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
|
||||
def CSR_AArch64_CXX_TLS_Darwin
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS,
|
||||
(sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
|
||||
(sequence "D%u", 0, 31))>;
|
||||
|
||||
// CSRs that are handled by prologue, epilogue.
|
||||
def CSR_AArch64_CXX_TLS_Darwin_PE
|
||||
: CalleeSavedRegs<(add LR, FP)>;
|
||||
|
||||
// CSRs that are handled explicitly via copies.
|
||||
def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
|
||||
: CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
|
||||
|
||||
// The ELF stub used for TLS-descriptor access saves every feasible
|
||||
// register. Only X0 and LR are clobbered.
|
||||
def CSR_AArch64_TLS_ELF
|
||||
|
@ -350,17 +434,67 @@ def CSR_AArch64_StackProbe_Windows
|
|||
(sequence "X%u", 18, 28), FP, SP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
// Darwin variants of AAPCS.
|
||||
// Darwin puts the frame-record at the top of the callee-save area.
|
||||
def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
|
||||
X23, X24, X25, X26, X27, X28,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
def CSR_Darwin_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21,
|
||||
X22, X23, X24, X25, X26, X27,
|
||||
X28, (sequence "Q%u", 8, 23))>;
|
||||
def CSR_Darwin_AArch64_AAPCS_ThisReturn
|
||||
: CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, X0)>;
|
||||
|
||||
def CSR_Darwin_AArch64_AAPCS_SwiftError
|
||||
: CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
|
||||
|
||||
def CSR_Darwin_AArch64_AAPCS_SwiftTail
|
||||
: CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X20, X22)>;
|
||||
|
||||
// The function used by Darwin to obtain the address of a thread-local variable
|
||||
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
|
||||
// fast path for calculation, but other registers except X0 (argument/return)
|
||||
// and LR (it is a call, after all) are preserved.
|
||||
def CSR_Darwin_AArch64_TLS
|
||||
: CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
|
||||
FP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
// We can only handle a register pair with adjacent registers, the register pair
|
||||
// should belong to the same class as well. Since the access function on the
|
||||
// fast path calls a function that follows CSR_Darwin_AArch64_TLS,
|
||||
// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS.
|
||||
def CSR_Darwin_AArch64_CXX_TLS
|
||||
: CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
|
||||
(sub (sequence "X%u", 1, 28), X9, X15, X16, X17, X18, X19),
|
||||
(sequence "D%u", 0, 31))>;
|
||||
|
||||
// CSRs that are handled by prologue, epilogue.
|
||||
def CSR_Darwin_AArch64_CXX_TLS_PE
|
||||
: CalleeSavedRegs<(add LR, FP)>;
|
||||
|
||||
// CSRs that are handled explicitly via copies.
|
||||
def CSR_Darwin_AArch64_CXX_TLS_ViaCopy
|
||||
: CalleeSavedRegs<(sub CSR_Darwin_AArch64_CXX_TLS, LR, FP)>;
|
||||
|
||||
def CSR_Darwin_AArch64_RT_MostRegs
|
||||
: CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>;
|
||||
|
||||
// Variants of the standard calling conventions for shadow call stack.
|
||||
// These all preserve x18 in addition to any other registers.
|
||||
def CSR_AArch64_NoRegs_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>;
|
||||
def CSR_AArch64_AllRegs_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>;
|
||||
def CSR_AArch64_CXX_TLS_Darwin_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>;
|
||||
def CSR_AArch64_AAPCS_SwiftError_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
|
||||
def CSR_AArch64_RT_MostRegs_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
|
||||
def CSR_AArch64_AAVPCS_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
|
||||
def CSR_AArch64_SVE_AAPCS_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>;
|
||||
def CSR_AArch64_AAPCS_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
|
||||
|
|
|
@ -0,0 +1,233 @@
|
|||
//=- AArch64.td - Define AArch64 Combine Rules ---------------*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "llvm/Target/GlobalISel/Combine.td"
|
||||
|
||||
def fconstant_to_constant : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_FCONSTANT):$root,
|
||||
[{ return matchFConstantToConstant(*${root}, MRI); }]),
|
||||
(apply [{ applyFConstantToConstant(*${root}); }])>;
|
||||
|
||||
def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">;
|
||||
def icmp_redundant_trunc : GICombineRule<
|
||||
(defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_ICMP):$root,
|
||||
[{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
|
||||
(apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
|
||||
|
||||
// AArch64-specific offset folding for G_GLOBAL_VALUE.
|
||||
def fold_global_offset_matchdata : GIDefMatchData<"std::pair<uint64_t, uint64_t>">;
|
||||
def fold_global_offset : GICombineRule<
|
||||
(defs root:$root, fold_global_offset_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_GLOBAL_VALUE):$root,
|
||||
[{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
|
||||
>;
|
||||
|
||||
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
|
||||
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
|
||||
fconstant_to_constant,
|
||||
icmp_redundant_trunc,
|
||||
fold_global_offset]> {
|
||||
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
|
||||
let StateClass = "AArch64PreLegalizerCombinerHelperState";
|
||||
let AdditionalArguments = [];
|
||||
}
|
||||
|
||||
def AArch64O0PreLegalizerCombinerHelper: GICombinerHelper<
|
||||
"AArch64GenO0PreLegalizerCombinerHelper", [optnone_combines]> {
|
||||
let DisableRuleOption = "aarch64O0prelegalizercombiner-disable-rule";
|
||||
let StateClass = "AArch64O0PreLegalizerCombinerHelperState";
|
||||
let AdditionalArguments = [];
|
||||
}
|
||||
|
||||
// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
|
||||
// target-specific opcode.
|
||||
def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;
|
||||
|
||||
def rev : GICombineRule<
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchREV(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def zip : GICombineRule<
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def uzp : GICombineRule<
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def dup: GICombineRule <
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchDup(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def trn : GICombineRule<
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchTRN(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def ext: GICombineRule <
|
||||
(defs root:$root, shuffle_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchEXT(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def shuf_to_ins_matchdata : GIDefMatchData<"std::tuple<Register, int, Register, int>">;
|
||||
def shuf_to_ins: GICombineRule <
|
||||
(defs root:$root, shuf_to_ins_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchINS(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ return applyINS(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
|
||||
def vashr_vlshr_imm : GICombineRule<
|
||||
(defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_ASHR, G_LSHR):$root,
|
||||
[{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def form_duplane_matchdata :
|
||||
GIDefMatchData<"std::pair<unsigned, int>">;
|
||||
def form_duplane : GICombineRule <
|
||||
(defs root:$root, form_duplane_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||
[{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
|
||||
form_duplane,
|
||||
shuf_to_ins]>;
|
||||
|
||||
def adjust_icmp_imm_matchdata :
|
||||
GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
|
||||
def adjust_icmp_imm : GICombineRule <
|
||||
(defs root:$root, adjust_icmp_imm_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_ICMP):$root,
|
||||
[{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
|
||||
>;
|
||||
|
||||
def swap_icmp_operands : GICombineRule <
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_ICMP):$root,
|
||||
[{ return trySwapICmpOperands(*${root}, MRI); }]),
|
||||
(apply [{ applySwapICmpOperands(*${root}, Observer); }])
|
||||
>;
|
||||
|
||||
def icmp_lowering : GICombineGroup<[adjust_icmp_imm, swap_icmp_operands]>;
|
||||
|
||||
def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
|
||||
def extractvecelt_pairwise_add : GICombineRule<
|
||||
(defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
|
||||
[{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">;
|
||||
def mul_const : GICombineRule<
|
||||
(defs root:$root, mul_const_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_MUL):$root,
|
||||
[{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def build_vector_to_dup : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_BUILD_VECTOR):$root,
|
||||
[{ return matchBuildVectorToDup(*${root}, MRI); }]),
|
||||
(apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }])
|
||||
>;
|
||||
|
||||
def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
|
||||
|
||||
def lower_vector_fcmp : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_FCMP):$root,
|
||||
[{ return lowerVectorFCMP(*${root}, MRI, B); }]),
|
||||
(apply [{}])>;
|
||||
|
||||
def form_truncstore_matchdata : GIDefMatchData<"Register">;
|
||||
def form_truncstore : GICombineRule<
|
||||
(defs root:$root, form_truncstore_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_STORE):$root,
|
||||
[{ return matchFormTruncstore(*${root}, MRI, ${matchinfo}); }]),
|
||||
(apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
def fold_merge_to_zext : GICombineRule<
|
||||
(defs root:$d),
|
||||
(match (wip_match_opcode G_MERGE_VALUES):$d,
|
||||
[{ return matchFoldMergeToZext(*${d}, MRI); }]),
|
||||
(apply [{ applyFoldMergeToZext(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
def mutate_anyext_to_zext : GICombineRule<
|
||||
(defs root:$d),
|
||||
(match (wip_match_opcode G_ANYEXT):$d,
|
||||
[{ return matchMutateAnyExtToZExt(*${d}, MRI); }]),
|
||||
(apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
def split_store_zero_128 : GICombineRule<
|
||||
(defs root:$d),
|
||||
(match (wip_match_opcode G_STORE):$d,
|
||||
[{ return matchSplitStoreZero128(*${d}, MRI); }]),
|
||||
(apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
// Post-legalization combines which should happen at all optimization levels.
|
||||
// (E.g. ones that facilitate matching for the selector) For example, matching
|
||||
// pseudos.
|
||||
def AArch64PostLegalizerLoweringHelper
|
||||
: GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
|
||||
[shuffle_vector_lowering, vashr_vlshr_imm,
|
||||
icmp_lowering, build_vector_lowering,
|
||||
lower_vector_fcmp, form_truncstore]> {
|
||||
let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
|
||||
}
|
||||
|
||||
// Post-legalization combines which are primarily optimizations.
|
||||
def AArch64PostLegalizerCombinerHelper
|
||||
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
|
||||
[copy_prop, erase_undef_store, combines_for_extload,
|
||||
sext_trunc_sextload, mutate_anyext_to_zext,
|
||||
hoist_logic_op_with_same_opcode_hands,
|
||||
redundant_and, xor_of_and_with_same_reg,
|
||||
extractvecelt_pairwise_add, redundant_or,
|
||||
mul_const, redundant_sext_inreg,
|
||||
form_bitfield_extract, rotate_out_of_range,
|
||||
icmp_to_true_false_known_bits, merge_unmerge,
|
||||
select_combines, fold_merge_to_zext,
|
||||
constant_fold, identity_combines,
|
||||
ptr_add_immed_chain, overlapping_and,
|
||||
split_store_zero_128]> {
|
||||
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
|
||||
}
|
|
@ -0,0 +1,275 @@
|
|||
//===- AArch64GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
/// This file defines all the static objects used by AArch64RegisterBankInfo.
|
||||
/// \todo This should be generated by TableGen.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace llvm {
|
||||
RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
|
||||
/* StartIdx, Length, RegBank */
|
||||
// 0: FPR 16-bit value.
|
||||
{0, 16, AArch64::FPRRegBank},
|
||||
// 1: FPR 32-bit value.
|
||||
{0, 32, AArch64::FPRRegBank},
|
||||
// 2: FPR 64-bit value.
|
||||
{0, 64, AArch64::FPRRegBank},
|
||||
// 3: FPR 128-bit value.
|
||||
{0, 128, AArch64::FPRRegBank},
|
||||
// 4: FPR 256-bit value.
|
||||
{0, 256, AArch64::FPRRegBank},
|
||||
// 5: FPR 512-bit value.
|
||||
{0, 512, AArch64::FPRRegBank},
|
||||
// 6: GPR 32-bit value.
|
||||
{0, 32, AArch64::GPRRegBank},
|
||||
// 7: GPR 64-bit value.
|
||||
{0, 64, AArch64::GPRRegBank},
|
||||
// 8: GPR 128-bit value.
|
||||
{0, 128, AArch64::GPRRegBank},
|
||||
};
|
||||
|
||||
// ValueMappings.
|
||||
RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
|
||||
/* BreakDown, NumBreakDowns */
|
||||
// 0: invalid
|
||||
{nullptr, 0},
|
||||
// 3-operands instructions (all binary operations should end up with one of
|
||||
// those mapping).
|
||||
// 1: FPR 16-bit value. <-- This must match First3OpsIdx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
// 4: FPR 32-bit value. <-- This must match First3OpsIdx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
// 7: FPR 64-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
// 10: FPR 128-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
|
||||
// 13: FPR 256-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
|
||||
// 16: FPR 512-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
|
||||
// 19: GPR 32-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
// 22: GPR 64-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
// 25: GPR 128-bit value. <-- This must match Last3OpsIdx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1},
|
||||
// Cross register bank copies.
|
||||
// 28: FPR 16-bit value to GPR 16-bit. <-- This must match
|
||||
// FirstCrossRegCpyIdx.
|
||||
// Note: This is the kind of copy we see with physical registers.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
// 30: FPR 32-bit value to GPR 32-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
// 32: FPR 64-bit value to GPR 64-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
// 34: FPR 128-bit value to GPR 128-bit value (invalid)
|
||||
{nullptr, 1},
|
||||
{nullptr, 1},
|
||||
// 36: FPR 256-bit value to GPR 256-bit value (invalid)
|
||||
{nullptr, 1},
|
||||
{nullptr, 1},
|
||||
// 38: FPR 512-bit value to GPR 512-bit value (invalid)
|
||||
{nullptr, 1},
|
||||
{nullptr, 1},
|
||||
// 40: GPR 32-bit value to FPR 32-bit value.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
// 42: GPR 64-bit value to FPR 64-bit value. <-- This must match
|
||||
// LastCrossRegCpyIdx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
// 44: FPExt: 16 to 32. <-- This must match FPExt16To32Idx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
// 46: FPExt: 16 to 32. <-- This must match FPExt16To64Idx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
|
||||
// 48: FPExt: 32 to 64. <-- This must match FPExt32To64Idx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
|
||||
// 50: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx.
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
|
||||
// 52: Shift scalar with 64 bit shift imm
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
|
||||
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
|
||||
};
|
||||
|
||||
bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
|
||||
unsigned ValStartIdx,
|
||||
unsigned ValLength,
|
||||
const RegisterBank &RB) {
|
||||
const PartialMapping &Map = PartMappings[Idx - PartialMappingIdx::PMI_Min];
|
||||
return Map.StartIdx == ValStartIdx && Map.Length == ValLength &&
|
||||
Map.RegBank == &RB;
|
||||
}
|
||||
|
||||
bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx,
|
||||
unsigned FirstInBank,
|
||||
unsigned Size,
|
||||
unsigned Offset) {
|
||||
unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min;
|
||||
const ValueMapping &Map =
|
||||
AArch64GenRegisterBankInfo::getValueMapping((PartialMappingIdx)FirstInBank, Size)[Offset];
|
||||
return Map.BreakDown == &PartMappings[PartialMapBaseIdx] &&
|
||||
Map.NumBreakDowns == 1;
|
||||
}
|
||||
|
||||
bool AArch64GenRegisterBankInfo::checkPartialMappingIdx(
|
||||
PartialMappingIdx FirstAlias, PartialMappingIdx LastAlias,
|
||||
ArrayRef<PartialMappingIdx> Order) {
|
||||
if (Order.front() != FirstAlias)
|
||||
return false;
|
||||
if (Order.back() != LastAlias)
|
||||
return false;
|
||||
if (Order.front() > Order.back())
|
||||
return false;
|
||||
|
||||
PartialMappingIdx Previous = Order.front();
|
||||
bool First = true;
|
||||
for (const auto &Current : Order) {
|
||||
if (First) {
|
||||
First = false;
|
||||
continue;
|
||||
}
|
||||
if (Previous + 1 != Current)
|
||||
return false;
|
||||
Previous = Current;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
|
||||
unsigned Size) {
|
||||
if (RBIdx == PMI_FirstGPR) {
|
||||
if (Size <= 32)
|
||||
return 0;
|
||||
if (Size <= 64)
|
||||
return 1;
|
||||
if (Size <= 128)
|
||||
return 2;
|
||||
return -1;
|
||||
}
|
||||
if (RBIdx == PMI_FirstFPR) {
|
||||
if (Size <= 16)
|
||||
return 0;
|
||||
if (Size <= 32)
|
||||
return 1;
|
||||
if (Size <= 64)
|
||||
return 2;
|
||||
if (Size <= 128)
|
||||
return 3;
|
||||
if (Size <= 256)
|
||||
return 4;
|
||||
if (Size <= 512)
|
||||
return 5;
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
const RegisterBankInfo::ValueMapping *
|
||||
AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
|
||||
unsigned Size) {
|
||||
assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that");
|
||||
unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size);
|
||||
if (BaseIdxOffset == -1u)
|
||||
return &ValMappings[InvalidIdx];
|
||||
|
||||
unsigned ValMappingIdx =
|
||||
First3OpsIdx + (RBIdx - PartialMappingIdx::PMI_Min + BaseIdxOffset) *
|
||||
ValueMappingIdx::DistanceBetweenRegBanks;
|
||||
assert(ValMappingIdx >= First3OpsIdx && ValMappingIdx <= Last3OpsIdx &&
|
||||
"Mapping out of bound");
|
||||
|
||||
return &ValMappings[ValMappingIdx];
|
||||
}
|
||||
|
||||
AArch64GenRegisterBankInfo::PartialMappingIdx
|
||||
AArch64GenRegisterBankInfo::BankIDToCopyMapIdx[]{
|
||||
PMI_None, // CCR
|
||||
PMI_FirstFPR, // FPR
|
||||
PMI_FirstGPR, // GPR
|
||||
};
|
||||
|
||||
const RegisterBankInfo::ValueMapping *
|
||||
AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
|
||||
unsigned SrcBankID, unsigned Size) {
|
||||
assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
|
||||
assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
|
||||
PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
|
||||
PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
|
||||
assert(DstRBIdx != PMI_None && "No such mapping");
|
||||
assert(SrcRBIdx != PMI_None && "No such mapping");
|
||||
|
||||
if (DstRBIdx == SrcRBIdx)
|
||||
return getValueMapping(DstRBIdx, Size);
|
||||
|
||||
assert(Size <= 64 && "GPR cannot handle that size");
|
||||
unsigned ValMappingIdx =
|
||||
FirstCrossRegCpyIdx +
|
||||
(DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(DstRBIdx, Size)) *
|
||||
ValueMappingIdx::DistanceBetweenCrossRegCpy;
|
||||
assert(ValMappingIdx >= FirstCrossRegCpyIdx &&
|
||||
ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound");
|
||||
return &ValMappings[ValMappingIdx];
|
||||
}
|
||||
|
||||
const RegisterBankInfo::ValueMapping *
|
||||
AArch64GenRegisterBankInfo::getFPExtMapping(unsigned DstSize,
|
||||
unsigned SrcSize) {
|
||||
// We support:
|
||||
// - For Scalar:
|
||||
// - 16 to 32.
|
||||
// - 16 to 64.
|
||||
// - 32 to 64.
|
||||
// => FPR 16 to FPR 32|64
|
||||
// => FPR 32 to FPR 64
|
||||
// - For vectors:
|
||||
// - v4f16 to v4f32
|
||||
// - v2f32 to v2f64
|
||||
// => FPR 64 to FPR 128
|
||||
|
||||
// Check that we have been asked sensible sizes.
|
||||
if (SrcSize == 16) {
|
||||
assert((DstSize == 32 || DstSize == 64) && "Unexpected half extension");
|
||||
if (DstSize == 32)
|
||||
return &ValMappings[FPExt16To32Idx];
|
||||
return &ValMappings[FPExt16To64Idx];
|
||||
}
|
||||
|
||||
if (SrcSize == 32) {
|
||||
assert(DstSize == 64 && "Unexpected float extension");
|
||||
return &ValMappings[FPExt32To64Idx];
|
||||
}
|
||||
assert((SrcSize == 64 || DstSize == 128) && "Unexpected vector extension");
|
||||
return &ValMappings[FPExt64To128Idx];
|
||||
}
|
||||
} // End llvm namespace.
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -16,9 +15,9 @@
|
|||
//===----------------------------------
|
||||
let AddedComplexity = 15, Size = 0 in
|
||||
def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering),
|
||||
[(atomic_fence imm:$ordering, 0)]>, Sched<[]>;
|
||||
def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
|
||||
def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
|
||||
[(atomic_fence timm:$ordering, 0)]>, Sched<[]>;
|
||||
def : Pat<(atomic_fence (i64 4), (timm)), (DMB (i32 0x9))>;
|
||||
def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic loads
|
||||
|
@ -103,6 +102,34 @@ def : Pat<(relaxed_load<atomic_load_64>
|
|||
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
|
||||
(LDURXi GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
// FP 32-bit loads
|
||||
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
|
||||
ro_Wextend32:$extend))))),
|
||||
(LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
|
||||
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
|
||||
ro_Xextend32:$extend))))),
|
||||
(LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
|
||||
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
|
||||
uimm12s8:$offset))))),
|
||||
(LDRSui GPR64sp:$Rn, uimm12s8:$offset)>;
|
||||
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
|
||||
(am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
|
||||
(LDURSi GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
// FP 64-bit loads
|
||||
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
|
||||
ro_Wextend64:$extend))))),
|
||||
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
|
||||
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
|
||||
ro_Xextend64:$extend))))),
|
||||
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
|
||||
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
|
||||
uimm12s8:$offset))))),
|
||||
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
|
||||
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
|
||||
(am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
|
||||
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic stores
|
||||
//===----------------------------------
|
||||
|
@ -197,6 +224,38 @@ def : Pat<(relaxed_store<atomic_store_64>
|
|||
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
|
||||
(STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
// FP 32-bit stores
|
||||
def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
|
||||
ro_Wextend32:$extend),
|
||||
(i32 (bitconvert (f32 FPR32Op:$val)))),
|
||||
(STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
|
||||
ro_Xextend32:$extend),
|
||||
(i32 (bitconvert (f32 FPR32Op:$val)))),
|
||||
(STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32>
|
||||
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
|
||||
(STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32>
|
||||
(am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
|
||||
(STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
// FP 64-bit stores
|
||||
def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
|
||||
ro_Wextend64:$extend),
|
||||
(i64 (bitconvert (f64 FPR64Op:$val)))),
|
||||
(STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
|
||||
ro_Xextend64:$extend),
|
||||
(i64 (bitconvert (f64 FPR64Op:$val)))),
|
||||
(STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64>
|
||||
(am_indexed64 GPR64sp:$Rn, uimm12s4:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
|
||||
(STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64>
|
||||
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
|
||||
(STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
|
||||
|
||||
//===----------------------------------
|
||||
// Low-level exclusive operations
|
||||
//===----------------------------------
|
||||
|
@ -205,19 +264,27 @@ def : Pat<(relaxed_store<atomic_store_64>
|
|||
|
||||
def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
|
||||
}
|
||||
|
||||
def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
|
||||
}
|
||||
|
||||
def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
|
||||
}
|
||||
|
||||
def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
|
||||
}
|
||||
|
||||
def : Pat<(ldxr_1 GPR64sp:$addr),
|
||||
(SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
|
||||
|
@ -238,19 +305,27 @@ def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff),
|
|||
|
||||
def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
|
||||
}
|
||||
|
||||
def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
|
||||
}
|
||||
|
||||
def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
|
||||
}
|
||||
|
||||
def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
|
||||
}
|
||||
|
||||
def : Pat<(ldaxr_1 GPR64sp:$addr),
|
||||
(SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
|
||||
|
@ -272,22 +347,30 @@ def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff),
|
|||
def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
|
||||
}
|
||||
|
||||
def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
|
||||
}
|
||||
|
||||
def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
|
||||
}
|
||||
|
||||
def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
|
||||
}
|
||||
|
||||
|
||||
def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr),
|
||||
|
@ -318,22 +401,30 @@ def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
|
|||
def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stlxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
|
||||
}
|
||||
|
||||
def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stlxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
|
||||
}
|
||||
|
||||
def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stlxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
|
||||
}
|
||||
|
||||
def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_aarch64_stlxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
}]> {
|
||||
let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
|
||||
}
|
||||
|
||||
|
||||
def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
|
||||
|
@ -398,11 +489,16 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
|
|||
}
|
||||
|
||||
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
|
||||
mayLoad = 1, mayStore = 1 in
|
||||
def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
|
||||
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
|
||||
GPR64:$newLo, GPR64:$newHi), []>,
|
||||
Sched<[WriteAtomic]>;
|
||||
mayLoad = 1, mayStore = 1 in {
|
||||
class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch),
|
||||
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
|
||||
GPR64:$newLo, GPR64:$newHi), []>,
|
||||
Sched<[WriteAtomic]>;
|
||||
def CMP_SWAP_128 : cmp_swap_128;
|
||||
def CMP_SWAP_128_RELEASE : cmp_swap_128;
|
||||
def CMP_SWAP_128_ACQUIRE : cmp_swap_128;
|
||||
def CMP_SWAP_128_MONOTONIC : cmp_swap_128;
|
||||
}
|
||||
|
||||
// v8.1 Atomic instructions:
|
||||
let Predicates = [HasLSE] in {
|
||||
|
@ -423,4 +519,3 @@ let Predicates = [HasLSE] in {
|
|||
defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
|
||||
defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,287 @@
|
|||
//=----- AArch64InstrGISel.td - AArch64 GISel target pseudos -*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AArch64 GlobalISel target pseudo instruction definitions. This is kept
|
||||
// separately from the other tablegen files for organizational purposes, but
|
||||
// share the same infrastructure.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
class AArch64GenericInstruction : GenericInstruction {
|
||||
let Namespace = "AArch64";
|
||||
}
|
||||
|
||||
// A pseudo to represent a relocatable add instruction as part of address
|
||||
// computation.
|
||||
def G_ADD_LOW : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type1:$src, type2:$imm);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Pseudo for a rev16 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_REV16 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Pseudo for a rev32 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_REV32 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Pseudo for a rev64 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_REV64 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents an uzp1 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_UZP1 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents an uzp2 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_UZP2 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a zip1 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_ZIP1 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a zip2 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_ZIP2 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a dup instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_DUP: AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a lane duplicate operation.
|
||||
def G_DUPLANE8 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src, type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
def G_DUPLANE16 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src, type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
def G_DUPLANE32 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src, type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
def G_DUPLANE64 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src, type1:$lane);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a trn1 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_TRN1 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a trn2 instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_TRN2 : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents an ext instruction. Produced post-legalization from
|
||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||
def G_EXT: AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a vector G_ASHR with an immediate.
|
||||
def G_VASHR : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents a vector G_LSHR with an immediate.
|
||||
def G_VLSHR : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Represents an integer to FP conversion on the FPR bank.
|
||||
def G_SITOF : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
def G_UITOF : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMEQ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, type1:$src2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMGE : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, type1:$src2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMGT : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src1, type1:$src2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMEQZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMGEZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMGTZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMLEZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_FCMLTZ : AArch64GenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def : GINodeEquiv<G_REV16, AArch64rev16>;
|
||||
def : GINodeEquiv<G_REV32, AArch64rev32>;
|
||||
def : GINodeEquiv<G_REV64, AArch64rev64>;
|
||||
def : GINodeEquiv<G_UZP1, AArch64uzp1>;
|
||||
def : GINodeEquiv<G_UZP2, AArch64uzp2>;
|
||||
def : GINodeEquiv<G_ZIP1, AArch64zip1>;
|
||||
def : GINodeEquiv<G_ZIP2, AArch64zip2>;
|
||||
def : GINodeEquiv<G_DUP, AArch64dup>;
|
||||
def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>;
|
||||
def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
|
||||
def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
|
||||
def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
|
||||
def : GINodeEquiv<G_TRN1, AArch64trn1>;
|
||||
def : GINodeEquiv<G_TRN2, AArch64trn2>;
|
||||
def : GINodeEquiv<G_EXT, AArch64ext>;
|
||||
def : GINodeEquiv<G_VASHR, AArch64vashr>;
|
||||
def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
|
||||
def : GINodeEquiv<G_SITOF, AArch64sitof>;
|
||||
def : GINodeEquiv<G_UITOF, AArch64uitof>;
|
||||
|
||||
def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
|
||||
def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
|
||||
def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
|
||||
|
||||
def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
|
||||
def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
|
||||
def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
|
||||
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
|
||||
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
|
||||
|
||||
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
|
||||
|
||||
// These are patterns that we only use for GlobalISel via the importer.
|
||||
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
|
||||
(vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
|
||||
(f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
|
||||
|
||||
let Predicates = [HasNEON] in {
|
||||
def : Pat<(v2f64 (sint_to_fp v2i32:$src)),
|
||||
(SCVTFv2f64 (SSHLLv2i32_shift V64:$src, 0))>;
|
||||
def : Pat<(v2f64 (uint_to_fp v2i32:$src)),
|
||||
(UCVTFv2f64 (USHLLv2i32_shift V64:$src, 0))>;
|
||||
def : Pat<(v2f32 (sint_to_fp v2i64:$src)),
|
||||
(FCVTNv2i32 (SCVTFv2f64 V128:$src))>;
|
||||
def : Pat<(v2f32 (uint_to_fp v2i64:$src)),
|
||||
(FCVTNv2i32 (UCVTFv2f64 V128:$src))>;
|
||||
|
||||
def : Pat<(v2i64 (fp_to_sint v2f32:$src)),
|
||||
(FCVTZSv2f64 (FCVTLv2i32 V64:$src))>;
|
||||
def : Pat<(v2i64 (fp_to_uint v2f32:$src)),
|
||||
(FCVTZUv2f64 (FCVTLv2i32 V64:$src))>;
|
||||
def : Pat<(v2i32 (fp_to_sint v2f64:$src)),
|
||||
(XTNv2i32 (FCVTZSv2f64 V128:$src))>;
|
||||
def : Pat<(v2i32 (fp_to_uint v2f64:$src)),
|
||||
(XTNv2i32 (FCVTZUv2f64 V128:$src))>;
|
||||
|
||||
}
|
||||
|
||||
let Predicates = [HasNoLSE] in {
|
||||
def : Pat<(atomic_cmp_swap_8 GPR64:$addr, GPR32:$desired, GPR32:$new),
|
||||
(CMP_SWAP_8 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
|
||||
|
||||
def : Pat<(atomic_cmp_swap_16 GPR64:$addr, GPR32:$desired, GPR32:$new),
|
||||
(CMP_SWAP_16 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
|
||||
|
||||
def : Pat<(atomic_cmp_swap_32 GPR64:$addr, GPR32:$desired, GPR32:$new),
|
||||
(CMP_SWAP_32 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
|
||||
|
||||
def : Pat<(atomic_cmp_swap_64 GPR64:$addr, GPR64:$desired, GPR64:$new),
|
||||
(CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>;
|
||||
}
|
||||
|
||||
def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
|
||||
(STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
|
||||
def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
|
||||
(STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,18 @@
|
|||
//===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This describes the available hardware counters for AArch64.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CpuCyclesPfmCounter : PfmCounter<"CPU_CYCLES">;
|
||||
|
||||
def DefaultPfmCounters : ProcPfmCounters {
|
||||
let CycleCounter = CpuCyclesPfmCounter;
|
||||
}
|
||||
def : PfmCountersDefaultBinding<DefaultPfmCounters>;
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -11,7 +10,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// General Purpose Registers: W, X.
|
||||
def GPRRegBank : RegisterBank<"GPR", [GPR64all]>;
|
||||
def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>;
|
||||
|
||||
/// Floating Point/Vector Registers: B, H, S, D, Q.
|
||||
def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -25,11 +24,9 @@ let Namespace = "AArch64" in {
|
|||
def bsub : SubRegIndex<8>;
|
||||
def hsub : SubRegIndex<16>;
|
||||
def ssub : SubRegIndex<32>;
|
||||
def dsub : SubRegIndex<32>;
|
||||
def dsub : SubRegIndex<64>;
|
||||
def sube32 : SubRegIndex<32>;
|
||||
def subo32 : SubRegIndex<32>;
|
||||
def qhisub : SubRegIndex<64>;
|
||||
def qsub : SubRegIndex<64>;
|
||||
def sube64 : SubRegIndex<64>;
|
||||
def subo64 : SubRegIndex<64>;
|
||||
// SVE
|
||||
|
@ -48,6 +45,16 @@ let Namespace = "AArch64" in {
|
|||
def qsub1 : SubRegIndex<128>;
|
||||
def qsub2 : SubRegIndex<128>;
|
||||
def qsub3 : SubRegIndex<128>;
|
||||
// Note: Code depends on these having consecutive numbers
|
||||
def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits
|
||||
def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
|
||||
def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
|
||||
def zasubs0 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits
|
||||
def zasubs1 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits
|
||||
def zasubd0 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
|
||||
def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
|
||||
def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
|
||||
def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
|
||||
}
|
||||
|
||||
let Namespace = "AArch64" in {
|
||||
|
@ -134,6 +141,9 @@ def NZCV : AArch64Reg<0, "nzcv">;
|
|||
// First fault status register
|
||||
def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>;
|
||||
|
||||
// Purely virtual Vector Granule (VG) Dwarf register
|
||||
def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>;
|
||||
|
||||
// GPR register classes with the intersections of GPR32/GPR32sp and
|
||||
// GPR64/GPR64sp for use by the coalescer.
|
||||
def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
|
||||
|
@ -188,6 +198,10 @@ def GPR64z : RegisterOperand<GPR64> {
|
|||
let GIZeroRegister = XZR;
|
||||
}
|
||||
|
||||
// GPR argument registers.
|
||||
def GPR32arg : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 7)>;
|
||||
def GPR64arg : RegisterClass<"AArch64", [i64], 64, (sequence "X%u", 0, 7)>;
|
||||
|
||||
// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
|
||||
// constraint used by any instructions, it is used as a common super-class.
|
||||
def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
|
||||
|
@ -200,6 +214,17 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
|
|||
X22, X23, X24, X25, X26,
|
||||
X27, X28, FP, LR)>;
|
||||
|
||||
// Restricted set of tail call registers, for use when branch target
|
||||
// enforcement is enabled. These are the only registers which can be used to
|
||||
// indirectly branch (not call) to the "BTI c" instruction at the start of a
|
||||
// BTI-protected function.
|
||||
def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
|
||||
|
||||
// Register set that excludes registers that are reserved for procedure calls.
|
||||
// This is used for pseudo-instructions that are actually implemented using a
|
||||
// procedure call.
|
||||
def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)>;
|
||||
|
||||
// GPR register classes for post increment amount of vector load/store that
|
||||
// has alternate printing when Rm=31 and prints a constant immediate value
|
||||
// equal to the total number of bytes transferred.
|
||||
|
@ -408,25 +433,35 @@ def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
|
|||
def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
|
||||
let Size = 8;
|
||||
}
|
||||
def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
|
||||
def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
|
||||
let Size = 16;
|
||||
}
|
||||
|
||||
def FPR16_lo : RegisterClass<"AArch64", [f16], 16, (trunc FPR16, 16)> {
|
||||
let Size = 16;
|
||||
}
|
||||
def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
|
||||
def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
|
||||
v1i64, v4f16],
|
||||
64, (sequence "D%u", 0, 31)>;
|
||||
v1i64, v4f16, v4bf16],
|
||||
64, (sequence "D%u", 0, 31)>;
|
||||
def FPR64_lo : RegisterClass<"AArch64",
|
||||
[v8i8, v4i16, v2i32, v1i64, v4f16, v4bf16, v2f32,
|
||||
v1f64],
|
||||
64, (trunc FPR64, 16)>;
|
||||
|
||||
// We don't (yet) have an f128 legal type, so don't use that here. We
|
||||
// normalize 128-bit vectors to v2f64 for arg passing and such, so use
|
||||
// that here.
|
||||
def FPR128 : RegisterClass<"AArch64",
|
||||
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128,
|
||||
v8f16],
|
||||
v8f16, v8bf16],
|
||||
128, (sequence "Q%u", 0, 31)>;
|
||||
|
||||
// The lower 16 vector registers. Some instructions can only take registers
|
||||
// in this range.
|
||||
def FPR128_lo : RegisterClass<"AArch64",
|
||||
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
|
||||
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16,
|
||||
v8bf16],
|
||||
128, (trunc FPR128, 16)>;
|
||||
|
||||
// Pairs, triples, and quads of 64-bit vector registers.
|
||||
|
@ -467,7 +502,7 @@ def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {
|
|||
|
||||
|
||||
// Vector operand versions of the FP registers. Alternate name printing and
|
||||
// assmebler matching.
|
||||
// assembler matching.
|
||||
def VectorReg64AsmOperand : AsmOperandClass {
|
||||
let Name = "VectorReg64";
|
||||
let PredicateMethod = "isNeonVectorReg";
|
||||
|
@ -489,6 +524,9 @@ def VectorRegLoAsmOperand : AsmOperandClass {
|
|||
let Name = "VectorRegLo";
|
||||
let PredicateMethod = "isNeonVectorRegLo";
|
||||
}
|
||||
def V64_lo : RegisterOperand<FPR64_lo, "printVRegOperand"> {
|
||||
let ParserMatchClass = VectorRegLoAsmOperand;
|
||||
}
|
||||
def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
|
||||
let ParserMatchClass = VectorRegLoAsmOperand;
|
||||
}
|
||||
|
@ -627,6 +665,10 @@ def FPR16Op : RegisterOperand<FPR16, "printOperand"> {
|
|||
let ParserMatchClass = FPRAsmOperand<"FPR16">;
|
||||
}
|
||||
|
||||
def FPR16Op_lo : RegisterOperand<FPR16_lo, "printOperand"> {
|
||||
let ParserMatchClass = FPRAsmOperand<"FPR16_lo">;
|
||||
}
|
||||
|
||||
def FPR32Op : RegisterOperand<FPR32, "printOperand"> {
|
||||
let ParserMatchClass = FPRAsmOperand<"FPR32">;
|
||||
}
|
||||
|
@ -643,16 +685,18 @@ def FPR128Op : RegisterOperand<FPR128, "printOperand"> {
|
|||
// ARMv8.1a atomic CASP register operands
|
||||
|
||||
|
||||
def WSeqPairs : RegisterTuples<[sube32, subo32],
|
||||
[(rotl GPR32, 0), (rotl GPR32, 1)]>;
|
||||
def XSeqPairs : RegisterTuples<[sube64, subo64],
|
||||
[(rotl GPR64, 0), (rotl GPR64, 1)]>;
|
||||
def WSeqPairs : RegisterTuples<[sube32, subo32],
|
||||
[(decimate (rotl GPR32, 0), 2),
|
||||
(decimate (rotl GPR32, 1), 2)]>;
|
||||
def XSeqPairs : RegisterTuples<[sube64, subo64],
|
||||
[(decimate (rotl GPR64, 0), 2),
|
||||
(decimate (rotl GPR64, 1), 2)]>;
|
||||
|
||||
def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
|
||||
def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
|
||||
(add WSeqPairs)>{
|
||||
let Size = 64;
|
||||
}
|
||||
def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64,
|
||||
def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64,
|
||||
(add XSeqPairs)>{
|
||||
let Size = 128;
|
||||
}
|
||||
|
@ -675,6 +719,34 @@ def XSeqPairClassOperand :
|
|||
|
||||
//===----- END: v8.1a atomic CASP register operands -----------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Armv8.7a accelerator extension register operands: 8 consecutive GPRs
|
||||
// starting with an even one
|
||||
|
||||
let Namespace = "AArch64" in {
|
||||
foreach i = 0-7 in
|
||||
def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>;
|
||||
}
|
||||
|
||||
def Tuples8X : RegisterTuples<
|
||||
!foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)),
|
||||
!foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>;
|
||||
|
||||
def GPR64x8Class : RegisterClass<"AArch64", [i64x8], 512, (trunc Tuples8X, 12)> {
|
||||
let Size = 512;
|
||||
}
|
||||
def GPR64x8AsmOp : AsmOperandClass {
|
||||
let Name = "GPR64x8";
|
||||
let ParserMethod = "tryParseGPR64x8";
|
||||
let RenderMethod = "addRegOperands";
|
||||
}
|
||||
def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> {
|
||||
let ParserMatchClass = GPR64x8AsmOp;
|
||||
let PrintMethod = "printGPR64x8";
|
||||
}
|
||||
|
||||
//===----- END: v8.7a accelerator extension register operands -------------===//
|
||||
|
||||
// SVE predicate registers
|
||||
def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>;
|
||||
def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>;
|
||||
|
@ -764,7 +836,7 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
|
|||
def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
|
||||
}
|
||||
|
||||
// Enum descibing the element size for destructive
|
||||
// Enum describing the element size for destructive
|
||||
// operations.
|
||||
class ElementSizeEnum<bits<3> val> {
|
||||
bits<3> Value = val;
|
||||
|
@ -829,48 +901,25 @@ def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
|
|||
def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
|
||||
|
||||
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
|
||||
def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
|
||||
def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
|
||||
def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
|
||||
def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
|
||||
|
||||
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
|
||||
def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
|
||||
def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
|
||||
def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
|
||||
def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
|
||||
|
||||
//******************************************************************************
|
||||
|
||||
// SVE vector register class
|
||||
def ZPR : RegisterClass<"AArch64",
|
||||
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
|
||||
nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv1f32, nxv2f32, nxv4f32,
|
||||
nxv1f64, nxv2f64],
|
||||
128, (sequence "Z%u", 0, 31)> {
|
||||
// SVE vector register classes
|
||||
class ZPRClass<int lastreg> : RegisterClass<"AArch64",
|
||||
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
|
||||
nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv2bf16, nxv4bf16, nxv8bf16,
|
||||
nxv2f32, nxv4f32,
|
||||
nxv2f64],
|
||||
128, (sequence "Z%u", 0, lastreg)> {
|
||||
let Size = 128;
|
||||
}
|
||||
|
||||
// SVE restricted 4 bit scalable vector register class
|
||||
def ZPR_4b : RegisterClass<"AArch64",
|
||||
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
|
||||
nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv1f32, nxv2f32, nxv4f32,
|
||||
nxv1f64, nxv2f64],
|
||||
128, (sequence "Z%u", 0, 15)> {
|
||||
let Size = 128;
|
||||
}
|
||||
|
||||
// SVE restricted 3 bit scalable vector register class
|
||||
def ZPR_3b : RegisterClass<"AArch64",
|
||||
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
|
||||
nxv2f16, nxv4f16, nxv8f16,
|
||||
nxv1f32, nxv2f32, nxv4f32,
|
||||
nxv1f64, nxv2f64],
|
||||
128, (sequence "Z%u", 0, 7)> {
|
||||
let Size = 128;
|
||||
}
|
||||
def ZPR : ZPRClass<31>;
|
||||
def ZPR_4b : ZPRClass<15>; // Restricted 4 bit SVE vector register class.
|
||||
def ZPR_3b : ZPRClass<7>; // Restricted 3 bit SVE vector register class.
|
||||
|
||||
class ZPRAsmOperand<string name, int Width, string RegClassSuffix = "">
|
||||
: AsmOperandClass {
|
||||
|
@ -1104,10 +1153,235 @@ class GPR64ExtendRegisterOperand<string Name, int Scale, RegisterClass RegClass>
|
|||
let PrintMethod = "printRegWithShiftExtend<false, " # Scale # ", 'x', 0>";
|
||||
}
|
||||
|
||||
foreach Scale = [8, 16, 32, 64] in {
|
||||
foreach Scale = [8, 16, 32, 64, 128] in {
|
||||
def GPR64shiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64shifted", Scale, "GPR64">;
|
||||
def GPR64shifted # Scale : GPR64ExtendRegisterOperand<"GPR64shiftedAsmOpnd" # Scale, Scale, GPR64>;
|
||||
|
||||
def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">;
|
||||
def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>;
|
||||
}
|
||||
|
||||
// Accumulator array tiles.
|
||||
def ZAQ0 : AArch64Reg<0, "za0.q">;
|
||||
def ZAQ1 : AArch64Reg<1, "za1.q">;
|
||||
def ZAQ2 : AArch64Reg<2, "za2.q">;
|
||||
def ZAQ3 : AArch64Reg<3, "za3.q">;
|
||||
def ZAQ4 : AArch64Reg<4, "za4.q">;
|
||||
def ZAQ5 : AArch64Reg<5, "za5.q">;
|
||||
def ZAQ6 : AArch64Reg<6, "za6.q">;
|
||||
def ZAQ7 : AArch64Reg<7, "za7.q">;
|
||||
def ZAQ8 : AArch64Reg<8, "za8.q">;
|
||||
def ZAQ9 : AArch64Reg<9, "za9.q">;
|
||||
def ZAQ10 : AArch64Reg<10, "za10.q">;
|
||||
def ZAQ11 : AArch64Reg<11, "za11.q">;
|
||||
def ZAQ12 : AArch64Reg<12, "za12.q">;
|
||||
def ZAQ13 : AArch64Reg<13, "za13.q">;
|
||||
def ZAQ14 : AArch64Reg<14, "za14.q">;
|
||||
def ZAQ15 : AArch64Reg<15, "za15.q">;
|
||||
|
||||
let SubRegIndices = [zasubq0, zasubq1] in {
|
||||
def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>;
|
||||
def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>;
|
||||
def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>;
|
||||
def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>;
|
||||
def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>;
|
||||
def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>;
|
||||
def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>;
|
||||
def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [zasubd0, zasubd1] in {
|
||||
def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>;
|
||||
def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>;
|
||||
def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>;
|
||||
def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [zasubs0, zasubs1] in {
|
||||
def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>;
|
||||
def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [zasubh0, zasubh1] in {
|
||||
def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [zasubb] in {
|
||||
def ZA : AArch64Reg<0, "za", [ZAB0]>;
|
||||
}
|
||||
|
||||
// SME Register Classes
|
||||
|
||||
// Accumulator array
|
||||
def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
|
||||
let Size = 2048;
|
||||
}
|
||||
|
||||
// Accumulator array as single tiles
|
||||
def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
|
||||
let Size = 2048;
|
||||
}
|
||||
def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
|
||||
let Size = 1024;
|
||||
}
|
||||
def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> {
|
||||
let Size = 512;
|
||||
}
|
||||
def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> {
|
||||
let Size = 256;
|
||||
}
|
||||
def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> {
|
||||
let Size = 128;
|
||||
}
|
||||
|
||||
// SME Register Operands
|
||||
// There are three types of SME matrix register operands:
|
||||
// * Tiles:
|
||||
//
|
||||
// These tiles make up the larger accumulator matrix. The tile representation
|
||||
// has an element type suffix, e.g. za0.b or za15.q and can be any of the
|
||||
// registers:
|
||||
// ZAQ0..ZAQ15
|
||||
// ZAD0..ZAD7
|
||||
// ZAS0..ZAS3
|
||||
// ZAH0..ZAH1
|
||||
// or ZAB0
|
||||
//
|
||||
// * Tile vectors:
|
||||
//
|
||||
// Their representation is similar to regular tiles, but they have an extra
|
||||
// 'h' or 'v' to tell how the vector at [reg+offset] is layed out in the tile,
|
||||
// horizontally or vertically.
|
||||
//
|
||||
// e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and
|
||||
// ZAQ15, respectively. The horizontal/vertical is more a property of the
|
||||
// instruction, than a property of the asm-operand itself, or its register.
|
||||
// The distinction is required for the parsing/printing of the operand,
|
||||
// as from a compiler's perspective, the whole tile is read/written.
|
||||
//
|
||||
// * Accumulator matrix:
|
||||
//
|
||||
// This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
|
||||
// 'za'.
|
||||
|
||||
//
|
||||
// Tiles
|
||||
//
|
||||
|
||||
class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
|
||||
let Name = "MatrixTile" # EltSize;
|
||||
let DiagnosticType = "Invalid" # Name;
|
||||
let ParserMethod = "tryParseMatrixRegister";
|
||||
let RenderMethod = "addMatrixOperands";
|
||||
let PredicateMethod = "isMatrixRegOperand<"
|
||||
# "MatrixKind::Tile" # ", "
|
||||
# EltSize # ", AArch64::" # RC # "RegClassID>";
|
||||
}
|
||||
|
||||
class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
|
||||
: RegisterOperand<RC> {
|
||||
let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
|
||||
let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
|
||||
let PrintMethod = "printMatrixTile";
|
||||
}
|
||||
|
||||
def TileOp32 : MatrixTileOperand<32, 2, MPR32>;
|
||||
def TileOp64 : MatrixTileOperand<64, 3, MPR64>;
|
||||
|
||||
//
|
||||
// Tile vectors (horizontal and vertical)
|
||||
//
|
||||
|
||||
class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
|
||||
: AsmOperandClass {
|
||||
let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
|
||||
let DiagnosticType = "Invalid" # Name;
|
||||
let ParserMethod = "tryParseMatrixRegister";
|
||||
let RenderMethod = "addMatrixOperands";
|
||||
let PredicateMethod = "isMatrixRegOperand<"
|
||||
# "MatrixKind::"
|
||||
# !if(IsVertical, "Col", "Row") # ", "
|
||||
# EltSize # ", AArch64::" # RC # "RegClassID>";
|
||||
}
|
||||
|
||||
class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
|
||||
RegisterClass RC, int IsVertical>
|
||||
: RegisterOperand<RC> {
|
||||
let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
|
||||
IsVertical>;
|
||||
let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
|
||||
let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
|
||||
}
|
||||
|
||||
def TileVectorOpH8 : MatrixTileVectorOperand< 8, 0, MPR8, 0>;
|
||||
def TileVectorOpH16 : MatrixTileVectorOperand< 16, 1, MPR16, 0>;
|
||||
def TileVectorOpH32 : MatrixTileVectorOperand< 32, 2, MPR32, 0>;
|
||||
def TileVectorOpH64 : MatrixTileVectorOperand< 64, 3, MPR64, 0>;
|
||||
def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
|
||||
|
||||
def TileVectorOpV8 : MatrixTileVectorOperand< 8, 0, MPR8, 1>;
|
||||
def TileVectorOpV16 : MatrixTileVectorOperand< 16, 1, MPR16, 1>;
|
||||
def TileVectorOpV32 : MatrixTileVectorOperand< 32, 2, MPR32, 1>;
|
||||
def TileVectorOpV64 : MatrixTileVectorOperand< 64, 3, MPR64, 1>;
|
||||
def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
|
||||
|
||||
//
|
||||
// Accumulator matrix
|
||||
//
|
||||
|
||||
class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
|
||||
let Name = "Matrix";
|
||||
let DiagnosticType = "Invalid" # Name;
|
||||
let ParserMethod = "tryParseMatrixRegister";
|
||||
let RenderMethod = "addMatrixOperands";
|
||||
let PredicateMethod = "isMatrixRegOperand<"
|
||||
# "MatrixKind::Array" # ", "
|
||||
# EltSize # ", AArch64::" # RC # "RegClassID>";
|
||||
}
|
||||
|
||||
class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
|
||||
let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
|
||||
let PrintMethod = "printMatrix<" # EltSize # ">";
|
||||
}
|
||||
|
||||
def MatrixOp : MatrixOperand<MPR, 0>;
|
||||
|
||||
class MatrixTileListAsmOperand : AsmOperandClass {
|
||||
let Name = "MatrixTileList";
|
||||
let ParserMethod = "tryParseMatrixTileList";
|
||||
let RenderMethod = "addMatrixTileListOperands";
|
||||
let PredicateMethod = "isMatrixTileList";
|
||||
}
|
||||
|
||||
class MatrixTileListOperand : Operand<i8> {
|
||||
let ParserMatchClass = MatrixTileListAsmOperand<>;
|
||||
let DecoderMethod = "DecodeMatrixTileListRegisterClass";
|
||||
let EncoderMethod = "EncodeMatrixTileListRegisterClass";
|
||||
let PrintMethod = "printMatrixTileList";
|
||||
}
|
||||
|
||||
def MatrixTileList : MatrixTileListOperand<>;
|
||||
|
||||
def MatrixIndexGPR32_12_15 : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 12, 15)> {
|
||||
let DiagnosticType = "InvalidMatrixIndexGPR32_12_15";
|
||||
}
|
||||
def MatrixIndexGPR32Op12_15 : RegisterOperand<MatrixIndexGPR32_12_15> {
|
||||
let EncoderMethod = "encodeMatrixIndexGPR32";
|
||||
}
|
||||
|
||||
def SVCROperand : AsmOperandClass {
|
||||
let Name = "SVCR";
|
||||
let ParserMethod = "tryParseSVCR";
|
||||
let DiagnosticType = "Invalid" # Name;
|
||||
}
|
||||
|
||||
def svcr_op : Operand<i32> {
|
||||
let ParserMatchClass = SVCROperand;
|
||||
let PrintMethod = "printSVCROp";
|
||||
let DecoderMethod = "DecodeSVCROp";
|
||||
let MCOperandPredicate = [{
|
||||
if (!MCOp.isImm())
|
||||
return false;
|
||||
return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr;
|
||||
}];
|
||||
}
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Add vector elements horizontally or vertically to ZA tile.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
|
||||
def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSMEI64] in {
|
||||
def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
|
||||
def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Outer products
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa">;
|
||||
defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops">;
|
||||
|
||||
def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">;
|
||||
def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSMEF64] in {
|
||||
def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">;
|
||||
def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa">;
|
||||
defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops">;
|
||||
|
||||
def SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa">;
|
||||
def SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops">;
|
||||
def UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa">;
|
||||
def UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops">;
|
||||
def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa">;
|
||||
def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">;
|
||||
def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">;
|
||||
def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSMEI64] in {
|
||||
def SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa">;
|
||||
def SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops">;
|
||||
def UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa">;
|
||||
def UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops">;
|
||||
def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">;
|
||||
def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">;
|
||||
def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">;
|
||||
def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Loads and stores
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm LD1_MXIPXX : sme_mem_ld_ss<"ld1">;
|
||||
defm ST1_MXIPXX : sme_mem_st_ss<"st1">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Spill + fill
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm LDR_ZA : sme_fill<"ldr">;
|
||||
defm STR_ZA : sme_spill<"str">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Move instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm INSERT_MXIPZ : sme_vector_to_tile<"mova">;
|
||||
defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Zero instruction
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm ZERO_M : sme_zero<"zero">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Mode selection and state access instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
|
||||
// both fields:
|
||||
//
|
||||
// MSR SVCRSM, #<imm1>
|
||||
// MSR SVCRZA, #<imm1>
|
||||
// MSR SVCRSMZA, #<imm1>
|
||||
//
|
||||
// It's tricky to using the existing pstate operand defined in
|
||||
// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
|
||||
// when these fields are also encoded in CRm[3:1].
|
||||
class MSRpstatesvcrImm0_1
|
||||
: PstateWriteSimple<(ins svcr_op:$pstatefield, imm0_1:$imm), "msr",
|
||||
"\t$pstatefield, $imm">,
|
||||
Sched<[WriteSys]> {
|
||||
bits<3> pstatefield;
|
||||
bit imm;
|
||||
let Inst{18-16} = 0b011; // op1
|
||||
let Inst{11-9} = pstatefield;
|
||||
let Inst{8} = imm;
|
||||
let Inst{7-5} = 0b011; // op2
|
||||
}
|
||||
|
||||
def MSRpstatesvcrImm1 : MSRpstatesvcrImm0_1;
|
||||
def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>;
|
||||
def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
|
||||
def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
|
||||
|
||||
def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>;
|
||||
def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>;
|
||||
def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVE2 instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def REVD_ZPmZ : sve2_int_perm_revd<"revd">;
|
||||
|
||||
defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>;
|
||||
defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>;
|
||||
|
||||
defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">;
|
||||
|
||||
} // End let Predicates = [HasSME]
|
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -27,7 +26,9 @@ def CortexA53Model : SchedMachineModel {
|
|||
// v 1.0 Spreadsheet
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
}
|
||||
|
||||
|
||||
|
@ -126,7 +127,8 @@ def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
|
|||
def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }
|
||||
|
||||
// FP Mul, Div, Sqrt
|
||||
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
|
||||
|
@ -148,6 +150,7 @@ def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
|
|||
// No forwarding for these reads.
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
|
||||
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
|
||||
|
|
|
@ -0,0 +1,361 @@
|
|||
//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for the ARM Cortex-A55 processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// The following definitions describe the per-operand machine model.
|
||||
// This works with MachineScheduler. See MCSchedModel.h for details.
|
||||
|
||||
// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
|
||||
def CortexA55Model : SchedMachineModel {
|
||||
let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor
|
||||
let IssueWidth = 2; // It dual-issues under most circumstances
|
||||
let LoadLatency = 4; // Cycles for loads to access the cache. The
|
||||
// optimisation guide shows that most loads have
|
||||
// a latency of 3, but some have a latency of 4
|
||||
// or 5. Setting it 4 looked to be good trade-off.
|
||||
let MispredictPenalty = 8; // A branch direction mispredict.
|
||||
let PostRAScheduler = 1; // Enable PostRA scheduler pass.
|
||||
let CompleteModel = 0; // Covers instructions applicable to Cortex-A55.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define each kind of processor resource and number available.
|
||||
|
||||
// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the
|
||||
// Cortex-A55 is in-order.
|
||||
|
||||
def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
|
||||
def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bi wide
|
||||
def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
|
||||
def CortexA55UnitLd : ProcResource<1> { let BufferSize = 0; } // Load pipe
|
||||
def CortexA55UnitSt : ProcResource<1> { let BufferSize = 0; } // Store pipe
|
||||
def CortexA55UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
|
||||
|
||||
// The FP DIV/SQRT instructions execute totally differently from the FP ALU
|
||||
// instructions, which can mostly be dual-issued; that's why for now we model
|
||||
// them with 2 resources.
|
||||
def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
|
||||
def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC
|
||||
def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific SchedWrite types
|
||||
|
||||
let SchedModel = CortexA55Model in {
|
||||
|
||||
// These latencies are modeled without taking into account forwarding paths
|
||||
// (the software optimisation guide lists latencies taking into account
|
||||
// typical forwarding paths).
|
||||
def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; } // MOVN, MOVZ
|
||||
def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; } // ALU
|
||||
def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg
|
||||
def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg
|
||||
def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; } // EXTR from a reg pair
|
||||
def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; } // Shift/Scale
|
||||
|
||||
// MAC
|
||||
def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply
|
||||
def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply
|
||||
|
||||
// Div
|
||||
def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
|
||||
let Latency = 8; let ResourceCycles = [8];
|
||||
}
|
||||
def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
|
||||
let Latency = 8; let ResourceCycles = [8];
|
||||
}
|
||||
|
||||
// Load
|
||||
def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; }
|
||||
def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
|
||||
def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
|
||||
|
||||
// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd
|
||||
// below, choosing the median of 3 which makes the latency 6.
|
||||
// An extra cycle is needed to get the swizzling right.
|
||||
def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
|
||||
let ResourceCycles = [3]; }
|
||||
def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
|
||||
def CortexA55WriteVLD1SI : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; let SingleIssue = 1; }
|
||||
def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
|
||||
let ResourceCycles = [2]; }
|
||||
def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
|
||||
let ResourceCycles = [3]; }
|
||||
def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
|
||||
let ResourceCycles = [4]; }
|
||||
def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
|
||||
let ResourceCycles = [5]; }
|
||||
def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
|
||||
let ResourceCycles = [6]; }
|
||||
def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
|
||||
let ResourceCycles = [7]; }
|
||||
def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
|
||||
let ResourceCycles = [8]; }
|
||||
|
||||
def CortexA55WriteLDP1 : SchedWriteRes<[]> { let Latency = 4; }
|
||||
def CortexA55WriteLDP2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; }
|
||||
def CortexA55WriteLDP4 : SchedWriteRes<[CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd]> { let Latency = 6; }
|
||||
|
||||
// Pre/Post Indexing - Performed as part of address generation
|
||||
def : WriteRes<WriteAdr, []> { let Latency = 0; }
|
||||
|
||||
// Store
|
||||
let RetireOOO = 1 in {
|
||||
def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 1; }
|
||||
}
|
||||
def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
|
||||
|
||||
// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
|
||||
def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
|
||||
let ResourceCycles = [2];}
|
||||
def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
|
||||
def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
|
||||
let ResourceCycles = [2]; }
|
||||
def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
|
||||
let ResourceCycles = [3]; }
|
||||
def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
|
||||
let ResourceCycles = [4]; }
|
||||
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
|
||||
// Branch
|
||||
def : WriteRes<WriteBr, [CortexA55UnitB]>;
|
||||
def : WriteRes<WriteBrReg, [CortexA55UnitB]>;
|
||||
def : WriteRes<WriteSys, [CortexA55UnitB]>;
|
||||
def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
|
||||
def : WriteRes<WriteHint, [CortexA55UnitB]>;
|
||||
|
||||
// FP ALU
|
||||
// As WriteF result is produced in F5 and it can be mostly forwarded
|
||||
// to consumer at F1, the effectively latency is set as 4.
|
||||
def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
|
||||
|
||||
// FP ALU specific new schedwrite definitions
|
||||
def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
|
||||
def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
|
||||
def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
|
||||
def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
|
||||
|
||||
// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
|
||||
def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
|
||||
|
||||
let RetireOOO = 1 in {
|
||||
def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
|
||||
let ResourceCycles = [29]; }
|
||||
def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
|
||||
def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
|
||||
let ResourceCycles = [5]; }
|
||||
def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
|
||||
let ResourceCycles = [10]; }
|
||||
def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
|
||||
let ResourceCycles = [19]; }
|
||||
def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
|
||||
let ResourceCycles = [5]; }
|
||||
def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
|
||||
let ResourceCycles = [9]; }
|
||||
def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
|
||||
let ResourceCycles = [19]; }
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific SchedRead types.
|
||||
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 1>;
|
||||
def : ReadAdvance<ReadAdrBase, 1>;
|
||||
def : ReadAdvance<ReadST, 1>;
|
||||
|
||||
// ALU - ALU input operands are generally needed in EX1. An operand produced in
|
||||
// in say EX2 can be forwarded for consumption to ALU in EX1, thereby
|
||||
// allowing back-to-back ALU operations such as add. If an operand requires
|
||||
// a shift, it will, however, be required in ISS stage.
|
||||
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
// Shifted operand
|
||||
def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
def CortexA55ReadISReg : SchedReadVariant<[
|
||||
SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
|
||||
SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
|
||||
def : SchedAlias<ReadISReg, CortexA55ReadISReg>;
|
||||
|
||||
def CortexA55ReadIEReg : SchedReadVariant<[
|
||||
SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
|
||||
SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
|
||||
def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;
|
||||
|
||||
// MUL
|
||||
def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
|
||||
// Div
|
||||
def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
|
||||
WriteISReg, WriteIEReg,WriteIS,
|
||||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific InstRWs.
|
||||
|
||||
//---
|
||||
// Miscellaneous
|
||||
//---
|
||||
def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?Wi")>;
|
||||
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPSi")>;
|
||||
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)i")>;
|
||||
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQi")>;
|
||||
def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>;
|
||||
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>;
|
||||
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>;
|
||||
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>;
|
||||
def : InstRW<[WriteI], (instrs COPY)>;
|
||||
//---
|
||||
// Vector Loads - 64-bit per cycle
|
||||
//---
|
||||
// 1-element structures
|
||||
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element
|
||||
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
|
||||
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
|
||||
def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
// 2-element structures
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
|
||||
|
||||
// 3-element structures
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
// 4-element structures
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
|
||||
def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
|
||||
def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
|
||||
def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
//---
|
||||
// Vector Stores
|
||||
//---
|
||||
def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
|
||||
def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
//---
|
||||
// Floating Point Conversions, MAC, DIV, SQRT
|
||||
//---
|
||||
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
|
||||
|
||||
def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
|
||||
def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>;
|
||||
def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
|
||||
def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
|
||||
def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
|
||||
def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>;
|
||||
def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
|
||||
def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
|
||||
def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
|
||||
def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
|
||||
def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
|
||||
|
||||
}
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -32,7 +31,9 @@ def CortexA57Model : SchedMachineModel {
|
|||
let LoopMicroOpBufferSize = 16;
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -93,9 +94,10 @@ def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
|
|||
def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
|
||||
def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
|
||||
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
|
||||
def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
|
||||
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
|
||||
def : SchedAlias<WriteV, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVd, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVq, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
|
||||
def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
|
||||
|
||||
|
@ -115,6 +117,7 @@ def : ReadAdvance<ReadIM, 0>;
|
|||
def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
|
||||
def : ReadAdvance<ReadID, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
|
||||
|
@ -350,12 +353,16 @@ def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")
|
|||
// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
|
||||
// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
|
||||
|
||||
// Cortex A57 Software Optimization Guide Sec 3.14
|
||||
// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate
|
||||
def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>;
|
||||
|
||||
// ASIMD absolute diff accum, D-form
|
||||
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
|
||||
def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
|
||||
// ASIMD absolute diff accum, Q-form
|
||||
def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
|
||||
def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
|
||||
// ASIMD absolute diff accum long
|
||||
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>;
|
||||
def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>;
|
||||
|
||||
// ASIMD arith, reduce, 4H/4S
|
||||
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
|
||||
|
@ -372,32 +379,41 @@ def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>
|
|||
def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
|
||||
|
||||
// ASIMD multiply, D-form
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
|
||||
// MUL
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
|
||||
// PMUL, SQDMULH, SQRDMULH
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
|
||||
|
||||
// ASIMD multiply, Q-form
|
||||
def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
// MUL
|
||||
def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
// PMUL, SQDMULH, SQRDMULH
|
||||
def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
|
||||
// Cortex A57 Software Optimization Guide Sec 3.14
|
||||
def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
|
||||
def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
|
||||
|
||||
// ASIMD multiply accumulate, D-form
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
|
||||
// ASIMD multiply accumulate, Q-form
|
||||
def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
|
||||
|
||||
// ASIMD multiply accumulate long
|
||||
// ASIMD multiply accumulate saturating long
|
||||
def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
|
||||
def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>;
|
||||
def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>;
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>;
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>;
|
||||
|
||||
// ASIMD multiply long
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>;
|
||||
def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>;
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>;
|
||||
def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>;
|
||||
def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>;
|
||||
|
||||
// ASIMD pairwise add and accumulate
|
||||
// ASIMD shift accumulate
|
||||
def A57WriteIVA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
|
||||
def A57ReadIVA3 : SchedReadAdvance<3, [A57WriteIVA]>;
|
||||
def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>;
|
||||
def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
|
||||
def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>;
|
||||
def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
|
||||
|
||||
// ASIMD shift by immed, complex
|
||||
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>;
|
||||
|
@ -474,17 +490,22 @@ def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i6
|
|||
def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>;
|
||||
|
||||
// ASIMD FP multiply, D-form, FZ
|
||||
def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
|
||||
def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
|
||||
// ASIMD FP multiply, Q-form, FZ
|
||||
def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
|
||||
def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
|
||||
|
||||
// ASIMD FP multiply accumulate, D-form, FZ
|
||||
// ASIMD FP multiply accumulate, Q-form, FZ
|
||||
def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
|
||||
def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; }
|
||||
def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>;
|
||||
|
||||
// Cortex A57 Software Optimization Guide Sec 3.15
|
||||
// Advances from FP mul and mul-accum to mul-accum
|
||||
def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
|
||||
def A57ReadFPVMA6 : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
|
||||
|
||||
def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
|
||||
def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
|
||||
def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
|
||||
|
||||
// ASIMD FP round, D-form
|
||||
def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>;
|
||||
|
@ -502,10 +523,10 @@ def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
|
|||
// Q form - v16i8, v8i16, v4i32, v2i64
|
||||
|
||||
// ASIMD bitwise insert, Q-form
|
||||
def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>;
|
||||
def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL|BSP)v16i8")>;
|
||||
|
||||
// ASIMD duplicate, gen reg, D-form and Q-form
|
||||
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>;
|
||||
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUP(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>;
|
||||
|
||||
// ASIMD move, saturating
|
||||
|
@ -547,8 +568,9 @@ def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v
|
|||
|
||||
def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>;
|
||||
|
||||
// Cortex A57 Software Optimization Guide Sec 3.10
|
||||
def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
|
||||
def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA]>;
|
||||
def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>;
|
||||
def A57ReadFPM : SchedReadAdvance<0>;
|
||||
def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
|
||||
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -14,6 +13,10 @@
|
|||
// Prefix: A57Write
|
||||
// Latency: #cyc
|
||||
// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
|
||||
// Postfix (optional): (XYZ)_Forward
|
||||
//
|
||||
// The postfix is added to differentiate SchedWriteRes that are used in
|
||||
// subsequent SchedReadAdvances.
|
||||
//
|
||||
// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
|
||||
// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
|
||||
|
@ -26,7 +29,9 @@
|
|||
def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1V_FP_Forward : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
|
||||
def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
|
||||
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
|
||||
def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
|
||||
let ResourceCycles = [17]; }
|
||||
|
@ -46,6 +51,7 @@ def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
|
|||
def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
|
||||
def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
|
||||
def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
|
||||
def A57Write_4cyc_1X_NonMul_Forward : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
|
||||
def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
|
||||
def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
|
||||
def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
|
||||
|
@ -94,6 +100,10 @@ def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
|
|||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_6cyc_2W_Mul_Forward : SchedWriteRes<[A57UnitW, A57UnitW]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
|
||||
A57UnitL]> {
|
||||
let Latency = 5;
|
||||
|
@ -103,10 +113,18 @@ def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
|
|||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_5cyc_2V_FP_Forward : SchedWriteRes<[A57UnitV, A57UnitV]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_5cyc_2X_NonMul_Forward : SchedWriteRes<[A57UnitX, A57UnitX]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
|
||||
A57UnitV]> {
|
||||
let Latency = 10;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -19,7 +18,9 @@ def CycloneModel : SchedMachineModel {
|
|||
let MispredictPenalty = 16; // 14-19 cycles are typical.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -257,6 +258,7 @@ def CyReadAdrBase : SchedReadVariant<[
|
|||
SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift.
|
||||
def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map AArch64->Cyclone type.
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
//---
|
||||
// 7.8.9,7.8.11. Load/Store, paired
|
||||
|
@ -302,7 +304,8 @@ def : WriteRes<WriteSys, []> {let Latency = -1;}
|
|||
// 7.9 Vector Unit Instructions
|
||||
|
||||
// Simple vector operations take 2 cycles.
|
||||
def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
|
||||
def : WriteRes<WriteVd, [CyUnitV]> {let Latency = 2;}
|
||||
def : WriteRes<WriteVq, [CyUnitV]> {let Latency = 2;}
|
||||
|
||||
// Define some longer latency vector op types for Cyclone.
|
||||
def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
|
||||
|
@ -333,7 +336,7 @@ def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
|
|||
// COPY is handled above in the WriteMov Variant.
|
||||
def WriteVMov : SchedWriteVariant<[
|
||||
SchedVar<WriteVMovPred, [WriteX]>,
|
||||
SchedVar<NoSchedPred, [WriteV]>]>;
|
||||
SchedVar<NoSchedPred, [WriteVq]>]>;
|
||||
def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
|
||||
|
||||
// FMOVSr,FMOVDr are WriteF.
|
||||
|
@ -353,7 +356,7 @@ def : WriteRes<WriteFCopy, [CyUnitLS]> {
|
|||
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
|
||||
|
||||
// INS V[x],R
|
||||
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
|
||||
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
|
||||
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
|
||||
|
||||
// SMOV,UMOV R,V[x]
|
||||
|
@ -495,7 +498,7 @@ def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>;
|
|||
// WriteV includes:
|
||||
// SHLL,SSHLL,USHLL
|
||||
// SLI,SRI
|
||||
// BIF,BIT,BSL
|
||||
// BIF,BIT,BSL,BSP
|
||||
// EXT
|
||||
// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
|
||||
// XTN2
|
||||
|
@ -569,7 +572,7 @@ def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
|
|||
//---
|
||||
|
||||
// FCVT lengthen f16/s32
|
||||
def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
|
||||
def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
|
||||
|
||||
// FCVT,FCVTN,FCVTXN
|
||||
// SCVTF,UCVTF V,V
|
||||
|
@ -679,61 +682,61 @@ def : InstRW<[WriteVLDShuffle],
|
|||
def : InstRW<[WriteVLDShuffle, WriteAdr],
|
||||
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
|
||||
(instregex "LD2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
|
||||
(instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
|
||||
(instregex "LD2i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
|
||||
(instregex "LD2i(8|16|32)_POST")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
|
||||
(instregex "LD2i64$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
|
||||
(instregex "LD2i64_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
|
||||
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3Threev(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD3Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
|
||||
(instregex "LD3i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
|
||||
(instregex "LD3i(8|16|32)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3i64$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3i64_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
|
||||
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD3Rv1d,LD3Rv2d)>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD4Fourv(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
|
||||
WriteVLDPairShuffle, WriteVLDPairShuffle],
|
||||
|
@ -742,25 +745,25 @@ def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
|
|||
WriteVLDPairShuffle, WriteVLDPairShuffle],
|
||||
(instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4i(8|16|32)_POST")>;
|
||||
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4i64)>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD4i64_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4Rv1d,LD4Rv2d)>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
|
||||
|
||||
//---
|
||||
|
|
|
@ -1,847 +0,0 @@
|
|||
//=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for the Samsung Exynos M1 to support
|
||||
// instruction scheduling and other instruction cost heuristics.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// The Exynos-M1 is a traditional superscalar microprocessor with a
|
||||
// 4-wide in-order stage for decode and dispatch and a wider issue stage.
|
||||
// The execution units and loads and stores are out-of-order.
|
||||
|
||||
def ExynosM1Model : SchedMachineModel {
|
||||
let IssueWidth = 4; // Up to 4 uops per cycle.
|
||||
let MicroOpBufferSize = 96; // ROB size.
|
||||
let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.
|
||||
let LoadLatency = 4; // Optimistic load cases.
|
||||
let MispredictPenalty = 14; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1; // Use the default model otherwise.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define each kind of processor resource and number available on the Exynos-M1,
|
||||
// which has 9 pipelines, each with its own queue with out-of-order dispatch.
|
||||
|
||||
let SchedModel = ExynosM1Model in {
|
||||
|
||||
def M1UnitA : ProcResource<2>; // Simple integer
|
||||
def M1UnitC : ProcResource<1>; // Simple and complex integer
|
||||
def M1UnitD : ProcResource<1>; // Integer division (inside C, serialized)
|
||||
def M1UnitB : ProcResource<2>; // Branch
|
||||
def M1UnitL : ProcResource<1>; // Load
|
||||
def M1UnitS : ProcResource<1>; // Store
|
||||
def M1PipeF0 : ProcResource<1>; // FP #0
|
||||
let Super = M1PipeF0 in {
|
||||
def M1UnitFMAC : ProcResource<1>; // FP multiplication
|
||||
def M1UnitNAL0 : ProcResource<1>; // Simple vector
|
||||
def M1UnitNMISC : ProcResource<1>; // Miscellanea
|
||||
def M1UnitFCVT : ProcResource<1>; // FP conversion
|
||||
def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
|
||||
}
|
||||
def M1PipeF1 : ProcResource<1>; // FP #1
|
||||
let Super = M1PipeF1 in {
|
||||
def M1UnitFADD : ProcResource<1>; // Simple FP
|
||||
def M1UnitNAL1 : ProcResource<1>; // Simple vector
|
||||
def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
|
||||
def M1UnitFST : ProcResource<1>; // FP store
|
||||
}
|
||||
|
||||
def M1UnitALU : ProcResGroup<[M1UnitA,
|
||||
M1UnitC]>; // All integer
|
||||
def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
|
||||
M1UnitNAL1]>; // All simple vector
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Predicates.
|
||||
|
||||
def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
|
||||
MI->getOperand(0).getReg() != AArch64::LR}]>;
|
||||
def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Coarse scheduling model.
|
||||
|
||||
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
|
||||
def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
|
||||
def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2;
|
||||
let ResourceCycles = [2]; }
|
||||
def M1WriteAB : SchedWriteRes<[M1UnitALU,
|
||||
M1UnitC]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteAC : SchedWriteRes<[M1UnitALU,
|
||||
M1UnitALU,
|
||||
M1UnitC]> { let Latency = 2;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteAD : SchedWriteRes<[M1UnitALU,
|
||||
M1UnitC]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAA]>]>;
|
||||
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
|
||||
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
|
||||
|
||||
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
|
||||
def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>,
|
||||
SchedVar<NoSchedPred, [M1WriteAC]>]>;
|
||||
|
||||
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
|
||||
def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
|
||||
def M1WriteLA : SchedWriteRes<[M1UnitL]> { let Latency = 6;
|
||||
let ResourceCycles = [2]; }
|
||||
def M1WriteLB : SchedWriteRes<[M1UnitL,
|
||||
M1UnitA]> { let Latency = 4;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteLC : SchedWriteRes<[M1UnitL,
|
||||
M1UnitA]> { let Latency = 5;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteLD : SchedWriteRes<[M1UnitL,
|
||||
M1UnitA]> { let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2, 1]; }
|
||||
def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
|
||||
let NumMicroOps = 0; }
|
||||
def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M1WriteLC]>]>;
|
||||
def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M1WriteLD]>]>;
|
||||
|
||||
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
|
||||
def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
|
||||
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
|
||||
def M1WriteSA : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSB : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 3;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSC : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 3;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteSD : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitA]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSE : SchedWriteRes<[M1UnitS,
|
||||
M1UnitA]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteSE]>]>;
|
||||
def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M1WriteSB]>]>;
|
||||
|
||||
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>;
|
||||
|
||||
// Branch instructions.
|
||||
def : WriteRes<WriteBr, []> { let Latency = 0; }
|
||||
def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
|
||||
|
||||
// Move instructions.
|
||||
def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
|
||||
|
||||
// Divide and multiply instructions.
|
||||
def : WriteRes<WriteID32, [M1UnitC,
|
||||
M1UnitD]> { let Latency = 13;
|
||||
let ResourceCycles = [1, 13]; }
|
||||
def : WriteRes<WriteID64, [M1UnitC,
|
||||
M1UnitD]> { let Latency = 21;
|
||||
let ResourceCycles = [1, 21]; }
|
||||
// TODO: Long multiplication take 5 cycles and also the ALU.
|
||||
def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
|
||||
def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4;
|
||||
let ResourceCycles = [2]; }
|
||||
|
||||
// Miscellaneous instructions.
|
||||
def : WriteRes<WriteExtr, [M1UnitALU,
|
||||
M1UnitALU]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
|
||||
// Addressing modes.
|
||||
def : WriteRes<WriteAdr, []> { let Latency = 1;
|
||||
let NumMicroOps = 0; }
|
||||
def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
|
||||
|
||||
// Load instructions.
|
||||
def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
|
||||
def : WriteRes<WriteLDHi, []> { let Latency = 4;
|
||||
let NumMicroOps = 0; }
|
||||
def : SchedAlias<WriteLDIdx, M1WriteLX>;
|
||||
|
||||
// Store instructions.
|
||||
def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
|
||||
def : SchedAlias<WriteSTIdx, M1WriteSX>;
|
||||
|
||||
// FP data instructions.
|
||||
def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
|
||||
def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15;
|
||||
let ResourceCycles = [15]; }
|
||||
def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
|
||||
|
||||
// FP miscellaneous instructions.
|
||||
def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
|
||||
|
||||
// FP load instructions.
|
||||
def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
|
||||
|
||||
// FP store instructions.
|
||||
def : WriteRes<WriteVST, [M1UnitS,
|
||||
M1UnitFST]> { let Latency = 1;
|
||||
let NumMicroOps = 1; }
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
|
||||
|
||||
// Other miscellaneous instructions.
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteHint, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteSys, []> { let Latency = 1; }
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Fast forwarding.
|
||||
|
||||
// TODO: Add FP register forwarding rules.
|
||||
def : ReadAdvance<ReadI, 0>;
|
||||
def : ReadAdvance<ReadISReg, 0>;
|
||||
def : ReadAdvance<ReadIEReg, 0>;
|
||||
def : ReadAdvance<ReadIM, 0>;
|
||||
// TODO: The forwarding for WriteIM32 saves actually 2 cycles.
|
||||
def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
|
||||
def : ReadAdvance<ReadID, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Finer scheduling model.
|
||||
|
||||
def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitFADD]> { let Latency = 9;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteNEONB : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitFST]> { let Latency = 5;
|
||||
let NumMicroOps = 2;}
|
||||
def M1WriteNEONC : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitFST]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEOND : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitFST,
|
||||
M1UnitL]> { let Latency = 10;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT,
|
||||
M1UnitFST]> { let Latency = 8;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT,
|
||||
M1UnitFST,
|
||||
M1UnitL]> { let Latency = 13;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC,
|
||||
M1UnitFST]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONH : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitFST]> { let Latency = 3;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONI : SchedWriteRes<[M1UnitFST,
|
||||
M1UnitL]> { let Latency = 9;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC,
|
||||
M1UnitFMAC]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC,
|
||||
M1UnitFMAC]> { let Latency = 7;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteNEONL : SchedWriteRes<[M1UnitNALU]> { let Latency = 2;
|
||||
let ResourceCycles = [2]; }
|
||||
def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
|
||||
def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
|
||||
def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
|
||||
def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
|
||||
def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
|
||||
// TODO
|
||||
def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15;
|
||||
let ResourceCycles = [15]; }
|
||||
def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23;
|
||||
let ResourceCycles = [23]; }
|
||||
def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; }
|
||||
def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; }
|
||||
def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; }
|
||||
def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; }
|
||||
def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; }
|
||||
def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
|
||||
def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
|
||||
def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; }
|
||||
def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
|
||||
def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
|
||||
def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
|
||||
def M1WriteTB : SchedWriteRes<[M1UnitC,
|
||||
M1UnitALU]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteVLDA : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteVLDB : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL]> { let Latency = 7;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteVLDC : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL]> { let Latency = 8;
|
||||
let NumMicroOps = 4; }
|
||||
def M1WriteVLDD : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU]> { let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2, 1]; }
|
||||
def M1WriteVLDE : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M1WriteVLDF : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL]> { let Latency = 10;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1, 1]; }
|
||||
def M1WriteVLDG : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 7;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2, 1, 1]; }
|
||||
def M1WriteVLDH : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 6;
|
||||
let NumMicroOps = 3; }
|
||||
def M1WriteVLDI : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL]> { let Latency = 12;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2, 2, 2]; }
|
||||
def M1WriteVLDJ : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 9;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [2, 1, 1, 1]; }
|
||||
def M1WriteVLDK : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 9;
|
||||
let NumMicroOps = 5;
|
||||
let ResourceCycles = [2, 1, 1, 1, 1]; }
|
||||
def M1WriteVLDL : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitL,
|
||||
M1UnitNALU]> { let Latency = 7;
|
||||
let NumMicroOps = 5;
|
||||
let ResourceCycles = [1, 1, 1, 1, 1]; }
|
||||
def M1WriteVLDM : SchedWriteRes<[M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU,
|
||||
M1UnitL,
|
||||
M1UnitNALU,
|
||||
M1UnitNALU]> { let Latency = 7;
|
||||
let NumMicroOps = 6;
|
||||
let ResourceCycles = [1, 1, 1, 1, 1, 1]; }
|
||||
def M1WriteVLDN : SchedWriteRes<[M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL,
|
||||
M1UnitL]> { let Latency = 14;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [2, 1, 2, 1]; }
|
||||
def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
|
||||
def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
|
||||
def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
|
||||
def M1WriteVSTD : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [7, 1, 1]; }
|
||||
def M1WriteVSTE : SchedWriteRes<[M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 8;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [7, 1, 1, 1, 1]; }
|
||||
def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 15;
|
||||
let NumMicroOps = 5;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1]; }
|
||||
def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 16;
|
||||
let NumMicroOps = 6;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1]; }
|
||||
def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 14;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1]; }
|
||||
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitS,
|
||||
M1UnitFST,
|
||||
M1UnitFST,
|
||||
M1UnitFST]> { let Latency = 17;
|
||||
let NumMicroOps = 7;
|
||||
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
|
||||
|
||||
// Branch instructions
|
||||
def : InstRW<[M1WriteB1], (instrs Bcc)>;
|
||||
def : InstRW<[M1WriteA1], (instrs BL)>;
|
||||
def : InstRW<[M1WriteBX], (instrs BLR)>;
|
||||
def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
|
||||
def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : InstRW<[M1WriteA1], (instrs COPY)>;
|
||||
def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
|
||||
|
||||
// Divide and multiply instructions.
|
||||
|
||||
// Miscellaneous instructions.
|
||||
|
||||
// Load instructions.
|
||||
def : InstRW<[M1WriteLB,
|
||||
WriteLDHi,
|
||||
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
|
||||
def : InstRW<[M1WriteLX,
|
||||
ReadAdrBase], (instregex "^PRFMro[WX]")>;
|
||||
|
||||
// Store instructions.
|
||||
|
||||
// FP data instructions.
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
|
||||
def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>;
|
||||
def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
|
||||
def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
|
||||
def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
|
||||
def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
|
||||
def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>;
|
||||
def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
|
||||
def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>;
|
||||
def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
|
||||
def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
|
||||
def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
|
||||
|
||||
// FP miscellaneous instructions.
|
||||
def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
|
||||
def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
|
||||
def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
|
||||
def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^FRECPXv1")>;
|
||||
def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)S(16|32|64)")>;
|
||||
def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
|
||||
def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
|
||||
|
||||
// FP load instructions.
|
||||
def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>;
|
||||
def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
|
||||
def : InstRW<[M1WriteLY,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M1WriteLD,
|
||||
ReadAdrBase], (instregex "^LDRQro[WX]")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
M1WriteLH], (instregex "^LDN?P[DS]i")>;
|
||||
def : InstRW<[M1WriteLA,
|
||||
M1WriteLH], (instregex "^LDN?PQi")>;
|
||||
def : InstRW<[M1WriteLC,
|
||||
M1WriteLH,
|
||||
WriteAdr], (instregex "^LDP[DS](post|pre)")>;
|
||||
def : InstRW<[M1WriteLD,
|
||||
M1WriteLH,
|
||||
WriteAdr], (instregex "^LDPQ(post|pre)")>;
|
||||
|
||||
// FP store instructions.
|
||||
def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
|
||||
def : InstRW<[M1WriteSY,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M1WriteSB,
|
||||
ReadAdrBase], (instregex "^STRQro[WX]")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STP[DS](post|pre)")>;
|
||||
def : InstRW<[M1WriteSC,
|
||||
WriteAdr], (instregex "^STPQ(post|pre)")>;
|
||||
|
||||
// ASIMD instructions.
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
|
||||
def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
|
||||
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^S[RS]I[dv]")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>;
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
|
||||
def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
|
||||
def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
|
||||
def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
|
||||
def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>;
|
||||
def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>;
|
||||
def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>;
|
||||
def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>;
|
||||
def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
|
||||
def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
|
||||
def : InstRW<[M1WriteNEONJ], (instregex "^FMULX?v.i")>;
|
||||
def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v.f")>;
|
||||
def : InstRW<[M1WriteNEONK], (instregex "^FML[AS]v.i")>;
|
||||
def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v.f")>;
|
||||
def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;
|
||||
|
||||
// ASIMD miscellaneous instructions.
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
|
||||
def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
|
||||
def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^EXTv8")>;
|
||||
def : InstRW<[M1WriteNEONL], (instregex "^EXTv16")>;
|
||||
def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^MOVI[Dv]")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^FMOVv")>;
|
||||
def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>;
|
||||
def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
|
||||
def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
|
||||
(instregex "^TB[LX]v8i8Two")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
|
||||
(instregex "^TB[LX]v8i8Three")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
|
||||
(instregex "^TB[LX]v8i8Four")>;
|
||||
def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
|
||||
(instregex "^TB[LX]v16i8Two")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
|
||||
(instregex "^TB[LX]v16i8Three")>;
|
||||
def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
|
||||
(instregex "^TB[LX]v16i8Four")>;
|
||||
def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
|
||||
def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
|
||||
def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
|
||||
def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
|
||||
|
||||
// ASIMD load instructions.
|
||||
def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>;
|
||||
def : InstRW<[M1WriteVLDD,
|
||||
WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDE,
|
||||
WriteAdr], (instregex "LD1i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteL5,
|
||||
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVLDG,
|
||||
WriteAdr], (instregex "LD2i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>;
|
||||
def : InstRW<[M1WriteVLDG,
|
||||
WriteAdr], (instregex "LD2i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDH,
|
||||
WriteAdr], (instregex "LD2i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVLDJ,
|
||||
WriteAdr], (instregex "LD3i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>;
|
||||
def : InstRW<[M1WriteVLDJ,
|
||||
WriteAdr], (instregex "LD3i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDL,
|
||||
WriteAdr], (instregex "LD3i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVLDK,
|
||||
WriteAdr], (instregex "LD4i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>;
|
||||
def : InstRW<[M1WriteVLDK,
|
||||
WriteAdr], (instregex "LD4i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>;
|
||||
def : InstRW<[M1WriteVLDM,
|
||||
WriteAdr], (instregex "LD4i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>;
|
||||
def : InstRW<[M1WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
|
||||
|
||||
// ASIMD store instructions.
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST1i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVSTA,
|
||||
WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVSTA,
|
||||
WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVSTB,
|
||||
WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVSTB,
|
||||
WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M1WriteVSTC,
|
||||
WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M1WriteVSTC,
|
||||
WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST2i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVSTD,
|
||||
WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVSTE,
|
||||
WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>;
|
||||
def : InstRW<[M1WriteVSTE,
|
||||
WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVSTH,
|
||||
WriteAdr], (instregex "ST3i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>;
|
||||
def : InstRW<[M1WriteVSTH,
|
||||
WriteAdr], (instregex "ST3i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>;
|
||||
def : InstRW<[M1WriteVSTF,
|
||||
WriteAdr], (instregex "ST3i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVSTF,
|
||||
WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVSTG,
|
||||
WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>;
|
||||
def : InstRW<[M1WriteVSTG,
|
||||
WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>;
|
||||
def : InstRW<[M1WriteVSTH,
|
||||
WriteAdr], (instregex "ST4i(8|16)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>;
|
||||
def : InstRW<[M1WriteVSTH,
|
||||
WriteAdr], (instregex "ST4i(32)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>;
|
||||
def : InstRW<[M1WriteVSTF,
|
||||
WriteAdr], (instregex "ST4i(64)_POST$")>;
|
||||
|
||||
def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M1WriteVSTF,
|
||||
WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>;
|
||||
def : InstRW<[M1WriteVSTI,
|
||||
WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
|
||||
def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>;
|
||||
def : InstRW<[M1WriteVSTI,
|
||||
WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
|
||||
|
||||
// Cryptography instructions.
|
||||
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
|
||||
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
|
||||
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
|
||||
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
|
||||
|
||||
def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
|
||||
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
|
||||
def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
|
||||
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
|
||||
def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
|
||||
|
||||
// CRC instructions.
|
||||
def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
|
||||
|
||||
} // SchedModel = ExynosM1Model
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -25,10 +24,9 @@ def ExynosM3Model : SchedMachineModel {
|
|||
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1; // Use the default model otherwise.
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -106,24 +104,13 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
|
|||
M3UnitNSHF1,
|
||||
M3UnitNSHF2]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Predicates.
|
||||
|
||||
def M3BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
|
||||
MI->getOperand(0).isReg() &&
|
||||
MI->getOperand(0).getReg() != AArch64::LR}]>;
|
||||
def M3ResetFastPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
|
||||
def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
|
||||
MI->getOpcode() == AArch64::EXTRXrri) &&
|
||||
MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
|
||||
MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
|
||||
def M3ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Coarse scheduling model.
|
||||
|
||||
def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0;
|
||||
let NumMicroOps = 1; }
|
||||
def M3WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
|
||||
let NumMicroOps = 0; }
|
||||
|
||||
def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; }
|
||||
def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2;
|
||||
|
@ -140,15 +127,25 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU,
|
|||
let NumMicroOps = 2; }
|
||||
def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
|
||||
def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
|
||||
def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
|
||||
SchedVar<M3ShiftLeftFastPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotateRightFastPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<ExynosArithPred, [M3WriteA1]>,
|
||||
SchedVar<ExynosLogicPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<ExynosArithPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<ExynosLogicPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
|
||||
SchedVar<ExynosLogicPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAA]>]>;
|
||||
|
||||
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
|
||||
def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkFastPred, [M3WriteAB]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAC]>]>;
|
||||
def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
|
||||
SchedVar<NoSchedPred, [M3WriteAB]>]>;
|
||||
|
||||
def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
|
||||
def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
|
||||
|
@ -165,44 +162,46 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
|
|||
def M3WriteLD : SchedWriteRes<[M3UnitA,
|
||||
M3UnitL]> { let Latency = 4;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteLE : SchedWriteRes<[M3UnitA,
|
||||
M3UnitL]> { let Latency = 6;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
|
||||
let NumMicroOps = 0; }
|
||||
|
||||
def M3WriteLX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M3WriteLB]>]>;
|
||||
def M3WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteL5]>,
|
||||
SchedVar<NoSchedPred, [M3WriteL4]>]>;
|
||||
def M3WriteLY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteLE]>,
|
||||
SchedVar<NoSchedPred, [M3WriteL5]>]>;
|
||||
|
||||
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
|
||||
def M3WriteSA : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS,
|
||||
M3UnitFST]> { let Latency = 2;
|
||||
M3UnitFST]> { let Latency = 3;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSB : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSC : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS]> { let Latency = 2;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSC : SchedWriteRes<[M3UnitA,
|
||||
M3UnitS,
|
||||
M3UnitFST]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def M3WriteSY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteSA]>,
|
||||
SchedVar<NoSchedPred, [WriteVST]>]>;
|
||||
|
||||
def M3WriteSX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteSB]>]>;
|
||||
def M3WriteSY : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteSC]>]>;
|
||||
|
||||
def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>;
|
||||
def M3ReadAdrBase : SchedReadVariant<[SchedVar<ExynosScaledIdxPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>;
|
||||
|
||||
// Branch instructions.
|
||||
def : SchedAlias<WriteBr, M3WriteZ0>;
|
||||
def : WriteRes<WriteBrReg, [M3UnitC]> { let Latency = 1; }
|
||||
def : SchedAlias<WriteBrReg, M3WriteC1>;
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : WriteRes<WriteI, [M3UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteISReg, [M3UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIEReg, [M3UnitALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIS, [M3UnitALU]> { let Latency = 1; }
|
||||
def : SchedAlias<WriteI, M3WriteA1>;
|
||||
def : SchedAlias<WriteISReg, M3WriteA1>;
|
||||
def : SchedAlias<WriteIEReg, M3WriteA1>;
|
||||
def : SchedAlias<WriteIS, M3WriteA1>;
|
||||
|
||||
// Move instructions.
|
||||
def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; }
|
||||
def : SchedAlias<WriteImm, M3WriteA1>;
|
||||
|
||||
// Divide and multiply instructions.
|
||||
def : WriteRes<WriteID32, [M3UnitC,
|
||||
|
@ -216,26 +215,23 @@ def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
|
|||
let ResourceCycles = [2]; }
|
||||
|
||||
// Miscellaneous instructions.
|
||||
def : WriteRes<WriteExtr, [M3UnitALU,
|
||||
M3UnitALU]> { let Latency = 1;
|
||||
let NumMicroOps = 2; }
|
||||
def : SchedAlias<WriteExtr, M3WriteAY>;
|
||||
|
||||
// Addressing modes.
|
||||
def : WriteRes<WriteAdr, []> { let Latency = 1;
|
||||
let NumMicroOps = 0; }
|
||||
def : SchedAlias<WriteAdr, M3WriteZ1>;
|
||||
def : SchedAlias<ReadAdrBase, M3ReadAdrBase>;
|
||||
|
||||
// Load instructions.
|
||||
def : SchedAlias<WriteLD, M3WriteL4>;
|
||||
def : WriteRes<WriteLDHi, []> { let Latency = 4;
|
||||
let NumMicroOps = 0; }
|
||||
def : SchedAlias<WriteLDIdx, M3WriteLX>;
|
||||
def : SchedAlias<WriteLDIdx, M3WriteLB>;
|
||||
|
||||
// Store instructions.
|
||||
def : SchedAlias<WriteST, M3WriteS1>;
|
||||
def : SchedAlias<WriteSTP, M3WriteS1>;
|
||||
def : SchedAlias<WriteSTX, M3WriteS1>;
|
||||
def : SchedAlias<WriteSTIdx, M3WriteSX>;
|
||||
def : SchedAlias<WriteSTIdx, M3WriteSB>;
|
||||
|
||||
// FP data instructions.
|
||||
def : WriteRes<WriteF, [M3UnitFADD]> { let Latency = 2; }
|
||||
|
@ -245,7 +241,6 @@ def : WriteRes<WriteFDiv, [M3UnitFDIV]> { let Latency = 12;
|
|||
def : WriteRes<WriteFMul, [M3UnitFMAC]> { let Latency = 4; }
|
||||
|
||||
// FP miscellaneous instructions.
|
||||
// TODO: Conversion between register files is much different.
|
||||
def : WriteRes<WriteFCvt, [M3UnitFCVT]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFImm, [M3UnitNALU]> { let Latency = 1; }
|
||||
def : WriteRes<WriteFCopy, [M3UnitNALU]> { let Latency = 1; }
|
||||
|
@ -259,7 +254,8 @@ def : WriteRes<WriteVST, [M3UnitS,
|
|||
let NumMicroOps = 1; }
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVd, [M3UnitNALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVq, [M3UnitNALU]> { let Latency = 3; }
|
||||
|
||||
// Other miscellaneous instructions.
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
|
@ -282,6 +278,7 @@ def : ReadAdvance<ReadID, 0>;
|
|||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Finer scheduling model.
|
||||
|
@ -481,11 +478,15 @@ def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
|
|||
|
||||
// Special cases.
|
||||
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
|
||||
def M3WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
|
||||
SchedVar<NoSchedPred, [M3WriteZ0]>]>;
|
||||
def M3WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
|
||||
SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
|
||||
|
||||
// Fast forwarding.
|
||||
def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
|
||||
def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
|
||||
M3WriteFMAC5]>;
|
||||
def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
|
||||
SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
|
||||
def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
|
||||
|
||||
// Branch instructions
|
||||
|
@ -496,29 +497,40 @@ def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
|
|||
def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
|
||||
|
||||
// Arithmetic and logical integer instructions.
|
||||
def : InstRW<[M3WriteA1], (instrs COPY)>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
|
||||
def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
|
||||
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
|
||||
def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
|
||||
def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;
|
||||
|
||||
// Move instructions.
|
||||
def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
|
||||
def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
|
||||
def : InstRW<[M3WriteCOPY], (instrs COPY)>;
|
||||
def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
|
||||
def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
|
||||
|
||||
// Divide and multiply instructions.
|
||||
|
||||
// Miscellaneous instructions.
|
||||
def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
|
||||
|
||||
// Load instructions.
|
||||
def : InstRW<[M3WriteLD,
|
||||
WriteLDHi,
|
||||
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
|
||||
def : InstRW<[M3WriteLB,
|
||||
ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
|
||||
def : InstRW<[M3WriteLX,
|
||||
ReadAdrBase], (instregex "^PRFMro[WX]")>;
|
||||
ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
|
||||
def : InstRW<[M3WriteLB,
|
||||
ReadAdrBase], (instrs PRFMroW)>;
|
||||
def : InstRW<[M3WriteLX,
|
||||
ReadAdrBase], (instrs PRFMroX)>;
|
||||
|
||||
// Store instructions.
|
||||
def : InstRW<[M3WriteSB,
|
||||
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
|
||||
def : InstRW<[WriteST,
|
||||
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
|
||||
|
||||
// FP data instructions.
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
|
||||
|
@ -555,9 +567,11 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
|
|||
def : InstRW<[WriteVLD,
|
||||
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
|
||||
def : InstRW<[M3WriteLX,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M3WriteLB,
|
||||
def : InstRW<[M3WriteLE,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
|
||||
def : InstRW<[M3WriteLY,
|
||||
ReadAdrBase], (instregex "^LDRQro[WX]")>;
|
||||
def : InstRW<[WriteVLD,
|
||||
M3WriteLH], (instregex "^LDN?P[DS]i")>;
|
||||
|
@ -575,20 +589,24 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
|
|||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
|
||||
def : InstRW<[M3WriteSY,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
|
||||
def : InstRW<[M3WriteSA,
|
||||
ReadAdrBase], (instregex "^STRQro[WX]")>;
|
||||
ReadAdrBase], (instregex "^STR[BDHS]roW")>;
|
||||
def : InstRW<[M3WriteSA,
|
||||
ReadAdrBase], (instregex "^STRQroW")>;
|
||||
def : InstRW<[WriteVST,
|
||||
ReadAdrBase], (instregex "^STR[BDHS]roX")>;
|
||||
def : InstRW<[M3WriteSY,
|
||||
ReadAdrBase], (instregex "^STRQroX")>;
|
||||
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
|
||||
def : InstRW<[WriteVST,
|
||||
WriteAdr], (instregex "^STP[DS](post|pre)")>;
|
||||
def : InstRW<[M3WriteSA,
|
||||
def : InstRW<[M3WriteSC,
|
||||
WriteAdr], (instregex "^STPQ(post|pre)")>;
|
||||
|
||||
// ASIMD instructions.
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>;
|
||||
def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>;
|
||||
def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>;
|
||||
def : InstRW<[M3WriteNMSC1], (instregex "^((SQ)?ABS|SQNEG)v")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>;
|
||||
|
@ -597,7 +615,6 @@ def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>;
|
|||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>;
|
||||
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>;
|
||||
def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
|
||||
|
@ -647,12 +664,12 @@ def : InstRW<[M3WriteNEONY], (instrs FSQRTv2f64)>;
|
|||
|
||||
// ASIMD miscellaneous instructions.
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^RBITv")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>;
|
||||
def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^CPY")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>;
|
||||
def : InstRW<[M3WriteMOVI], (instregex "^MOVI")>;
|
||||
def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>;
|
||||
|
@ -668,108 +685,108 @@ def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
|
|||
// ASIMD load instructions.
|
||||
def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDA,
|
||||
WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDA,
|
||||
WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDB,
|
||||
WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDB,
|
||||
WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDC,
|
||||
WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDC,
|
||||
WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>;
|
||||
def : InstRW<[M3WriteVLDD,
|
||||
WriteAdr], (instregex "LD1i(8|16|32)_POST")>;
|
||||
M3WriteA1], (instregex "LD1i(8|16|32)_POST")>;
|
||||
def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>;
|
||||
def : InstRW<[M3WriteVLDE,
|
||||
WriteAdr], (instregex "LD1i(64)_POST")>;
|
||||
M3WriteA1], (instregex "LD1i(64)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteL5,
|
||||
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[M3WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST")>;
|
||||
M3WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDF,
|
||||
WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>;
|
||||
def : InstRW<[M3WriteVLDG,
|
||||
WriteAdr], (instregex "LD2i(8|16|32)_POST")>;
|
||||
M3WriteA1], (instregex "LD2i(8|16|32)_POST")>;
|
||||
def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>;
|
||||
def : InstRW<[M3WriteVLDH,
|
||||
WriteAdr], (instregex "LD2i(64)_POST")>;
|
||||
M3WriteA1], (instregex "LD2i(64)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDA,
|
||||
WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[M3WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST")>;
|
||||
M3WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDI,
|
||||
WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
|
||||
def : InstRW<[M3WriteVLDJ,
|
||||
WriteAdr], (instregex "LD3i(8|16|32)_POST")>;
|
||||
M3WriteA1], (instregex "LD3i(8|16|32)_POST")>;
|
||||
def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>;
|
||||
def : InstRW<[M3WriteVLDL,
|
||||
WriteAdr], (instregex "LD3i(64)_POST")>;
|
||||
M3WriteA1], (instregex "LD3i(64)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDB,
|
||||
WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[M3WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
|
||||
M3WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDN,
|
||||
WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>;
|
||||
def : InstRW<[M3WriteVLDK,
|
||||
WriteAdr], (instregex "LD4i(8|16|32)_POST")>;
|
||||
M3WriteA1], (instregex "LD4i(8|16|32)_POST")>;
|
||||
def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>;
|
||||
def : InstRW<[M3WriteVLDM,
|
||||
WriteAdr], (instregex "LD4i(64)_POST")>;
|
||||
M3WriteA1], (instregex "LD4i(64)_POST")>;
|
||||
|
||||
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[M3WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
|
||||
M3WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[M3WriteVLDC,
|
||||
WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
|
||||
M3WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
// ASIMD store instructions.
|
||||
def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -24,8 +23,9 @@ def FalkorModel : SchedMachineModel {
|
|||
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -92,7 +92,8 @@ def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
|
|||
def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteV, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVd, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVq, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVST, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteSys, []> { let Unsupported = 1; }
|
||||
|
@ -111,6 +112,7 @@ def : ReadAdvance<ReadID, 0>;
|
|||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
// Detailed Refinements
|
||||
// -----------------------------------------------------------------------------
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -909,10 +908,10 @@ def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
|
|||
// -----------------------------------------------------------------------------
|
||||
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>;
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
|
||||
def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
|
||||
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
|
||||
|
@ -936,7 +935,7 @@ def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
|
|||
def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
|
||||
(instregex "^INSv(i32|i64)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
|
||||
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>;
|
||||
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v16i8$")>;
|
||||
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
|
||||
def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
|
||||
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -28,8 +27,9 @@ def KryoModel : SchedMachineModel {
|
|||
let LoopMicroOpBufferSize = 16;
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -95,7 +95,8 @@ def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>
|
|||
{ let Latency = 6; let NumMicroOps = 2; }
|
||||
def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>
|
||||
{ let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
|
||||
def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }
|
||||
|
||||
|
@ -117,6 +118,7 @@ def : ReadAdvance<ReadID, 0>;
|
|||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -463,13 +462,13 @@ def KryoWrite_1cyc_X_noRSV_74ln :
|
|||
let Latency = 1; let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[KryoWrite_1cyc_X_noRSV_74ln],
|
||||
(instrs BIFv8i8, BITv8i8, BSLv8i8)>;
|
||||
(instrs BIFv8i8, BITv8i8, BSLv8i8, BSPv8i8)>;
|
||||
def KryoWrite_1cyc_X_X_75ln :
|
||||
SchedWriteRes<[KryoUnitX, KryoUnitX]> {
|
||||
let Latency = 1; let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[KryoWrite_1cyc_X_X_75ln],
|
||||
(instrs BIFv16i8, BITv16i8, BSLv16i8)>;
|
||||
(instrs BIFv16i8, BITv16i8, BSLv16i8, BSPv16i8)>;
|
||||
def KryoWrite_0cyc_noRSV_11ln :
|
||||
SchedWriteRes<[]> {
|
||||
let Latency = 0; let NumMicroOps = 1;
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
//===- AArch64SchedPredAmpere.td - AArch64 Sched Preds -----*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines scheduling predicate definitions that are used by the
|
||||
// AArch64 Ampere Computing processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Auxiliary predicates.
|
||||
|
||||
// Check for a LSL shift <= 4
|
||||
def AmpereCheapLSL : MCSchedPredicate<
|
||||
CheckAny<[CheckShiftBy0,
|
||||
CheckAll<
|
||||
[CheckShiftLSL,
|
||||
CheckAny<
|
||||
[CheckShiftBy1,
|
||||
CheckShiftBy2,
|
||||
CheckShiftBy3,
|
||||
CheckShiftBy4]>]>]>>;
|
|
@ -0,0 +1,157 @@
|
|||
//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines scheduling predicate definitions that are used by the
|
||||
// AArch64 Exynos processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Auxiliary predicates.
|
||||
|
||||
// Check the shift in arithmetic and logic instructions.
|
||||
def ExynosCheckShift : CheckAny<[CheckShiftBy0,
|
||||
CheckAll<
|
||||
[CheckShiftLSL,
|
||||
CheckAny<
|
||||
[CheckShiftBy1,
|
||||
CheckShiftBy2,
|
||||
CheckShiftBy3]>]>]>;
|
||||
|
||||
// Exynos predicates.
|
||||
|
||||
// Identify BLR specifying the LR register as the indirect target register.
|
||||
def ExynosBranchLinkLRPred : MCSchedPredicate<
|
||||
CheckAll<[CheckOpcode<[BLR]>,
|
||||
CheckRegOperand<0, LR>]>>;
|
||||
|
||||
// Identify arithmetic instructions without or with limited extension or shift.
|
||||
def ExynosArithFn : TIIPredicate<
|
||||
"isExynosArithFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithExtOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<[CheckExtBy0,
|
||||
CheckAll<
|
||||
[CheckAny<
|
||||
[CheckExtUXTW,
|
||||
CheckExtUXTX]>,
|
||||
CheckAny<
|
||||
[CheckExtBy1,
|
||||
CheckExtBy2,
|
||||
CheckExtBy3]>]>]>>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsArithShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<ExynosCheckShift>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsArithUnshiftOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsArithImmOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ExynosArithPred : MCSchedPredicate<ExynosArithFn>;
|
||||
|
||||
// Identify logic instructions with limited shift.
|
||||
def ExynosLogicFn : TIIPredicate<
|
||||
"isExynosLogicFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsLogicShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<ExynosCheckShift>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsLogicUnshiftOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsLogicImmOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ExynosLogicPred : MCSchedPredicate<ExynosLogicFn>;
|
||||
|
||||
// Identify more logic instructions with limited shift.
|
||||
def ExynosLogicExFn : TIIPredicate<
|
||||
"isExynosLogicExFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsLogicShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<
|
||||
[ExynosCheckShift,
|
||||
CheckAll<
|
||||
[CheckShiftLSL,
|
||||
CheckShiftBy8]>]>>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsLogicUnshiftOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>,
|
||||
MCOpcodeSwitchCase<
|
||||
IsLogicImmOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ExynosLogicExPred : MCSchedPredicate<ExynosLogicExFn>;
|
||||
|
||||
// Identify a load or store using the register offset addressing mode
|
||||
// with a scaled non-extended register.
|
||||
def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsLoadStoreRegOffsetOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<
|
||||
[CheckMemExtSXTW,
|
||||
CheckMemExtUXTW,
|
||||
CheckMemScaled]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
|
||||
|
||||
// Identify FP instructions.
|
||||
def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckHForm,
|
||||
CheckSForm,
|
||||
CheckDForm,
|
||||
CheckQForm]>>;
|
||||
|
||||
// Identify 128-bit NEON instructions.
|
||||
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
|
||||
|
||||
// Identify instructions that reset a register efficiently.
|
||||
def ExynosResetFn : TIIPredicate<
|
||||
"isExynosResetFast",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
[ADR, ADRP,
|
||||
MOVNWi, MOVNXi,
|
||||
MOVZWi, MOVZXi],
|
||||
MCReturnStatement<TruePred>>,
|
||||
MCOpcodeSwitchCase<
|
||||
[ORRWri, ORRXri],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckIsRegOperand<1>,
|
||||
CheckAny<
|
||||
[CheckRegOperand<1, WZR>,
|
||||
CheckRegOperand<1, XZR>]>]>>>],
|
||||
MCReturnStatement<
|
||||
CheckAny<
|
||||
[IsCopyIdiomFn,
|
||||
IsZeroFPIdiomFn]>>>>;
|
||||
def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
|
||||
|
||||
// Identify EXTR as the alias for ROR (immediate).
|
||||
def ExynosRotateRightImmPred : MCSchedPredicate<
|
||||
CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
|
||||
CheckSameRegOperand<1, 2>]>>;
|
||||
|
||||
// Identify cheap arithmetic and logic immediate instructions.
|
||||
def ExynosCheapFn : TIIPredicate<
|
||||
"isExynosCheapAsMove",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithLogicImmOp.ValidOpcodes,
|
||||
MCReturnStatement<TruePred>>],
|
||||
MCReturnStatement<
|
||||
CheckAny<
|
||||
[ExynosArithFn, ExynosResetFn, ExynosLogicFn]>>>>;
|
|
@ -0,0 +1,441 @@
|
|||
//===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines scheduling predicate definitions that are used by the
|
||||
// AArch64 subtargets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Function mappers.
|
||||
|
||||
// Check the extension type in arithmetic instructions.
|
||||
let FunctionMapper = "AArch64_AM::getArithExtendType" in {
|
||||
def CheckExtUXTB : CheckImmOperand_s<3, "AArch64_AM::UXTB">;
|
||||
def CheckExtUXTH : CheckImmOperand_s<3, "AArch64_AM::UXTH">;
|
||||
def CheckExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
|
||||
def CheckExtUXTX : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
|
||||
def CheckExtSXTB : CheckImmOperand_s<3, "AArch64_AM::SXTB">;
|
||||
def CheckExtSXTH : CheckImmOperand_s<3, "AArch64_AM::SXTH">;
|
||||
def CheckExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
|
||||
def CheckExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
|
||||
}
|
||||
|
||||
// Check for shifting in extended arithmetic instructions.
|
||||
foreach I = {0-3} in {
|
||||
let FunctionMapper = "AArch64_AM::getArithShiftValue" in
|
||||
def CheckExtBy#I : CheckImmOperand<3, I>;
|
||||
}
|
||||
|
||||
// Check the extension type in the register offset addressing mode.
|
||||
let FunctionMapper = "AArch64_AM::getMemExtendType" in {
|
||||
def CheckMemExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
|
||||
def CheckMemExtLSL : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
|
||||
def CheckMemExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
|
||||
def CheckMemExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
|
||||
}
|
||||
|
||||
// Check for scaling in the register offset addressing mode.
|
||||
let FunctionMapper = "AArch64_AM::getMemDoShift" in
|
||||
def CheckMemScaled : CheckImmOperandSimple<4>;
|
||||
|
||||
// Check the shifting type in arithmetic and logic instructions.
|
||||
let FunctionMapper = "AArch64_AM::getShiftType" in {
|
||||
def CheckShiftLSL : CheckImmOperand_s<3, "AArch64_AM::LSL">;
|
||||
def CheckShiftLSR : CheckImmOperand_s<3, "AArch64_AM::LSR">;
|
||||
def CheckShiftASR : CheckImmOperand_s<3, "AArch64_AM::ASR">;
|
||||
def CheckShiftROR : CheckImmOperand_s<3, "AArch64_AM::ROR">;
|
||||
def CheckShiftMSL : CheckImmOperand_s<3, "AArch64_AM::MSL">;
|
||||
}
|
||||
|
||||
// Check for shifting in arithmetic and logic instructions.
|
||||
foreach I = {0-4, 8} in {
|
||||
let FunctionMapper = "AArch64_AM::getShiftValue" in
|
||||
def CheckShiftBy#I : CheckImmOperand<3, I>;
|
||||
}
|
||||
|
||||
// Generic predicates.
|
||||
|
||||
// Identify whether an instruction is the 16-bit NEON form based on its result.
|
||||
def CheckHForm : CheckAll<[CheckIsRegOperand<0>,
|
||||
CheckAny<[CheckRegOperand<0, H0>,
|
||||
CheckRegOperand<0, H1>,
|
||||
CheckRegOperand<0, H2>,
|
||||
CheckRegOperand<0, H3>,
|
||||
CheckRegOperand<0, H4>,
|
||||
CheckRegOperand<0, H5>,
|
||||
CheckRegOperand<0, H6>,
|
||||
CheckRegOperand<0, H7>,
|
||||
CheckRegOperand<0, H8>,
|
||||
CheckRegOperand<0, H9>,
|
||||
CheckRegOperand<0, H10>,
|
||||
CheckRegOperand<0, H11>,
|
||||
CheckRegOperand<0, H12>,
|
||||
CheckRegOperand<0, H13>,
|
||||
CheckRegOperand<0, H14>,
|
||||
CheckRegOperand<0, H15>,
|
||||
CheckRegOperand<0, H16>,
|
||||
CheckRegOperand<0, H17>,
|
||||
CheckRegOperand<0, H18>,
|
||||
CheckRegOperand<0, H19>,
|
||||
CheckRegOperand<0, H20>,
|
||||
CheckRegOperand<0, H21>,
|
||||
CheckRegOperand<0, H22>,
|
||||
CheckRegOperand<0, H23>,
|
||||
CheckRegOperand<0, H24>,
|
||||
CheckRegOperand<0, H25>,
|
||||
CheckRegOperand<0, H26>,
|
||||
CheckRegOperand<0, H27>,
|
||||
CheckRegOperand<0, H28>,
|
||||
CheckRegOperand<0, H29>,
|
||||
CheckRegOperand<0, H30>,
|
||||
CheckRegOperand<0, H31>]>]>;
|
||||
|
||||
// Identify whether an instruction is the 32-bit NEON form based on its result.
|
||||
def CheckSForm : CheckAll<[CheckIsRegOperand<0>,
|
||||
CheckAny<[CheckRegOperand<0, S0>,
|
||||
CheckRegOperand<0, S1>,
|
||||
CheckRegOperand<0, S2>,
|
||||
CheckRegOperand<0, S3>,
|
||||
CheckRegOperand<0, S4>,
|
||||
CheckRegOperand<0, S5>,
|
||||
CheckRegOperand<0, S6>,
|
||||
CheckRegOperand<0, S7>,
|
||||
CheckRegOperand<0, S8>,
|
||||
CheckRegOperand<0, S9>,
|
||||
CheckRegOperand<0, S10>,
|
||||
CheckRegOperand<0, S11>,
|
||||
CheckRegOperand<0, S12>,
|
||||
CheckRegOperand<0, S13>,
|
||||
CheckRegOperand<0, S14>,
|
||||
CheckRegOperand<0, S15>,
|
||||
CheckRegOperand<0, S16>,
|
||||
CheckRegOperand<0, S17>,
|
||||
CheckRegOperand<0, S18>,
|
||||
CheckRegOperand<0, S19>,
|
||||
CheckRegOperand<0, S20>,
|
||||
CheckRegOperand<0, S21>,
|
||||
CheckRegOperand<0, S22>,
|
||||
CheckRegOperand<0, S23>,
|
||||
CheckRegOperand<0, S24>,
|
||||
CheckRegOperand<0, S25>,
|
||||
CheckRegOperand<0, S26>,
|
||||
CheckRegOperand<0, S27>,
|
||||
CheckRegOperand<0, S28>,
|
||||
CheckRegOperand<0, S29>,
|
||||
CheckRegOperand<0, S30>,
|
||||
CheckRegOperand<0, S31>]>]>;
|
||||
|
||||
// Identify whether an instruction is the 64-bit NEON form based on its result.
|
||||
def CheckDForm : CheckAll<[CheckIsRegOperand<0>,
|
||||
CheckAny<[CheckRegOperand<0, D0>,
|
||||
CheckRegOperand<0, D1>,
|
||||
CheckRegOperand<0, D2>,
|
||||
CheckRegOperand<0, D3>,
|
||||
CheckRegOperand<0, D4>,
|
||||
CheckRegOperand<0, D5>,
|
||||
CheckRegOperand<0, D6>,
|
||||
CheckRegOperand<0, D7>,
|
||||
CheckRegOperand<0, D8>,
|
||||
CheckRegOperand<0, D9>,
|
||||
CheckRegOperand<0, D10>,
|
||||
CheckRegOperand<0, D11>,
|
||||
CheckRegOperand<0, D12>,
|
||||
CheckRegOperand<0, D13>,
|
||||
CheckRegOperand<0, D14>,
|
||||
CheckRegOperand<0, D15>,
|
||||
CheckRegOperand<0, D16>,
|
||||
CheckRegOperand<0, D17>,
|
||||
CheckRegOperand<0, D18>,
|
||||
CheckRegOperand<0, D19>,
|
||||
CheckRegOperand<0, D20>,
|
||||
CheckRegOperand<0, D21>,
|
||||
CheckRegOperand<0, D22>,
|
||||
CheckRegOperand<0, D23>,
|
||||
CheckRegOperand<0, D24>,
|
||||
CheckRegOperand<0, D25>,
|
||||
CheckRegOperand<0, D26>,
|
||||
CheckRegOperand<0, D27>,
|
||||
CheckRegOperand<0, D28>,
|
||||
CheckRegOperand<0, D29>,
|
||||
CheckRegOperand<0, D30>,
|
||||
CheckRegOperand<0, D31>]>]>;
|
||||
|
||||
// Identify whether an instruction is the 128-bit NEON form based on its result.
|
||||
def CheckQForm : CheckAll<[CheckIsRegOperand<0>,
|
||||
CheckAny<[CheckRegOperand<0, Q0>,
|
||||
CheckRegOperand<0, Q1>,
|
||||
CheckRegOperand<0, Q2>,
|
||||
CheckRegOperand<0, Q3>,
|
||||
CheckRegOperand<0, Q4>,
|
||||
CheckRegOperand<0, Q5>,
|
||||
CheckRegOperand<0, Q6>,
|
||||
CheckRegOperand<0, Q7>,
|
||||
CheckRegOperand<0, Q8>,
|
||||
CheckRegOperand<0, Q9>,
|
||||
CheckRegOperand<0, Q10>,
|
||||
CheckRegOperand<0, Q11>,
|
||||
CheckRegOperand<0, Q12>,
|
||||
CheckRegOperand<0, Q13>,
|
||||
CheckRegOperand<0, Q14>,
|
||||
CheckRegOperand<0, Q15>,
|
||||
CheckRegOperand<0, Q16>,
|
||||
CheckRegOperand<0, Q17>,
|
||||
CheckRegOperand<0, Q18>,
|
||||
CheckRegOperand<0, Q19>,
|
||||
CheckRegOperand<0, Q20>,
|
||||
CheckRegOperand<0, Q21>,
|
||||
CheckRegOperand<0, Q22>,
|
||||
CheckRegOperand<0, Q23>,
|
||||
CheckRegOperand<0, Q24>,
|
||||
CheckRegOperand<0, Q25>,
|
||||
CheckRegOperand<0, Q26>,
|
||||
CheckRegOperand<0, Q27>,
|
||||
CheckRegOperand<0, Q28>,
|
||||
CheckRegOperand<0, Q29>,
|
||||
CheckRegOperand<0, Q30>,
|
||||
CheckRegOperand<0, Q31>]>]>;
|
||||
|
||||
// Identify arithmetic instructions with extend.
|
||||
def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
|
||||
SUBWrx, SUBXrx, SUBSWrx, SUBSXrx,
|
||||
ADDXrx64, ADDSXrx64,
|
||||
SUBXrx64, SUBSXrx64]>;
|
||||
|
||||
// Identify arithmetic immediate instructions.
|
||||
def IsArithImmOp : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri,
|
||||
SUBWri, SUBXri, SUBSWri, SUBSXri]>;
|
||||
|
||||
// Identify arithmetic instructions with shift.
|
||||
def IsArithShiftOp : CheckOpcode<[ADDWrs, ADDXrs, ADDSWrs, ADDSXrs,
|
||||
SUBWrs, SUBXrs, SUBSWrs, SUBSXrs]>;
|
||||
|
||||
// Identify arithmetic instructions without shift.
|
||||
def IsArithUnshiftOp : CheckOpcode<[ADDWrr, ADDXrr, ADDSWrr, ADDSXrr,
|
||||
SUBWrr, SUBXrr, SUBSWrr, SUBSXrr]>;
|
||||
|
||||
// Identify logic immediate instructions.
|
||||
def IsLogicImmOp : CheckOpcode<[ANDWri, ANDXri,
|
||||
EORWri, EORXri,
|
||||
ORRWri, ORRXri]>;
|
||||
|
||||
// Identify logic instructions with shift.
|
||||
def IsLogicShiftOp : CheckOpcode<[ANDWrs, ANDXrs, ANDSWrs, ANDSXrs,
|
||||
BICWrs, BICXrs, BICSWrs, BICSXrs,
|
||||
EONWrs, EONXrs,
|
||||
EORWrs, EORXrs,
|
||||
ORNWrs, ORNXrs,
|
||||
ORRWrs, ORRXrs]>;
|
||||
|
||||
// Identify logic instructions without shift.
|
||||
def IsLogicUnshiftOp : CheckOpcode<[ANDWrr, ANDXrr, ANDSWrr, ANDSXrr,
|
||||
BICWrr, BICXrr, BICSWrr, BICSXrr,
|
||||
EONWrr, EONXrr,
|
||||
EORWrr, EORXrr,
|
||||
ORNWrr, ORNXrr,
|
||||
ORRWrr, ORRXrr]>;
|
||||
|
||||
// Identify arithmetic and logic immediate instructions.
|
||||
def IsArithLogicImmOp : CheckOpcode<!listconcat(IsArithImmOp.ValidOpcodes,
|
||||
IsLogicImmOp.ValidOpcodes)>;
|
||||
|
||||
// Identify arithmetic and logic instructions with shift.
|
||||
def IsArithLogicShiftOp : CheckOpcode<!listconcat(IsArithShiftOp.ValidOpcodes,
|
||||
IsLogicShiftOp.ValidOpcodes)>;
|
||||
|
||||
// Identify arithmetic and logic instructions without shift.
|
||||
def IsArithLogicUnshiftOp : CheckOpcode<!listconcat(IsArithUnshiftOp.ValidOpcodes,
|
||||
IsLogicUnshiftOp.ValidOpcodes)>;
|
||||
|
||||
// Identify whether an instruction is an ASIMD
|
||||
// load using the post index addressing mode.
|
||||
def IsLoadASIMDPostOp : CheckOpcode<[LD1Onev8b_POST, LD1Onev4h_POST, LD1Onev2s_POST, LD1Onev1d_POST,
|
||||
LD1Onev16b_POST, LD1Onev8h_POST, LD1Onev4s_POST, LD1Onev2d_POST,
|
||||
LD1Twov8b_POST, LD1Twov4h_POST, LD1Twov2s_POST, LD1Twov1d_POST,
|
||||
LD1Twov16b_POST, LD1Twov8h_POST, LD1Twov4s_POST, LD1Twov2d_POST,
|
||||
LD1Threev8b_POST, LD1Threev4h_POST, LD1Threev2s_POST, LD1Threev1d_POST,
|
||||
LD1Threev16b_POST, LD1Threev8h_POST, LD1Threev4s_POST, LD1Threev2d_POST,
|
||||
LD1Fourv8b_POST, LD1Fourv4h_POST, LD1Fourv2s_POST, LD1Fourv1d_POST,
|
||||
LD1Fourv16b_POST, LD1Fourv8h_POST, LD1Fourv4s_POST, LD1Fourv2d_POST,
|
||||
LD1i8_POST, LD1i16_POST, LD1i32_POST, LD1i64_POST,
|
||||
LD1Rv8b_POST, LD1Rv4h_POST, LD1Rv2s_POST, LD1Rv1d_POST,
|
||||
LD1Rv16b_POST, LD1Rv8h_POST, LD1Rv4s_POST, LD1Rv2d_POST,
|
||||
LD2Twov8b_POST, LD2Twov4h_POST, LD2Twov2s_POST,
|
||||
LD2Twov16b_POST, LD2Twov8h_POST, LD2Twov4s_POST, LD2Twov2d_POST,
|
||||
LD2i8_POST, LD2i16_POST, LD2i32_POST, LD2i64_POST,
|
||||
LD2Rv8b_POST, LD2Rv4h_POST, LD2Rv2s_POST, LD2Rv1d_POST,
|
||||
LD2Rv16b_POST, LD2Rv8h_POST, LD2Rv4s_POST, LD2Rv2d_POST,
|
||||
LD3Threev8b_POST, LD3Threev4h_POST, LD3Threev2s_POST,
|
||||
LD3Threev16b_POST, LD3Threev8h_POST, LD3Threev4s_POST, LD3Threev2d_POST,
|
||||
LD3i8_POST, LD3i16_POST, LD3i32_POST, LD3i64_POST,
|
||||
LD3Rv8b_POST, LD3Rv4h_POST, LD3Rv2s_POST, LD3Rv1d_POST,
|
||||
LD3Rv16b_POST, LD3Rv8h_POST, LD3Rv4s_POST, LD3Rv2d_POST,
|
||||
LD4Fourv8b_POST, LD4Fourv4h_POST, LD4Fourv2s_POST,
|
||||
LD4Fourv16b_POST, LD4Fourv8h_POST, LD4Fourv4s_POST, LD4Fourv2d_POST,
|
||||
LD4i8_POST, LD4i16_POST, LD4i32_POST, LD4i64_POST,
|
||||
LD4Rv8b_POST, LD4Rv4h_POST, LD4Rv2s_POST, LD4Rv1d_POST,
|
||||
LD4Rv16b_POST, LD4Rv8h_POST, LD4Rv4s_POST, LD4Rv2d_POST]>;
|
||||
|
||||
// Identify whether an instruction is an ASIMD
|
||||
// store using the post index addressing mode.
|
||||
def IsStoreASIMDPostOp : CheckOpcode<[ST1Onev8b_POST, ST1Onev4h_POST, ST1Onev2s_POST, ST1Onev1d_POST,
|
||||
ST1Onev16b_POST, ST1Onev8h_POST, ST1Onev4s_POST, ST1Onev2d_POST,
|
||||
ST1Twov8b_POST, ST1Twov4h_POST, ST1Twov2s_POST, ST1Twov1d_POST,
|
||||
ST1Twov16b_POST, ST1Twov8h_POST, ST1Twov4s_POST, ST1Twov2d_POST,
|
||||
ST1Threev8b_POST, ST1Threev4h_POST, ST1Threev2s_POST, ST1Threev1d_POST,
|
||||
ST1Threev16b_POST, ST1Threev8h_POST, ST1Threev4s_POST, ST1Threev2d_POST,
|
||||
ST1Fourv8b_POST, ST1Fourv4h_POST, ST1Fourv2s_POST, ST1Fourv1d_POST,
|
||||
ST1Fourv16b_POST, ST1Fourv8h_POST, ST1Fourv4s_POST, ST1Fourv2d_POST,
|
||||
ST1i8_POST, ST1i16_POST, ST1i32_POST, ST1i64_POST,
|
||||
ST2Twov8b_POST, ST2Twov4h_POST, ST2Twov2s_POST,
|
||||
ST2Twov16b_POST, ST2Twov8h_POST, ST2Twov4s_POST, ST2Twov2d_POST,
|
||||
ST2i8_POST, ST2i16_POST, ST2i32_POST, ST2i64_POST,
|
||||
ST3Threev8b_POST, ST3Threev4h_POST, ST3Threev2s_POST,
|
||||
ST3Threev16b_POST, ST3Threev8h_POST, ST3Threev4s_POST, ST3Threev2d_POST,
|
||||
ST3i8_POST, ST3i16_POST, ST3i32_POST, ST3i64_POST,
|
||||
ST4Fourv8b_POST, ST4Fourv4h_POST, ST4Fourv2s_POST,
|
||||
ST4Fourv16b_POST, ST4Fourv8h_POST, ST4Fourv4s_POST, ST4Fourv2d_POST,
|
||||
ST4i8_POST, ST4i16_POST, ST4i32_POST, ST4i64_POST]>;
|
||||
|
||||
// Identify whether an instruction is an ASIMD load
|
||||
// or store using the post index addressing mode.
|
||||
def IsLoadStoreASIMDPostOp : CheckOpcode<!listconcat(IsLoadASIMDPostOp.ValidOpcodes,
|
||||
IsStoreASIMDPostOp.ValidOpcodes)>;
|
||||
|
||||
// Identify whether an instruction is a load
|
||||
// using the register offset addressing mode.
|
||||
def IsLoadRegOffsetOp : CheckOpcode<[PRFMroW, PRFMroX,
|
||||
LDRBBroW, LDRBBroX,
|
||||
LDRSBWroW, LDRSBWroX, LDRSBXroW, LDRSBXroX,
|
||||
LDRHHroW, LDRHHroX,
|
||||
LDRSHWroW, LDRSHWroX, LDRSHXroW, LDRSHXroX,
|
||||
LDRWroW, LDRWroX,
|
||||
LDRSWroW, LDRSWroX,
|
||||
LDRXroW, LDRXroX,
|
||||
LDRBroW, LDRBroX,
|
||||
LDRHroW, LDRHroX,
|
||||
LDRSroW, LDRSroX,
|
||||
LDRDroW, LDRDroX,
|
||||
LDRQroW, LDRQroX]>;
|
||||
|
||||
// Identify whether an instruction is a store
|
||||
// using the register offset addressing mode.
|
||||
def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
|
||||
STRHHroW, STRHHroX,
|
||||
STRWroW, STRWroX,
|
||||
STRXroW, STRXroX,
|
||||
STRBroW, STRBroX,
|
||||
STRHroW, STRHroX,
|
||||
STRSroW, STRSroX,
|
||||
STRDroW, STRDroX,
|
||||
STRQroW, STRQroX]>;
|
||||
|
||||
// Identify whether an instruction is a load or
|
||||
// store using the register offset addressing mode.
|
||||
def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
|
||||
IsStoreRegOffsetOp.ValidOpcodes)>;
|
||||
|
||||
// Target predicates.
|
||||
|
||||
// Identify an instruction that effectively transfers a register to another.
|
||||
def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom",
|
||||
MCOpcodeSwitchStatement<
|
||||
[// MOV {Rd, SP}, {SP, Rn} =>
|
||||
// ADD {Rd, SP}, {SP, Rn}, #0
|
||||
MCOpcodeSwitchCase<
|
||||
[ADDWri, ADDXri],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckIsRegOperand<0>,
|
||||
CheckIsRegOperand<1>,
|
||||
CheckAny<
|
||||
[CheckRegOperand<0, WSP>,
|
||||
CheckRegOperand<0, SP>,
|
||||
CheckRegOperand<1, WSP>,
|
||||
CheckRegOperand<1, SP>]>,
|
||||
CheckZeroOperand<2>]>>>,
|
||||
// MOV Rd, Rm =>
|
||||
// ORR Rd, ZR, Rm, LSL #0
|
||||
MCOpcodeSwitchCase<
|
||||
[ORRWrs, ORRXrs],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckIsRegOperand<1>,
|
||||
CheckIsRegOperand<2>,
|
||||
CheckAny<
|
||||
[CheckRegOperand<1, WZR>,
|
||||
CheckRegOperand<1, XZR>]>,
|
||||
CheckShiftBy0]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def IsCopyIdiomPred : MCSchedPredicate<IsCopyIdiomFn>;
|
||||
|
||||
// Identify arithmetic instructions with an extended register.
|
||||
def RegExtendedFn : TIIPredicate<"hasExtendedReg",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithExtOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckNot<CheckZeroOperand<3>>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def RegExtendedPred : MCSchedPredicate<RegExtendedFn>;
|
||||
|
||||
// Identify arithmetic and logic instructions with a shifted register.
|
||||
def RegShiftedFn : TIIPredicate<"hasShiftedReg",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsArithLogicShiftOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckNot<CheckZeroOperand<3>>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def RegShiftedPred : MCSchedPredicate<RegShiftedFn>;
|
||||
|
||||
// Identify a load or store using the register offset addressing mode
|
||||
// with an extended or scaled register.
|
||||
def ScaledIdxFn : TIIPredicate<"isScaledAddr",
|
||||
MCOpcodeSwitchStatement<
|
||||
[MCOpcodeSwitchCase<
|
||||
IsLoadStoreRegOffsetOp.ValidOpcodes,
|
||||
MCReturnStatement<
|
||||
CheckAny<[CheckNot<CheckMemExtLSL>,
|
||||
CheckMemScaled]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def ScaledIdxPred : MCSchedPredicate<ScaledIdxFn>;
|
||||
|
||||
// Identify an instruction that effectively resets a FP register to zero.
|
||||
def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom",
|
||||
MCOpcodeSwitchStatement<
|
||||
[// MOVI Vd, #0
|
||||
MCOpcodeSwitchCase<
|
||||
[MOVIv8b_ns, MOVIv16b_ns,
|
||||
MOVID, MOVIv2d_ns],
|
||||
MCReturnStatement<CheckZeroOperand<1>>>,
|
||||
// MOVI Vd, #0, LSL #0
|
||||
MCOpcodeSwitchCase<
|
||||
[MOVIv4i16, MOVIv8i16,
|
||||
MOVIv2i32, MOVIv4i32],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckZeroOperand<1>,
|
||||
CheckZeroOperand<2>]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def IsZeroFPIdiomPred : MCSchedPredicate<IsZeroFPIdiomFn>;
|
||||
|
||||
// Identify an instruction that effectively resets a GP register to zero.
|
||||
def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom",
|
||||
MCOpcodeSwitchStatement<
|
||||
[// ORR Rd, ZR, #0
|
||||
MCOpcodeSwitchCase<
|
||||
[ORRWri, ORRXri],
|
||||
MCReturnStatement<
|
||||
CheckAll<
|
||||
[CheckIsRegOperand<1>,
|
||||
CheckAny<
|
||||
[CheckRegOperand<1, WZR>,
|
||||
CheckRegOperand<1, XZR>]>,
|
||||
CheckZeroOperand<2>]>>>],
|
||||
MCReturnStatement<FalsePred>>>;
|
||||
def IsZeroIdiomPred : MCSchedPredicate<IsZeroIdiomFn>;
|
|
@ -0,0 +1,747 @@
|
|||
//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for Huawei TSV110 to support
|
||||
// instruction scheduling and other instruction cost heuristics.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// The following definitions describe the simpler per-operand machine model.
|
||||
// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
|
||||
|
||||
// Huawei TSV110 scheduling machine model.
|
||||
def TSV110Model : SchedMachineModel {
|
||||
let IssueWidth = 4; // 4 micro-ops dispatched per cycle.
|
||||
let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
|
||||
let LoopMicroOpBufferSize = 16;
|
||||
let LoadLatency = 4; // Optimistic load latency.
|
||||
let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F);
|
||||
}
|
||||
|
||||
// Define each kind of processor resource and number available on the TSV110,
|
||||
// which has 8 pipelines, each with its own queue where micro-ops wait for
|
||||
// their operands and issue out-of-order to one of eight execution pipelines.
|
||||
let SchedModel = TSV110Model in {
|
||||
def TSV110UnitALU : ProcResource<1>; // Int ALU
|
||||
def TSV110UnitAB : ProcResource<2>; // Int ALU/BRU
|
||||
def TSV110UnitMDU : ProcResource<1>; // Multi-Cycle
|
||||
def TSV110UnitFSU1 : ProcResource<1>; // FP/ASIMD
|
||||
def TSV110UnitFSU2 : ProcResource<1>; // FP/ASIMD
|
||||
def TSV110UnitLdSt : ProcResource<2>; // Load/Store
|
||||
|
||||
def TSV110UnitF : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>;
|
||||
def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>;
|
||||
def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>;
|
||||
}
|
||||
|
||||
let SchedModel = TSV110Model in {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Map the target-defined scheduler read/write resources and latency for
|
||||
// TSV110
|
||||
|
||||
// Integer ALU
|
||||
def : WriteRes<WriteImm, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteI, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteISReg, [TSV110UnitMDU]> { let Latency = 2; }
|
||||
def : WriteRes<WriteIEReg, [TSV110UnitMDU]> { let Latency = 2; }
|
||||
def : WriteRes<WriteExtr, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteIS, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
|
||||
// Integer Mul/MAC/Div
|
||||
def : WriteRes<WriteID32, [TSV110UnitMDU]> { let Latency = 12;
|
||||
let ResourceCycles = [12]; }
|
||||
def : WriteRes<WriteID64, [TSV110UnitMDU]> { let Latency = 20;
|
||||
let ResourceCycles = [20]; }
|
||||
def : WriteRes<WriteIM32, [TSV110UnitMDU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteIM64, [TSV110UnitMDU]> { let Latency = 4; }
|
||||
|
||||
// Load
|
||||
def : WriteRes<WriteLD, [TSV110UnitLdSt]> { let Latency = 4; }
|
||||
def : WriteRes<WriteLDIdx, [TSV110UnitLdSt]> { let Latency = 4; }
|
||||
def : WriteRes<WriteLDHi, []> { let Latency = 4; }
|
||||
|
||||
// Pre/Post Indexing
|
||||
def : WriteRes<WriteAdr, [TSV110UnitALUAB]> { let Latency = 1; }
|
||||
|
||||
// Store
|
||||
def : WriteRes<WriteST, [TSV110UnitLdSt]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTP, [TSV110UnitLdSt]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSTIdx, [TSV110UnitLdSt]> { let Latency = 1; }
|
||||
|
||||
// FP
|
||||
def : WriteRes<WriteF, [TSV110UnitF]> { let Latency = 2; }
|
||||
def : WriteRes<WriteFCmp, [TSV110UnitF]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFCvt, [TSV110UnitF]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFCopy, [TSV110UnitF]> { let Latency = 2; }
|
||||
def : WriteRes<WriteFImm, [TSV110UnitF]> { let Latency = 2; }
|
||||
def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
|
||||
|
||||
// FP Div, Sqrt
|
||||
def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
|
||||
|
||||
def : WriteRes<WriteVd, [TSV110UnitF]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVq, [TSV110UnitF]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
|
||||
def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }
|
||||
|
||||
// Branch
|
||||
def : WriteRes<WriteBr, [TSV110UnitAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteBrReg, [TSV110UnitAB]> { let Latency = 1; }
|
||||
def : WriteRes<WriteSys, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
|
||||
def : WriteRes<WriteHint, []> { let Latency = 1; }
|
||||
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
|
||||
// Forwarding logic is modeled only for multiply and accumulate.
|
||||
def : ReadAdvance<ReadI, 0>;
|
||||
def : ReadAdvance<ReadISReg, 0>;
|
||||
def : ReadAdvance<ReadIEReg, 0>;
|
||||
def : ReadAdvance<ReadIM, 0>;
|
||||
def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
|
||||
def : ReadAdvance<ReadID, 0>;
|
||||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
def : InstRW<[WriteI], (instrs COPY)>;
|
||||
|
||||
// Detailed Refinements
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Contains all of the TSV110 specific SchedWriteRes types. The approach
|
||||
// below is to define a generic SchedWriteRes for every combination of
|
||||
// latency and microOps. The naming conventions is to use a prefix, one field
|
||||
// for latency, and one or more microOp count/type designators.
|
||||
// Prefix: TSV110Wr
|
||||
// Latency: #cyc
|
||||
// MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt)
|
||||
//
|
||||
// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt means the total latency is 6 and there are
|
||||
// 1 micro-ops to be issued down one ALU pipe, six MDU pipes and four LdSt pipes.
|
||||
//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 1 micro-op types
|
||||
|
||||
def TSV110Wr_1cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 1; }
|
||||
def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; }
|
||||
def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; }
|
||||
def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 1; }
|
||||
|
||||
def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; }
|
||||
def TSV110Wr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; }
|
||||
|
||||
def TSV110Wr_3cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 3; }
|
||||
def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 3; }
|
||||
def TSV110Wr_3cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 3; }
|
||||
|
||||
def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 4; }
|
||||
def TSV110Wr_4cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 4; }
|
||||
def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 4; }
|
||||
def TSV110Wr_4cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 4; }
|
||||
|
||||
def TSV110Wr_5cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 5; }
|
||||
def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 5; }
|
||||
def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 5; }
|
||||
def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 5; }
|
||||
|
||||
def TSV110Wr_6cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 6; }
|
||||
|
||||
def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; }
|
||||
|
||||
def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; }
|
||||
|
||||
def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; }
|
||||
|
||||
def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; }
|
||||
|
||||
def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; }
|
||||
|
||||
def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; }
|
||||
|
||||
def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; }
|
||||
|
||||
def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; }
|
||||
|
||||
def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; }
|
||||
|
||||
def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; }
|
||||
|
||||
def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; }
|
||||
|
||||
def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; }
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 2 micro-op types
|
||||
|
||||
def TSV110Wr_1cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitALUAB]> {
|
||||
let Latency = 1;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_2cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitALUAB]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_2cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_2cyc_2F : SchedWriteRes<[TSV110UnitF,
|
||||
TSV110UnitF]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_2cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
|
||||
TSV110UnitFSU2]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_4cyc_2F : SchedWriteRes<[TSV110UnitF,
|
||||
TSV110UnitF]> {
|
||||
let Latency = 4;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_4cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
|
||||
TSV110UnitFSU2]> {
|
||||
let Latency = 4;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_4cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitALUAB]> {
|
||||
let Latency = 4;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_5cyc_1ALU_1F : SchedWriteRes<[TSV110UnitALU,
|
||||
TSV110UnitF]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_6cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_6cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
|
||||
TSV110UnitALUAB]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_7cyc_1F_1LdSt : SchedWriteRes<[TSV110UnitF,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
def TSV110Wr_8cyc_2FSU1 : SchedWriteRes<[TSV110UnitFSU1,
|
||||
TSV110UnitFSU1]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
|
||||
def TSV110Wr_8cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
|
||||
TSV110UnitFSU2]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 3 micro-op types
|
||||
|
||||
def TSV110Wr_6cyc_3F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitF]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
def TSV110Wr_6cyc_3LdSt : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 6;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
def TSV110Wr_7cyc_2F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitLdSt]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 4 micro-op types
|
||||
|
||||
def TSV110Wr_8cyc_4F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitF, TSV110UnitF]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 4;
|
||||
}
|
||||
|
||||
def TSV110Wr_8cyc_3F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitF, TSV110UnitLdSt]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 4;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 5 micro-op types
|
||||
|
||||
def TSV110Wr_8cyc_3F_2LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitLdSt, TSV110UnitLdSt]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 5;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define Generic 8 micro-op types
|
||||
|
||||
def TSV110Wr_10cyc_4F_4LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitF, TSV110UnitF,
|
||||
TSV110UnitLdSt, TSV110UnitLdSt,
|
||||
TSV110UnitLdSt, TSV110UnitLdSt]> {
|
||||
let Latency = 10;
|
||||
let NumMicroOps = 8;
|
||||
}
|
||||
|
||||
|
||||
// Branch Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B)>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BL)>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BLR)>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>;
|
||||
|
||||
|
||||
// Cryptography Extensions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AES[DE]")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AESI?MC")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_2F], (instregex "^SHA1(H|SU0)")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA1[CMP]")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA256SU0")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^SHA256SU1")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA256(H|H2)")>;
|
||||
def TSV110ReadCRC: SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC], (instregex "^CRC32.*$")>;
|
||||
|
||||
|
||||
// Arithmetic and Logical Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(BIC|EON|ORN)[WX]rr")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(BIC)S[WX]rr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(ADC|SBC|BIC)[WX]r$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(ADC|SBC)S[WX]r$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(ADD|SUB)[WX]r(s|x|x64)$")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(ADD|SUB)S[WX]r(s|x|x64)$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
|
||||
|
||||
|
||||
// Move and Shift Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^MOV[NZK][WX]i")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(LSLV|LSRV|ASRV|RORV)(W|X)r")>;
|
||||
|
||||
|
||||
// Divide and Multiply Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_12cyc_1MDU], (instregex "^(S|U)DIVWr$")>;
|
||||
def : InstRW<[TSV110Wr_20cyc_1MDU], (instregex "^(S|U)DIVXr$")>;
|
||||
|
||||
def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>;
|
||||
def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>;
|
||||
|
||||
|
||||
// Miscellaneous Data-Processing Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^EXTR(W|X)rri$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(S|U)?BFM(W|X)ri$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>;
|
||||
|
||||
|
||||
// Load Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMui$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>;
|
||||
|
||||
|
||||
// Store Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
|
||||
|
||||
|
||||
// FP Data Processing Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCCMP(E)?(S|D)rr$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCSEL(S|D)rrr$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>;
|
||||
def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>;
|
||||
def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>;
|
||||
def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Hrr")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Drr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULSrr$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULDrr$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>;
|
||||
|
||||
|
||||
// FP Miscellaneous Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_5cyc_1ALU_1F], (instregex "^[SU]CVTF[SU][WX][SD]ri")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT[HSD][HSD]r")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>;
|
||||
|
||||
|
||||
// FP Load Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>;
|
||||
|
||||
|
||||
// FP Store Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR[BHSDQ]i")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)")>;
|
||||
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>;
|
||||
|
||||
|
||||
// ASIMD Integer Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Reference for forms in this group
|
||||
// D form - v8i8, v4i16, v2i32
|
||||
// Q form - v16i8, v8i16, v4i32
|
||||
// D form - v1i8, v1i16, v1i32, v1i64
|
||||
// Q form - v16i8, v8i16, v4i32, v2i64
|
||||
// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
|
||||
// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
|
||||
|
||||
// ASIMD simple arithmetic
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(ABS|ADD(P)?|NEG|SUB)v")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](ADD(L|LP|W)|SUB(L|W))v")>;
|
||||
|
||||
// ASIMD complex arithmetic
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]H(ADD|SUB)v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^R?(ADD|SUB)HN2?v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]Q(ADD|SUB)v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^(SU|US)QADDv")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]RHADDv")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABAL?v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABDL?v")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ADALPv")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^((SQ)(ABS|NEG))v")>;
|
||||
|
||||
// ASIMD compare
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v")>;
|
||||
|
||||
// ASIMD max/min
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)P?v")>;
|
||||
|
||||
// ASIMD logical
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>;
|
||||
|
||||
// ASIMD multiply accumulate, D-form
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>;
|
||||
// ASIMD multiply accumulate, Q-form
|
||||
def : InstRW<[TSV110Wr_8cyc_2FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>;
|
||||
|
||||
// ASIMD multiply accumulate long
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v8i8|v16i8)")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v1i64|v2i64)")>;
|
||||
|
||||
// ASIMD shift
|
||||
// ASIMD shift accumulate
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>;
|
||||
// ASIMD shift by immed, basic
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1],
|
||||
(instregex "SHLv","SLIv","SRIv","SHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
|
||||
// ASIMD shift by immed, complex
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]?(Q|R){1,2}SHR")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^SQSHLU")>;
|
||||
// ASIMD shift by register, basic, Q-form
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
|
||||
// ASIMD shift by register, complex, D-form
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
|
||||
// ASIMD shift by register, complex, Q-form
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
|
||||
|
||||
// ASIMD reduction
|
||||
// ASIMD arith, reduce, 4H/4S
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
|
||||
// ASIMD arith, reduce, 8B/8H
|
||||
def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
|
||||
// ASIMD arith, reduce, 16B
|
||||
def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>;
|
||||
|
||||
// ASIMD max/min, reduce, 4H/4S
|
||||
def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
|
||||
// ASIMD max/min, reduce, 8B/8H
|
||||
def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
|
||||
// ASIMD max/min, reduce, 16B
|
||||
def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
|
||||
|
||||
|
||||
// Vector - Floating Point
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Reference for forms in this group
|
||||
// D form - v2f32
|
||||
// Q form - v4f32, v2f64
|
||||
// D form - 32, 64
|
||||
// D form - v1i32, v1i64
|
||||
// D form - v2i32
|
||||
// Q form - v4i32, v2i64
|
||||
|
||||
// ASIMD FP sign manipulation
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FABSv")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FNEGv")>;
|
||||
|
||||
// ASIMD FP compare
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>;
|
||||
|
||||
// ASIMD FP convert
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FCVT[AMNPZ][SU]v")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT(L)v")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FCVT(N|XN)v")>;
|
||||
|
||||
// ASIMD FP divide, D-form, F32
|
||||
def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>;
|
||||
// ASIMD FP divide, Q-form, F32
|
||||
def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>;
|
||||
// ASIMD FP divide, Q-form, F64
|
||||
def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>;
|
||||
|
||||
// ASIMD FP SQRT
|
||||
def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>;
|
||||
def : InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>;
|
||||
def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>;
|
||||
|
||||
// ASIMD FP max,min
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?v")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?Pv")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(MAX|MIN)(NM)?Vv")>;
|
||||
|
||||
// ASIMD FP add
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|ADDP|SUB)v")>;
|
||||
|
||||
// ASIMD FP multiply
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FMULX?v")>;
|
||||
|
||||
|
||||
// ASIMD Miscellaneous Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>;
|
||||
def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^RBITv")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>;
|
||||
|
||||
// ASIMD table lookup, D-form
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>;
|
||||
// ASIMD table lookup, Q-form
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v16i8Two")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
|
||||
|
||||
|
||||
// ASIMD Load Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
|
||||
// ASIMD Store Instructions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
} // SchedModel = TSV110Model
|
|
@ -1,9 +1,8 @@
|
|||
//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -26,8 +25,9 @@ def ThunderXT8XModel : SchedMachineModel {
|
|||
let PostRAScheduler = 1; // Use PostRA scheduler.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -154,7 +154,8 @@ def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
|
|||
def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
|
||||
// FP Mul, Div, Sqrt
|
||||
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
|
||||
|
@ -192,6 +193,7 @@ def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
|
|||
def : ReadAdvance<ReadExtrHi, 1>;
|
||||
def : ReadAdvance<ReadAdrBase, 2>;
|
||||
def : ReadAdvance<ReadVLD, 2>;
|
||||
def : ReadAdvance<ReadST, 2>;
|
||||
|
||||
// FIXME: This needs more targeted benchmarking.
|
||||
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -26,8 +25,9 @@ def ThunderX2T99Model : SchedMachineModel {
|
|||
let PostRAScheduler = 1; // Using PostRA sched.
|
||||
let CompleteModel = 1;
|
||||
|
||||
list<Predicate> UnsupportedFeatures = [HasSVE];
|
||||
|
||||
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
|
||||
PAUnsupported.F,
|
||||
SMEUnsupported.F);
|
||||
// FIXME: Remove when all errors have been fixed.
|
||||
let FullInstRWOverlapCheck = 0;
|
||||
}
|
||||
|
@ -362,6 +362,7 @@ def : ReadAdvance<ReadID, 0>;
|
|||
def : ReadAdvance<ReadExtrHi, 0>;
|
||||
def : ReadAdvance<ReadAdrBase, 0>;
|
||||
def : ReadAdvance<ReadVLD, 0>;
|
||||
def : ReadAdvance<ReadST, 0>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 3. Instruction Tables.
|
||||
|
@ -1249,7 +1250,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
|
|||
// ASIMD shift by register, basic, Q-form
|
||||
// ASIMD shift by register, complex, D-form
|
||||
// ASIMD shift by register, complex, Q-form
|
||||
def : WriteRes<WriteV, [THX2T99F01]> {
|
||||
def : WriteRes<WriteVd, [THX2T99F01]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [4];
|
||||
}
|
||||
def : WriteRes<WriteVq, [THX2T99F01]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [4];
|
||||
|
@ -1483,7 +1489,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
|
|||
// ASIMD bitwise insert, D-form
|
||||
// ASIMD bitwise insert, Q-form
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01],
|
||||
(instregex "^BIFv", "^BITv", "^BSLv")>;
|
||||
(instregex "^BIFv", "^BITv", "^BSLv", "^BSPv")>;
|
||||
|
||||
// ASIMD count, D-form
|
||||
// ASIMD count, Q-form
|
||||
|
@ -1493,7 +1499,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
|
|||
// ASIMD duplicate, gen reg
|
||||
// ASIMD duplicate, element
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>;
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUP(i8|i16|i32|i64)$")>;
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>;
|
||||
|
||||
// ASIMD extract
|
||||
|
@ -1518,25 +1524,6 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv")>;
|
|||
// ASIMD move, FP immed
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
|
||||
|
||||
// ASIMD table lookup, D-form
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>;
|
||||
|
||||
// ASIMD table lookup, Q-form
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>;
|
||||
def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>;
|
||||
|
||||
// ASIMD transpose
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>;
|
||||
|
||||
// ASIMD unzip/zip
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01],
|
||||
(instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
|
||||
|
||||
// ASIMD reciprocal estimate, D-form
|
||||
// ASIMD reciprocal estimate, Q-form
|
||||
def : InstRW<[THX2T99Write_5Cyc_F01],
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,9 +1,8 @@
|
|||
//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
@ -48,19 +47,9 @@ def WriteAdr : SchedWrite; // Address pre/post increment.
|
|||
|
||||
def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
|
||||
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
|
||||
def ReadST : SchedRead; // Read the stored value.
|
||||
def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST.
|
||||
|
||||
// Predicate for determining when a shiftable register is shifted.
|
||||
def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>;
|
||||
|
||||
// Predicate for determining when a extendedable register is extended.
|
||||
def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>;
|
||||
|
||||
// ScaledIdxPred is true if a WriteLDIdx operand will be
|
||||
// scaled. Subtargets can use this to dynamically select resources and
|
||||
// latency for WriteLDIdx and ReadAdrBase.
|
||||
def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>;
|
||||
|
||||
// Serialized two-level address load.
|
||||
// EXAMPLE: LOADGot
|
||||
def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
|
||||
|
@ -88,7 +77,8 @@ def WriteFImm : SchedWrite; // Floating-point immediate.
|
|||
def WriteFMul : SchedWrite; // Floating-point multiply.
|
||||
def WriteFDiv : SchedWrite; // Floating-point division.
|
||||
|
||||
def WriteV : SchedWrite; // Vector ops.
|
||||
def WriteVd : SchedWrite; // 64bit Vector D ops.
|
||||
def WriteVq : SchedWrite; // 128bit Vector Q ops.
|
||||
def WriteVLD : SchedWrite; // Vector loads.
|
||||
def WriteVST : SchedWrite; // Vector stores.
|
||||
|
||||
|
@ -98,9 +88,9 @@ def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
|
|||
def ReadVLD : SchedRead;
|
||||
|
||||
// Sequential vector load and shuffle.
|
||||
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
|
||||
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
|
||||
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteVq]>;
|
||||
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>;
|
||||
|
||||
// Store a shuffled vector.
|
||||
def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
|
||||
def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
|
||||
def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>;
|
||||
def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>;
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
//===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -14,6 +13,30 @@
|
|||
|
||||
include "llvm/TableGen/SearchableTable.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Features that, for the compiler, only enable system operands and PStates
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def HasCCPP : Predicate<"Subtarget->hasCCPP()">,
|
||||
AssemblerPredicate<(all_of FeatureCCPP), "ccpp">;
|
||||
|
||||
def HasPAN : Predicate<"Subtarget->hasPAN()">,
|
||||
AssemblerPredicate<(all_of FeaturePAN),
|
||||
"ARM v8.1 Privileged Access-Never extension">;
|
||||
|
||||
def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">,
|
||||
AssemblerPredicate<(all_of FeaturePsUAO),
|
||||
"ARM v8.2 UAO PState extension (psuao)">;
|
||||
|
||||
def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
|
||||
AssemblerPredicate<(all_of FeaturePAN_RWV),
|
||||
"ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
|
||||
|
||||
def HasCONTEXTIDREL2
|
||||
: Predicate<"Subtarget->hasCONTEXTIDREL2()">,
|
||||
AssemblerPredicate<(all_of FeatureCONTEXTIDREL2),
|
||||
"Target contains CONTEXTIDR_EL2 RW operand">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AT (address translate) instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -45,7 +68,7 @@ def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
|
|||
def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
|
||||
def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
|
||||
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }] in {
|
||||
let Requires = [{ {AArch64::FeaturePAN_RWV} }] in {
|
||||
def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
|
||||
def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
|
||||
}
|
||||
|
@ -75,6 +98,21 @@ def : DB<"ld", 0xd>;
|
|||
def : DB<"st", 0xe>;
|
||||
def : DB<"sy", 0xf>;
|
||||
|
||||
class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable {
|
||||
let SearchableFields = ["Name", "Encoding", "ImmValue"];
|
||||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
bits<4> Encoding = encoding;
|
||||
bits<5> ImmValue = immValue;
|
||||
code Requires = [{ {AArch64::FeatureXS} }];
|
||||
}
|
||||
|
||||
def : DBnXS<"oshnxs", 0x3, 0x10>;
|
||||
def : DBnXS<"nshnxs", 0x7, 0x14>;
|
||||
def : DBnXS<"ishnxs", 0xb, 0x18>;
|
||||
def : DBnXS<"synxs", 0xf, 0x1c>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// DC (data cache maintenance) instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -102,9 +140,33 @@ def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
|
|||
def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
|
||||
def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
|
||||
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }] in
|
||||
let Requires = [{ {AArch64::FeatureCCPP} }] in
|
||||
def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
|
||||
|
||||
let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in
|
||||
def : DC<"CVADP", 0b011, 0b0111, 0b1101, 0b001>;
|
||||
|
||||
let Requires = [{ {AArch64::FeatureMTE} }] in {
|
||||
def : DC<"IGVAC", 0b000, 0b0111, 0b0110, 0b011>;
|
||||
def : DC<"IGSW", 0b000, 0b0111, 0b0110, 0b100>;
|
||||
def : DC<"CGSW", 0b000, 0b0111, 0b1010, 0b100>;
|
||||
def : DC<"CIGSW", 0b000, 0b0111, 0b1110, 0b100>;
|
||||
def : DC<"CGVAC", 0b011, 0b0111, 0b1010, 0b011>;
|
||||
def : DC<"CGVAP", 0b011, 0b0111, 0b1100, 0b011>;
|
||||
def : DC<"CGVADP", 0b011, 0b0111, 0b1101, 0b011>;
|
||||
def : DC<"CIGVAC", 0b011, 0b0111, 0b1110, 0b011>;
|
||||
def : DC<"GVA", 0b011, 0b0111, 0b0100, 0b011>;
|
||||
def : DC<"IGDVAC", 0b000, 0b0111, 0b0110, 0b101>;
|
||||
def : DC<"IGDSW", 0b000, 0b0111, 0b0110, 0b110>;
|
||||
def : DC<"CGDSW", 0b000, 0b0111, 0b1010, 0b110>;
|
||||
def : DC<"CIGDSW", 0b000, 0b0111, 0b1110, 0b110>;
|
||||
def : DC<"CGDVAC", 0b011, 0b0111, 0b1010, 0b101>;
|
||||
def : DC<"CGDVAP", 0b011, 0b0111, 0b1100, 0b101>;
|
||||
def : DC<"CGDVADP", 0b011, 0b0111, 0b1101, 0b101>;
|
||||
def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>;
|
||||
def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// IC (instruction cache maintenance) instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -154,7 +216,7 @@ class TSB<string name, bits<4> encoding> : SearchableTable{
|
|||
bits<4> Encoding;
|
||||
let Encoding = encoding;
|
||||
|
||||
code Requires = [{ {AArch64::HasV8_4aOps} }];
|
||||
code Requires = [{ {AArch64::FeatureTRACEV8_4} }];
|
||||
}
|
||||
|
||||
def : TSB<"csync", 0>;
|
||||
|
@ -290,14 +352,41 @@ def : PState<"SPSel", 0b00101>;
|
|||
def : PState<"DAIFSet", 0b11110>;
|
||||
def : PState<"DAIFClr", 0b11111>;
|
||||
// v8.1a "Privileged Access Never" extension-specific PStates
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in
|
||||
let Requires = [{ {AArch64::FeaturePAN} }] in
|
||||
def : PState<"PAN", 0b00100>;
|
||||
|
||||
// v8.2a "User Access Override" extension-specific PStates
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }] in
|
||||
let Requires = [{ {AArch64::FeaturePsUAO} }] in
|
||||
def : PState<"UAO", 0b00011>;
|
||||
// v8.4a timining insensitivity of data processing instructions
|
||||
let Requires = [{ {AArch64::HasV8_4aOps} }] in
|
||||
// v8.4a timing insensitivity of data processing instructions
|
||||
let Requires = [{ {AArch64::FeatureDIT} }] in
|
||||
def : PState<"DIT", 0b11010>;
|
||||
// v8.5a Spectre Mitigation
|
||||
let Requires = [{ {AArch64::FeatureSSBS} }] in
|
||||
def : PState<"SSBS", 0b11001>;
|
||||
// v8.5a Memory Tagging Extension
|
||||
let Requires = [{ {AArch64::FeatureMTE} }] in
|
||||
def : PState<"TCO", 0b11100>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVCR instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class SVCR<string name, bits<3> encoding> : SearchableTable {
|
||||
let SearchableFields = ["Name", "Encoding"];
|
||||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
bits<3> Encoding;
|
||||
let Encoding = encoding;
|
||||
code Requires = [{ {} }];
|
||||
}
|
||||
|
||||
let Requires = [{ {AArch64::FeatureSME} }] in {
|
||||
def : SVCR<"SVCRSM", 0b001>;
|
||||
def : SVCR<"SVCRZA", 0b010>;
|
||||
def : SVCR<"SVCRSMZA", 0b011>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PSB instruction options.
|
||||
|
@ -315,14 +404,28 @@ class PSB<string name, bits<5> encoding> : SearchableTable {
|
|||
def : PSB<"csync", 0x11>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TLBI (translation lookaside buffer invalidate) instruction options.
|
||||
// BTI instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
|
||||
bits<3> op2, bit needsreg = 1> : SearchableTable {
|
||||
class BTI<string name, bits<3> encoding> : SearchableTable {
|
||||
let SearchableFields = ["Name", "Encoding"];
|
||||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
bits<3> Encoding;
|
||||
let Encoding = encoding;
|
||||
}
|
||||
|
||||
def : BTI<"c", 0b010>;
|
||||
def : BTI<"j", 0b100>;
|
||||
def : BTI<"jc", 0b110>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TLBI (translation lookaside buffer invalidate) instruction options.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
|
||||
bits<3> op2, bit needsreg> {
|
||||
string Name = name;
|
||||
bits<14> Encoding;
|
||||
let Encoding{13-11} = op1;
|
||||
|
@ -330,94 +433,147 @@ class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
|
|||
let Encoding{6-3} = crm;
|
||||
let Encoding{2-0} = op2;
|
||||
bit NeedsReg = needsreg;
|
||||
code Requires = [{ {} }];
|
||||
list<string> Requires = [];
|
||||
list<string> ExtraRequires = [];
|
||||
code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }];
|
||||
}
|
||||
|
||||
def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
|
||||
def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
|
||||
def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
|
||||
def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
|
||||
def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
|
||||
def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
|
||||
def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
|
||||
def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
|
||||
def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
|
||||
def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
|
||||
def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
|
||||
def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
|
||||
def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
|
||||
def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
|
||||
def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
|
||||
def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
|
||||
def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
|
||||
def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
|
||||
def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
|
||||
def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
|
||||
def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
|
||||
def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
|
||||
def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
|
||||
def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
|
||||
def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
|
||||
def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
|
||||
def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
|
||||
def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
|
||||
def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
|
||||
def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
|
||||
def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
|
||||
def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
|
||||
def TLBITable : GenericTable {
|
||||
let FilterClass = "TLBIEntry";
|
||||
let CppTypeName = "TLBI";
|
||||
let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
|
||||
}
|
||||
|
||||
def lookupTLBIByName : SearchIndex {
|
||||
let Table = TLBITable;
|
||||
let Key = ["Name"];
|
||||
}
|
||||
|
||||
def lookupTLBIByEncoding : SearchIndex {
|
||||
let Table = TLBITable;
|
||||
let Key = ["Encoding"];
|
||||
}
|
||||
|
||||
multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
|
||||
bits<3> op2, bit needsreg = 1> {
|
||||
def : TLBIEntry<name, op1, crn, crm, op2, needsreg>;
|
||||
def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
|
||||
let Encoding{7} = 1;
|
||||
let ExtraRequires = ["AArch64::FeatureXS"];
|
||||
}
|
||||
}
|
||||
|
||||
defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
|
||||
defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
|
||||
defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
|
||||
defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
|
||||
defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
|
||||
defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
|
||||
defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
|
||||
defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
|
||||
defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
|
||||
defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
|
||||
defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
|
||||
defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
|
||||
defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
|
||||
defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
|
||||
defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
|
||||
defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
|
||||
defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
|
||||
defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
|
||||
defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
|
||||
defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
|
||||
defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
|
||||
defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
|
||||
defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
|
||||
defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
|
||||
defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
|
||||
defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
|
||||
defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
|
||||
defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
|
||||
defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
|
||||
defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
|
||||
defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
|
||||
defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
|
||||
|
||||
// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
|
||||
let Requires = ["AArch64::FeatureTLB_RMI"] in {
|
||||
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
|
||||
let Requires = [{ {AArch64::HasV8_4aOps} }] in {
|
||||
// op1 CRn CRm op2
|
||||
def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
|
||||
def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
|
||||
def : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
|
||||
def : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
|
||||
def : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
|
||||
def : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
|
||||
def : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
|
||||
def : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
|
||||
def : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
|
||||
def : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
|
||||
def : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
|
||||
def : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
|
||||
def : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
|
||||
def : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
|
||||
def : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
|
||||
def : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
|
||||
defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
|
||||
defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
|
||||
defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
|
||||
defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
|
||||
defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
|
||||
defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
|
||||
defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
|
||||
defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
|
||||
defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
|
||||
defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
|
||||
defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
|
||||
defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
|
||||
defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
|
||||
defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
|
||||
defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
|
||||
defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
|
||||
|
||||
// Armv8.4-A TLB Range Maintenance instructions:
|
||||
// op1 CRn CRm op2
|
||||
def : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
|
||||
def : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
|
||||
def : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
|
||||
def : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
|
||||
def : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
|
||||
def : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
|
||||
def : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
|
||||
def : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
|
||||
def : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
|
||||
def : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
|
||||
def : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
|
||||
def : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
|
||||
def : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
|
||||
def : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
|
||||
def : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
|
||||
def : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
|
||||
def : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
|
||||
def : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
|
||||
def : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
|
||||
def : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
|
||||
def : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
|
||||
def : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
|
||||
def : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
|
||||
def : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
|
||||
def : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
|
||||
def : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
|
||||
def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
|
||||
def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
|
||||
def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
|
||||
def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
|
||||
defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
|
||||
defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
|
||||
defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
|
||||
defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
|
||||
defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
|
||||
defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
|
||||
defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
|
||||
defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
|
||||
defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
|
||||
defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
|
||||
defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
|
||||
defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
|
||||
defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
|
||||
defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
|
||||
defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
|
||||
defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
|
||||
defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
|
||||
defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
|
||||
defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
|
||||
defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
|
||||
defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
|
||||
defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
|
||||
defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
|
||||
defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
|
||||
defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
|
||||
defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
|
||||
defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
|
||||
defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
|
||||
defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
|
||||
defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
|
||||
} //FeatureTLB_RMI
|
||||
|
||||
// Armv9-A Realm Management Extention TLBI Instructions
|
||||
let Requires = ["AArch64::FeatureRME"] in {
|
||||
defm : TLBI<"RPAOS", 0b110, 0b1000, 0b0100, 0b011>;
|
||||
defm : TLBI<"RPALOS", 0b110, 0b1000, 0b0100, 0b111>;
|
||||
defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>;
|
||||
defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>;
|
||||
}
|
||||
|
||||
// Armv8.5-A Prediction Restriction by Context instruction options:
|
||||
class PRCTX<string name, bits<4> crm> : SearchableTable {
|
||||
let SearchableFields = ["Name", "Encoding"];
|
||||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
bits<11> Encoding;
|
||||
let Encoding{10-4} = 0b0110111;
|
||||
let Encoding{3-0} = crm;
|
||||
bit NeedsReg = 1;
|
||||
code Requires = [{ {} }];
|
||||
}
|
||||
|
||||
let Requires = [{ {AArch64::FeaturePredRes} }] in {
|
||||
def : PRCTX<"RCTX", 0b0011>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -430,6 +586,7 @@ class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
|
|||
let EnumValueField = "Encoding";
|
||||
|
||||
string Name = name;
|
||||
string AltName = name;
|
||||
bits<16> Encoding;
|
||||
let Encoding{15-14} = op0;
|
||||
let Encoding{13-11} = op1;
|
||||
|
@ -476,8 +633,10 @@ def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
|
|||
def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
|
||||
def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
|
||||
def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
|
||||
|
||||
//v8.3 CCIDX - extending the CCsIDr number of sets
|
||||
def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> {
|
||||
let Requires = [{ {AArch64::HasV8_3aOps} }];
|
||||
let Requires = [{ {AArch64::FeatureCCIDX} }];
|
||||
}
|
||||
def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>;
|
||||
def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>;
|
||||
|
@ -487,6 +646,9 @@ def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>;
|
|||
def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>;
|
||||
def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>;
|
||||
def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>;
|
||||
def : ROSysReg<"ID_PFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b100> {
|
||||
let Requires = [{ {AArch64::FeatureSpecRestrict} }];
|
||||
}
|
||||
def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>;
|
||||
def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>;
|
||||
def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>;
|
||||
|
@ -510,11 +672,10 @@ def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>;
|
|||
def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>;
|
||||
def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
|
||||
def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
|
||||
def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>;
|
||||
def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
|
||||
def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
|
||||
def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> {
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }];
|
||||
}
|
||||
def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>;
|
||||
def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>;
|
||||
def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>;
|
||||
def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>;
|
||||
|
@ -525,6 +686,7 @@ def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>;
|
|||
def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>;
|
||||
def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>;
|
||||
def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>;
|
||||
def : ROSysReg<"ID_MMFR5_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b110>;
|
||||
|
||||
// Trace registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
|
@ -584,7 +746,7 @@ def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>;
|
|||
|
||||
// v8.1a "Limited Ordering Regions" extension-specific system register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in
|
||||
let Requires = [{ {AArch64::FeatureLOR} }] in
|
||||
def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>;
|
||||
|
||||
// v8.2a "RAS extension" registers
|
||||
|
@ -594,6 +756,35 @@ def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>;
|
|||
def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>;
|
||||
}
|
||||
|
||||
// v8.5a "random number" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureRandGen} }] in {
|
||||
def : ROSysReg<"RNDR", 0b11, 0b011, 0b0010, 0b0100, 0b000>;
|
||||
def : ROSysReg<"RNDRRS", 0b11, 0b011, 0b0010, 0b0100, 0b001>;
|
||||
}
|
||||
|
||||
// v8.5a Software Context Number registers
|
||||
let Requires = [{ {AArch64::FeatureSpecRestrict} }] in {
|
||||
def : RWSysReg<"SCXTNUM_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b111>;
|
||||
def : RWSysReg<"SCXTNUM_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b111>;
|
||||
def : RWSysReg<"SCXTNUM_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b111>;
|
||||
def : RWSysReg<"SCXTNUM_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b111>;
|
||||
def : RWSysReg<"SCXTNUM_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b111>;
|
||||
}
|
||||
|
||||
// v9a Realm Management Extension registers
|
||||
let Requires = [{ {AArch64::FeatureRME} }] in {
|
||||
def : RWSysReg<"MFAR_EL3", 0b11, 0b110, 0b0110, 0b0000, 0b101>;
|
||||
def : RWSysReg<"GPCCR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b110>;
|
||||
def : RWSysReg<"GPTBR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b100>;
|
||||
}
|
||||
|
||||
// v9-a Scalable Matrix Extension (SME) registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureSME} }] in {
|
||||
def : ROSysReg<"ID_AA64SMFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b101>;
|
||||
}
|
||||
|
||||
//===----------------------
|
||||
// Write-only regs
|
||||
//===----------------------
|
||||
|
@ -710,6 +901,9 @@ def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>;
|
|||
def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>;
|
||||
def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>;
|
||||
def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>;
|
||||
def : RWSysReg<"HCRX_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b010> {
|
||||
let Requires = [{ {AArch64::FeatureHCX} }];
|
||||
}
|
||||
def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>;
|
||||
def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>;
|
||||
def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>;
|
||||
|
@ -719,13 +913,19 @@ def : RWSysReg<"HSTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b011>;
|
|||
def : RWSysReg<"HACR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b111>;
|
||||
def : RWSysReg<"MDCR_EL3", 0b11, 0b110, 0b0001, 0b0011, 0b001>;
|
||||
def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>;
|
||||
def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>;
|
||||
def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>;
|
||||
|
||||
let Requires = [{ {AArch64::FeatureEL2VMSA} }] in {
|
||||
def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
|
||||
let AltName = "VSCTLR_EL2";
|
||||
}
|
||||
def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
|
||||
}
|
||||
|
||||
def : RWSysReg<"TTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b001>;
|
||||
def : RWSysReg<"TCR_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b010>;
|
||||
def : RWSysReg<"TCR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b010>;
|
||||
def : RWSysReg<"TCR_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b010>;
|
||||
def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
|
||||
def : RWSysReg<"VTCR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b010>;
|
||||
def : RWSysReg<"DACR32_EL2", 0b11, 0b100, 0b0011, 0b0000, 0b000>;
|
||||
def : RWSysReg<"SPSR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b000>;
|
||||
|
@ -740,7 +940,7 @@ def : RWSysReg<"SP_EL2", 0b11, 0b110, 0b0100, 0b0001, 0b000>;
|
|||
def : RWSysReg<"SPSel", 0b11, 0b000, 0b0100, 0b0010, 0b000>;
|
||||
def : RWSysReg<"NZCV", 0b11, 0b011, 0b0100, 0b0010, 0b000>;
|
||||
def : RWSysReg<"DAIF", 0b11, 0b011, 0b0100, 0b0010, 0b001>;
|
||||
def : RWSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>;
|
||||
def : ROSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>;
|
||||
def : RWSysReg<"SPSR_irq", 0b11, 0b100, 0b0100, 0b0011, 0b000>;
|
||||
def : RWSysReg<"SPSR_abt", 0b11, 0b100, 0b0100, 0b0011, 0b001>;
|
||||
def : RWSysReg<"SPSR_und", 0b11, 0b100, 0b0100, 0b0011, 0b010>;
|
||||
|
@ -777,6 +977,7 @@ def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>;
|
|||
def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>;
|
||||
def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>;
|
||||
def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>;
|
||||
def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
|
||||
def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>;
|
||||
def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>;
|
||||
def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>;
|
||||
|
@ -1063,7 +1264,6 @@ def : RWSysReg<"ICC_SRE_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b101>;
|
|||
def : RWSysReg<"ICC_IGRPEN0_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b110>;
|
||||
def : RWSysReg<"ICC_IGRPEN1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b111>;
|
||||
def : RWSysReg<"ICC_IGRPEN1_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b111>;
|
||||
def : RWSysReg<"ICC_SEIEN_EL1", 0b11, 0b000, 0b1100, 0b1101, 0b000>;
|
||||
def : RWSysReg<"ICC_AP0R0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b100>;
|
||||
def : RWSysReg<"ICC_AP0R1_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b101>;
|
||||
def : RWSysReg<"ICC_AP0R2_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b110>;
|
||||
|
@ -1081,9 +1281,8 @@ def : RWSysReg<"ICH_AP1R1_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b001>;
|
|||
def : RWSysReg<"ICH_AP1R2_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b010>;
|
||||
def : RWSysReg<"ICH_AP1R3_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b011>;
|
||||
def : RWSysReg<"ICH_HCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b000>;
|
||||
def : RWSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>;
|
||||
def : ROSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>;
|
||||
def : RWSysReg<"ICH_VMCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b111>;
|
||||
def : RWSysReg<"ICH_VSEIR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b100>;
|
||||
def : RWSysReg<"ICH_LR0_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b000>;
|
||||
def : RWSysReg<"ICH_LR1_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b001>;
|
||||
def : RWSysReg<"ICH_LR2_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b010>;
|
||||
|
@ -1101,24 +1300,74 @@ def : RWSysReg<"ICH_LR13_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b101>;
|
|||
def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>;
|
||||
def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
|
||||
|
||||
// v8r system registers
|
||||
let Requires = [{ {AArch64::HasV8_0rOps} }] in {
|
||||
//Virtualization System Control Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"VSCTLR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
|
||||
let AltName = "TTBR0_EL2";
|
||||
}
|
||||
|
||||
//MPU Type Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"MPUIR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b100>;
|
||||
def : RWSysReg<"MPUIR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b100>;
|
||||
|
||||
//Protection Region Enable Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"PRENR_EL1", 0b11, 0b000, 0b0110, 0b0001, 0b001>;
|
||||
def : RWSysReg<"PRENR_EL2", 0b11, 0b100, 0b0110, 0b0001, 0b001>;
|
||||
|
||||
//Protection Region Selection Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"PRSELR_EL1", 0b11, 0b000, 0b0110, 0b0010, 0b001>;
|
||||
def : RWSysReg<"PRSELR_EL2", 0b11, 0b100, 0b0110, 0b0010, 0b001>;
|
||||
|
||||
//Protection Region Base Address Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"PRBAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b000>;
|
||||
def : RWSysReg<"PRBAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b000>;
|
||||
|
||||
//Protection Region Limit Address Register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"PRLAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b001>;
|
||||
def : RWSysReg<"PRLAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b001>;
|
||||
|
||||
foreach n = 1-15 in {
|
||||
foreach x = 1-2 in {
|
||||
//Direct acces to Protection Region Base Address Register for n th MPU region
|
||||
def : RWSysReg<!strconcat("PRBAR"#n, "_EL"#x),
|
||||
0b11, 0b000, 0b0110, 0b1000, 0b000>{
|
||||
let Encoding{5-2} = n;
|
||||
let Encoding{13} = !add(x,-1);
|
||||
}
|
||||
|
||||
def : RWSysReg<!strconcat("PRLAR"#n, "_EL"#x),
|
||||
0b11, 0b000, 0b0110, 0b1000, 0b001>{
|
||||
let Encoding{5-2} = n;
|
||||
let Encoding{13} = !add(x,-1);
|
||||
}
|
||||
} //foreach x = 1-2 in
|
||||
} //foreach n = 1-15 in
|
||||
} //let Requires = [{ {AArch64::HasV8_0rOps} }] in
|
||||
|
||||
// v8.1a "Privileged Access Never" extension-specific system registers
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in
|
||||
let Requires = [{ {AArch64::FeaturePAN} }] in
|
||||
def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
|
||||
|
||||
// v8.1a "Limited Ordering Regions" extension-specific system registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in {
|
||||
let Requires = [{ {AArch64::FeatureLOR} }] in {
|
||||
def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>;
|
||||
def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>;
|
||||
def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>;
|
||||
def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>;
|
||||
}
|
||||
|
||||
// v8.1a "Virtualization hos extensions" system registers
|
||||
// v8.1a "Virtualization Host extensions" system registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_1aOps} }] in {
|
||||
let Requires = [{ {AArch64::FeatureVH} }] in {
|
||||
def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
|
||||
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
|
||||
def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>;
|
||||
def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>;
|
||||
def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>;
|
||||
|
@ -1144,10 +1393,13 @@ def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>;
|
|||
def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>;
|
||||
def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>;
|
||||
def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
|
||||
let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in {
|
||||
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
|
||||
}
|
||||
}
|
||||
// v8.2a registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_2aOps} }] in
|
||||
let Requires = [{ {AArch64::FeaturePsUAO} }] in
|
||||
def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>;
|
||||
|
||||
// v8.2a "Statistical Profiling extension" registers
|
||||
|
@ -1156,7 +1408,7 @@ let Requires = [{ {AArch64::FeatureSPE} }] in {
|
|||
def : RWSysReg<"PMBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b000>;
|
||||
def : RWSysReg<"PMBPTR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b001>;
|
||||
def : RWSysReg<"PMBSR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b011>;
|
||||
def : RWSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>;
|
||||
def : ROSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>;
|
||||
def : RWSysReg<"PMSCR_EL2", 0b11, 0b100, 0b1001, 0b1001, 0b000>;
|
||||
def : RWSysReg<"PMSCR_EL12", 0b11, 0b101, 0b1001, 0b1001, 0b000>;
|
||||
def : RWSysReg<"PMSCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b000>;
|
||||
|
@ -1165,7 +1417,7 @@ def : RWSysReg<"PMSIRR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b011>;
|
|||
def : RWSysReg<"PMSFCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b100>;
|
||||
def : RWSysReg<"PMSEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b101>;
|
||||
def : RWSysReg<"PMSLATFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b110>;
|
||||
def : RWSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>;
|
||||
def : ROSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>;
|
||||
}
|
||||
|
||||
// v8.2a "RAS extension" registers
|
||||
|
@ -1184,7 +1436,7 @@ def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
|
|||
|
||||
// v8.3a "Pointer authentication extension" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::HasV8_3aOps} }] in {
|
||||
let Requires = [{ {AArch64::FeaturePAuth} }] in {
|
||||
def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
|
||||
def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
|
||||
def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
|
||||
|
@ -1197,12 +1449,14 @@ def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
|
|||
def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
|
||||
}
|
||||
|
||||
let Requires = [{ {AArch64::HasV8_4aOps} }] in {
|
||||
|
||||
// v8.4 "Secure Exception Level 2 extension"
|
||||
let Requires = [{ {AArch64::FeatureSEL2} }] in {
|
||||
// v8.4a "Virtualization secure second stage translation" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
|
||||
def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000>;
|
||||
def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000> {
|
||||
let Requires = [{ {AArch64::HasV8_0aOps} }];
|
||||
}
|
||||
|
||||
// v8.4a "Virtualization timer" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
|
@ -1216,18 +1470,19 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
|
|||
// v8.4a "Virtualization debug state" registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
|
||||
} // FeatureSEL2
|
||||
|
||||
// v8.4a RAS registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
|
||||
def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
|
||||
def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>;
|
||||
def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>;
|
||||
def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>;
|
||||
def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
|
||||
|
||||
// v8.4a MPAM registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureMPAM} }] in {
|
||||
def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
|
||||
def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
|
||||
def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
|
||||
|
@ -1244,9 +1499,11 @@ def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
|
|||
def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
|
||||
def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
|
||||
def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
|
||||
} //FeatureMPAM
|
||||
|
||||
// v8.4a Activitiy monitor registers
|
||||
// v8.4a Activity Monitor registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureAM} }] in {
|
||||
def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>;
|
||||
def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>;
|
||||
def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>;
|
||||
|
@ -1295,6 +1552,7 @@ def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>;
|
|||
def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>;
|
||||
def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>;
|
||||
def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
|
||||
} //FeatureAM
|
||||
|
||||
// v8.4a Trace Extension registers
|
||||
//
|
||||
|
@ -1303,19 +1561,24 @@ def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
|
|||
// but they are already defined above.
|
||||
//
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in {
|
||||
def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>;
|
||||
def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>;
|
||||
def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>;
|
||||
} //FeatureTRACEV8_4
|
||||
|
||||
// v8.4a Timining insensitivity of data processing instructions
|
||||
// v8.4a Timing insensitivity of data processing instructions
|
||||
// DIT: Data Independent Timing instructions
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureDIT} }] in {
|
||||
def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>;
|
||||
} //FeatureDIT
|
||||
|
||||
// v8.4a Enhanced Support for Nested Virtualization
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureNV} }] in {
|
||||
def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>;
|
||||
|
||||
} // HasV8_4aOps
|
||||
} //FeatureNV
|
||||
|
||||
// SVE control registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
|
@ -1326,7 +1589,131 @@ def : RWSysReg<"ZCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b000>;
|
|||
def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>;
|
||||
}
|
||||
|
||||
// V8.5a Spectre mitigation SSBS register
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureSSBS} }] in
|
||||
def : RWSysReg<"SSBS", 0b11, 0b011, 0b0100, 0b0010, 0b110>;
|
||||
|
||||
// v8.5a Memory Tagging Extension
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureMTE} }] in {
|
||||
def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>;
|
||||
def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>;
|
||||
def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>;
|
||||
def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b000>;
|
||||
def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0101, 0b0110, 0b000>;
|
||||
def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0101, 0b0110, 0b000>;
|
||||
def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0101, 0b0110, 0b000>;
|
||||
def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b001>;
|
||||
def : ROSysReg<"GMID_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b100>;
|
||||
} // HasMTE
|
||||
|
||||
// Embedded Trace Extension R/W System registers
|
||||
let Requires = [{ {AArch64::FeatureETE} }] in {
|
||||
// Name Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"TRCRSR", 0b10, 0b001, 0b0000, 0b1010, 0b000>;
|
||||
// TRCEXTINSELR0 has the same encoding as ETM TRCEXTINSELR
|
||||
def : RWSysReg<"TRCEXTINSELR0", 0b10, 0b001, 0b0000, 0b1000, 0b100>;
|
||||
def : RWSysReg<"TRCEXTINSELR1", 0b10, 0b001, 0b0000, 0b1001, 0b100>;
|
||||
def : RWSysReg<"TRCEXTINSELR2", 0b10, 0b001, 0b0000, 0b1010, 0b100>;
|
||||
def : RWSysReg<"TRCEXTINSELR3", 0b10, 0b001, 0b0000, 0b1011, 0b100>;
|
||||
} // FeatureETE
|
||||
|
||||
// Trace Buffer Extension System registers
|
||||
let Requires = [{ {AArch64::FeatureTRBE} }] in {
|
||||
// Name Op0 Op1 CRn CRm Op2
|
||||
def : RWSysReg<"TRBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b000>;
|
||||
def : RWSysReg<"TRBPTR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b001>;
|
||||
def : RWSysReg<"TRBBASER_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b010>;
|
||||
def : RWSysReg<"TRBSR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b011>;
|
||||
def : RWSysReg<"TRBMAR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b100>;
|
||||
def : RWSysReg<"TRBTRG_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b110>;
|
||||
def : ROSysReg<"TRBIDR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b111>;
|
||||
} // FeatureTRBE
|
||||
|
||||
|
||||
// v8.6a Activity Monitors Virtualization Support
|
||||
let Requires = [{ {AArch64::FeatureAMVS} }] in {
|
||||
foreach n = 0-15 in {
|
||||
foreach x = 0-1 in {
|
||||
def : RWSysReg<"AMEVCNTVOFF"#x#n#"_EL2",
|
||||
0b11, 0b100, 0b1101, 0b1000, 0b000>{
|
||||
let Encoding{4} = x;
|
||||
let Encoding{3-0} = n;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// v8.6a Fine Grained Virtualization Traps
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureFineGrainedTraps} }] in {
|
||||
def : RWSysReg<"HFGRTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b100>;
|
||||
def : RWSysReg<"HFGWTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b101>;
|
||||
def : RWSysReg<"HFGITR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b110>;
|
||||
def : RWSysReg<"HDFGRTR_EL2", 0b11, 0b100, 0b0011, 0b0001, 0b100>;
|
||||
def : RWSysReg<"HDFGWTR_EL2", 0b11, 0b100, 0b0011, 0b0001, 0b101>;
|
||||
}
|
||||
|
||||
// v8.6a Enhanced Counter Virtualization
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureEnhancedCounterVirtualization} }] in {
|
||||
def : RWSysReg<"CNTSCALE_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b100>;
|
||||
def : RWSysReg<"CNTISCALE_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b101>;
|
||||
def : RWSysReg<"CNTPOFF_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b110>;
|
||||
def : RWSysReg<"CNTVFRQ_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b111>;
|
||||
def : RWSysReg<"CNTPCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b101>;
|
||||
def : RWSysReg<"CNTVCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b110>;
|
||||
}
|
||||
|
||||
// v8.7a LD64B/ST64B Accelerator Extension system register
|
||||
let Requires = [{ {AArch64::FeatureLS64} }] in
|
||||
def : RWSysReg<"ACCDATA_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b101>;
|
||||
|
||||
// Branch Record Buffer system registers
|
||||
let Requires = [{ {AArch64::FeatureBRBE} }] in {
|
||||
def : RWSysReg<"BRBCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b000>;
|
||||
def : RWSysReg<"BRBCR_EL12", 0b10, 0b101, 0b1001, 0b0000, 0b000>;
|
||||
def : RWSysReg<"BRBCR_EL2", 0b10, 0b100, 0b1001, 0b0000, 0b000>;
|
||||
def : RWSysReg<"BRBFCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b001>;
|
||||
def : ROSysReg<"BRBIDR0_EL1", 0b10, 0b001, 0b1001, 0b0010, 0b000>;
|
||||
def : RWSysReg<"BRBINFINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b000>;
|
||||
def : RWSysReg<"BRBSRCINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b001>;
|
||||
def : RWSysReg<"BRBTGTINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b010>;
|
||||
def : RWSysReg<"BRBTS_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b010>;
|
||||
foreach n = 0-31 in {
|
||||
defvar nb = !cast<bits<5>>(n);
|
||||
def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>;
|
||||
def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>;
|
||||
def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>;
|
||||
}
|
||||
}
|
||||
|
||||
// Statistical Profiling Extension system register
|
||||
let Requires = [{ {AArch64::FeatureSPE_EEF} }] in
|
||||
def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
|
||||
|
||||
// Cyclone specific system registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::ProcCyclone} }] in
|
||||
let Requires = [{ {AArch64::FeatureAppleA7SysReg} }] in
|
||||
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
|
||||
|
||||
// Scalable Matrix Extension (SME)
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureSME} }] in {
|
||||
def : RWSysReg<"SMCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b110>;
|
||||
def : RWSysReg<"SMCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b110>;
|
||||
def : RWSysReg<"SMCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b110>;
|
||||
def : RWSysReg<"SMCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b110>;
|
||||
def : RWSysReg<"SVCR", 0b11, 0b011, 0b0100, 0b0010, 0b010>;
|
||||
def : RWSysReg<"SMPRI_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b100>;
|
||||
def : RWSysReg<"SMPRIMAP_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b101>;
|
||||
def : ROSysReg<"SMIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b110>;
|
||||
def : RWSysReg<"TPIDR2_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b101>;
|
||||
} // HasSME
|
||||
|
||||
// v8.4a MPAM and SME registers
|
||||
// Op0 Op1 CRn CRm Op2
|
||||
let Requires = [{ {AArch64::FeatureMPAM, AArch64::FeatureSME} }] in {
|
||||
def : RWSysReg<"MPAMSM_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b011>;
|
||||
} // HasMPAM, HasSME
|
||||
|
|
|
@ -0,0 +1,726 @@
|
|||
//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SME Outer Products
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
|
||||
ZPRRegOp zpr_ty, string mnemonic>
|
||||
: I<(outs za_ty:$ZAda),
|
||||
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
|
||||
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
|
||||
"", []>,
|
||||
Sched<[]> {
|
||||
bits<5> Zm;
|
||||
bits<3> Pm;
|
||||
bits<3> Pn;
|
||||
bits<5> Zn;
|
||||
let Inst{31-23} = 0b100000001;
|
||||
let Inst{22} = sz;
|
||||
let Inst{21} = 0b0;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = Pm;
|
||||
let Inst{12-10} = Pn;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4} = S;
|
||||
let Inst{3} = 0b0;
|
||||
}
|
||||
|
||||
class sme_outer_product_fp32<bit S, string mnemonic>
|
||||
: sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
|
||||
bits<2> ZAda;
|
||||
let Inst{1-0} = ZAda;
|
||||
let Inst{2} = 0b0;
|
||||
}
|
||||
|
||||
class sme_outer_product_fp64<bit S, string mnemonic>
|
||||
: sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
|
||||
bits<3> ZAda;
|
||||
let Inst{2-0} = ZAda;
|
||||
}
|
||||
|
||||
class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
|
||||
MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
|
||||
string mnemonic>
|
||||
: I<(outs za_ty:$ZAda),
|
||||
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
|
||||
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
|
||||
"", []>,
|
||||
Sched<[]> {
|
||||
bits<5> Zm;
|
||||
bits<3> Pm;
|
||||
bits<3> Pn;
|
||||
bits<5> Zn;
|
||||
let Inst{31-25} = 0b1010000;
|
||||
let Inst{24} = u0;
|
||||
let Inst{23} = 0b1;
|
||||
let Inst{22} = sz;
|
||||
let Inst{21} = u1;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = Pm;
|
||||
let Inst{12-10} = Pn;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4} = S;
|
||||
let Inst{3} = 0b0;
|
||||
}
|
||||
|
||||
class sme_int_outer_product_i32<bits<3> opc, string mnemonic>
|
||||
: sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32, ZPR8,
|
||||
mnemonic> {
|
||||
bits<2> ZAda;
|
||||
let Inst{1-0} = ZAda;
|
||||
let Inst{2} = 0b0;
|
||||
}
|
||||
|
||||
class sme_int_outer_product_i64<bits<3> opc, string mnemonic>
|
||||
: sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64, ZPR16,
|
||||
mnemonic> {
|
||||
bits<3> ZAda;
|
||||
let Inst{2-0} = ZAda;
|
||||
}
|
||||
|
||||
class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
|
||||
: I<(outs TileOp32:$ZAda),
|
||||
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
|
||||
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
|
||||
"", []>,
|
||||
Sched<[]> {
|
||||
bits<5> Zm;
|
||||
bits<3> Pm;
|
||||
bits<3> Pn;
|
||||
bits<5> Zn;
|
||||
bits<2> ZAda;
|
||||
let Inst{31-22} = 0b1000000110;
|
||||
let Inst{21} = op;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = Pm;
|
||||
let Inst{12-10} = Pn;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4} = S;
|
||||
let Inst{3-2} = 0b00;
|
||||
let Inst{1-0} = ZAda;
|
||||
}
|
||||
|
||||
multiclass sme_bf16_outer_product<bit S, string mnemonic> {
|
||||
def : sme_outer_product_widening_inst<0b0, S, mnemonic>;
|
||||
}
|
||||
|
||||
multiclass sme_f16_outer_product<bit S, string mnemonic> {
|
||||
def : sme_outer_product_widening_inst<0b1, S, mnemonic>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SME Add Vector to Tile
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
|
||||
ZPRRegOp zpr_ty, string mnemonic>
|
||||
: I<(outs tile_ty:$ZAda),
|
||||
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
|
||||
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
|
||||
"", []>, Sched<[]> {
|
||||
bits<3> Pm;
|
||||
bits<3> Pn;
|
||||
bits<5> Zn;
|
||||
let Inst{31-23} = 0b110000001;
|
||||
let Inst{22} = op;
|
||||
let Inst{21-17} = 0b01000;
|
||||
let Inst{16} = V;
|
||||
let Inst{15-13} = Pm;
|
||||
let Inst{12-10} = Pn;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-3} = 0b00;
|
||||
}
|
||||
|
||||
class sme_add_vector_to_tile_u32<bit V, string mnemonic>
|
||||
: sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
|
||||
bits<2> ZAda;
|
||||
let Inst{2} = 0b0;
|
||||
let Inst{1-0} = ZAda;
|
||||
}
|
||||
|
||||
class sme_add_vector_to_tile_u64<bit V, string mnemonic>
|
||||
: sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
|
||||
bits<3> ZAda;
|
||||
let Inst{2-0} = ZAda;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SME Contiguous Loads
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
|
||||
string mnemonic, string argstr>
|
||||
: I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
|
||||
bits<5> Rm;
|
||||
bits<2> Rv;
|
||||
bits<3> Pg;
|
||||
bits<5> Rn;
|
||||
let Inst{31-25} = 0b1110000;
|
||||
let Inst{24} = Q;
|
||||
let Inst{23-22} = msz;
|
||||
let Inst{21} = 0b0;
|
||||
let Inst{20-16} = Rm;
|
||||
let Inst{15} = V;
|
||||
let Inst{14-13} = Rv;
|
||||
let Inst{12-10} = Pg;
|
||||
let Inst{9-5} = Rn;
|
||||
let Inst{4} = 0b0;
|
||||
|
||||
let mayLoad = 1;
|
||||
}
|
||||
|
||||
class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
|
||||
MatrixTileVectorOperand tile_ty, bit is_col,
|
||||
Operand imm_ty, RegisterOperand gpr_ty>
|
||||
: sme_mem_ld_ss_base<
|
||||
Q, is_col, msz, (outs tile_ty:$ZAt),
|
||||
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
|
||||
gpr_ty:$Rm),
|
||||
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
|
||||
|
||||
multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
|
||||
MatrixTileVectorOperand tile_ty,
|
||||
Operand imm_ty, RegisterOperand gpr_ty,
|
||||
string pg_suffix=""> {
|
||||
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
|
||||
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
|
||||
// Default XZR offset aliases
|
||||
def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
|
||||
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
|
||||
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
|
||||
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
|
||||
}
|
||||
|
||||
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
|
||||
string pg_suffix=""> {
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
|
||||
!if(is_col, TileVectorOpV8, TileVectorOpH8),
|
||||
sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
|
||||
!if(is_col, TileVectorOpV16, TileVectorOpH16),
|
||||
sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
|
||||
!if(is_col, TileVectorOpV32, TileVectorOpH32),
|
||||
sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
|
||||
!if(is_col, TileVectorOpV64, TileVectorOpH64),
|
||||
sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
|
||||
defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
|
||||
!if(is_col, TileVectorOpV128, TileVectorOpH128),
|
||||
sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
|
||||
}
|
||||
|
||||
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
|
||||
defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
|
||||
}
|
||||
|
||||
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
|
||||
def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
|
||||
!if(is_col, TileVectorOpV8, TileVectorOpH8),
|
||||
is_col, sme_elm_idx0_15, GPR64shifted8> {
|
||||
bits<4> imm;
|
||||
let Inst{3-0} = imm;
|
||||
}
|
||||
def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
|
||||
!if(is_col, TileVectorOpV16, TileVectorOpH16),
|
||||
is_col, sme_elm_idx0_7, GPR64shifted16> {
|
||||
bits<1> ZAt;
|
||||
bits<3> imm;
|
||||
let Inst{3} = ZAt;
|
||||
let Inst{2-0} = imm;
|
||||
}
|
||||
def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
|
||||
!if(is_col, TileVectorOpV32, TileVectorOpH32),
|
||||
is_col, sme_elm_idx0_3, GPR64shifted32> {
|
||||
bits<2> ZAt;
|
||||
bits<2> imm;
|
||||
let Inst{3-2} = ZAt;
|
||||
let Inst{1-0} = imm;
|
||||
}
|
||||
def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
|
||||
!if(is_col, TileVectorOpV64, TileVectorOpH64),
|
||||
is_col, sme_elm_idx0_1, GPR64shifted64> {
|
||||
bits<3> ZAt;
|
||||
bits<1> imm;
|
||||
let Inst{3-1} = ZAt;
|
||||
let Inst{0} = imm;
|
||||
}
|
||||
def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
|
||||
!if(is_col, TileVectorOpV128, TileVectorOpH128),
|
||||
is_col, sme_elm_idx0_0, GPR64shifted128> {
|
||||
bits<4> ZAt;
|
||||
let Inst{3-0} = ZAt;
|
||||
}
|
||||
|
||||
defm : sme_mem_ld_ss_aliases<NAME, is_col>;
|
||||
}
|
||||
|
||||
multiclass sme_mem_ld_ss<string mnemonic> {
|
||||
defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
|
||||
defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// SME Contiguous Stores
//===----------------------------------------------------------------------===//

// Base encoding for SME ST1 (scalar+scalar addressing). Fixes the shared
// fields; subclasses encode the tile/immediate split in bits {3-0}.
class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
                         string mnemonic, string argstr>
    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;                    // offset register ([Rn, Rm])
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<3> Pg;                    // governing predicate (p0-p7)
  bits<5> Rn;                    // base address register
  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;           // set only for the 128-bit (q) form
  let Inst{23-22} = msz;         // memory element size
  let Inst{21}    = 0b1;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;           // 0 = horizontal slice, 1 = vertical slice
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;

  let mayStore = 1;
  let hasSideEffects = 1;
}
|
||||
|
||||
// Concrete ST1 instruction: binds the operand list (tile slice, slice index,
// predicate, base and offset registers) and the assembly string to the base
// encoding above.
class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_st_ss_base<
        Q, is_col, msz,
        (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
             GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
|
||||
|
||||
// Assembly aliases for the SME ST1 stores. Unlike loads, stores take no
// "/z" predicate qualifier, so the default (empty) suffix is used.
multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}
|
||||
|
||||
// One horizontal or vertical SME ST1 variant per element size (b/h/w/d/q).
// Mirrors sme_mem_ld_v_ss: bits {3-0} are split between the tile index
// (ZAt) and the slice immediate (imm) according to element size.
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
  def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
                              is_col, sme_elm_idx0_15, GPR64shifted8> {
    bits<4> imm;
    let Inst{3-0} = imm;       // single byte tile: all 4 bits are the index
  }
  def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
                              is_col, sme_elm_idx0_7, GPR64shifted16> {
    bits<1> ZAt;
    bits<3> imm;
    let Inst{3}   = ZAt;       // 2 tiles, index 0-7
    let Inst{2-0} = imm;
  }
  def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
                              is_col, sme_elm_idx0_3, GPR64shifted32> {
    bits<2> ZAt;
    bits<2> imm;
    let Inst{3-2} = ZAt;       // 4 tiles, index 0-3
    let Inst{1-0} = imm;
  }
  def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
                              is_col, sme_elm_idx0_1, GPR64shifted64> {
    bits<3> ZAt;
    bits<1> imm;
    let Inst{3-1} = ZAt;       // 8 tiles, index 0-1
    let Inst{0}   = imm;
  }
  def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
                              is_col, sme_elm_idx0_0, GPR64shifted128> {
    bits<4> ZAt;
    let Inst{3-0} = ZAt;       // 16 tiles; immediate is always 0 (no bits)
  }

  defm : sme_mem_st_ss_aliases<NAME, is_col>;
}
|
||||
|
||||
// Entry point for the SME contiguous stores: instantiates both the
// horizontal (_H) and vertical (_V) slice forms.
multiclass sme_mem_st_ss<string mnemonic> {
  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// SME Save and Restore Array
//===----------------------------------------------------------------------===//

// ZA array spill/fill (STR/LDR ZA). A single bit selects store vs. load;
// the memory flags are derived from it so the two directions cannot get
// out of sync.
class sme_spill_fill_inst<bit isStore, dag outs, dag ins, string opcodestr>
    : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
        []>,
      Sched<[]> {
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<5> Rn;                    // base address register
  bits<4> imm4;                  // vector-select immediate (0-15)
  let Inst{31-22} = 0b1110000100;
  let Inst{21}    = isStore;
  let Inst{20-15} = 0b000000;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = imm4;

  let mayLoad = !not(isStore);
  let mayStore = isStore;
}
|
||||
|
||||
// Defines the spill/fill instruction plus a no-offset alias:
// "op za[wv, #i], [xn]" expands with an implicit #0 vector offset.
multiclass sme_spill_fill<bit isStore, dag outs, dag ins, string opcodestr> {
  def NAME : sme_spill_fill_inst<isStore, outs, ins, opcodestr>;

  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
}
|
||||
|
||||
// STR ZA: store form of the spill/fill pair (isStore = 1, ZA is an input).
multiclass sme_spill<string opcodestr> {
  defm NAME : sme_spill_fill<0b1, (outs),
                             (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
                                  sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
                                  imm0_15:$offset),
                             opcodestr>;
}
|
||||
|
||||
// LDR ZA: load form of the spill/fill pair (isStore = 0, ZA is the output).
multiclass sme_fill<string opcodestr> {
  defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt),
                             (ins MatrixIndexGPR32Op12_15:$Rv,
                                  sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
                                  imm0_15:$offset),
                             opcodestr>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//

// Base encoding for MOVA (SVE vector to ZA tile slice). Subclasses encode
// the tile/immediate split in bits {3-0}.
class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<3> Pg;                    // governing predicate (p0-p7)
  bits<5> Zn;                    // source SVE vector
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;          // element size
  let Inst{21-17} = 0b00000;
  let Inst{16}    = Q;           // set only for the 128-bit (q) form
  let Inst{15}    = V;           // 0 = horizontal slice, 1 = vertical slice
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4}     = 0b0;
}
|
||||
|
||||
// Concrete vector-to-tile MOVA: binds the operand list and assembly string
// ("ZAd[Rv, imm], Pg/m, Zn") to the base encoding.
class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
                              bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
                              string mnemonic>
    : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
        mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">;
|
||||
|
||||
// "mov" alias for the vector-to-tile MOVA form.
multiclass sme_vector_to_tile_aliases<Instruction inst,
                                      MatrixTileVectorOperand tile_ty,
                                      ZPRRegOp zpr_ty, Operand imm_ty> {
  def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
                  (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
}
|
||||
|
||||
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
|
||||
def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
|
||||
TileVectorOpH8),
|
||||
is_col, sme_elm_idx0_15, ZPR8, mnemonic> {
|
||||
bits<4> imm;
|
||||
let Inst{3-0} = imm;
|
||||
}
|
||||
def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
|
||||
TileVectorOpH16),
|
||||
is_col, sme_elm_idx0_7, ZPR16, mnemonic> {
|
||||
bits<1> ZAd;
|
||||
bits<3> imm;
|
||||
let Inst{3} = ZAd;
|
||||
let Inst{2-0} = imm;
|
||||
}
|
||||
def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
|
||||
TileVectorOpH32),
|
||||
is_col, sme_elm_idx0_3, ZPR32, mnemonic> {
|
||||
bits<2> ZAd;
|
||||
bits<2> imm;
|
||||
let Inst{3-2} = ZAd;
|
||||
let Inst{1-0} = imm;
|
||||
}
|
||||
def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
|
||||
TileVectorOpH64),
|
||||
is_col, sme_elm_idx0_1, ZPR64, mnemonic> {
|
||||
bits<3> ZAd;
|
||||
bits<1> imm;
|
||||
let Inst{3-1} = ZAd;
|
||||
let Inst{0} = imm;
|
||||
}
|
||||
def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
|
||||
TileVectorOpH128),
|
||||
is_col, sme_elm_idx0_0, ZPR128, mnemonic> {
|
||||
bits<4> ZAd;
|
||||
bits<1> imm;
|
||||
let Inst{3-0} = ZAd;
|
||||
}
|
||||
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
|
||||
!if(is_col, TileVectorOpV8,
|
||||
TileVectorOpH8),
|
||||
ZPR8, sme_elm_idx0_15>;
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
|
||||
!if(is_col, TileVectorOpV16,
|
||||
TileVectorOpH16),
|
||||
ZPR16, sme_elm_idx0_7>;
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
|
||||
!if(is_col, TileVectorOpV32,
|
||||
TileVectorOpH32),
|
||||
ZPR32, sme_elm_idx0_3>;
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
|
||||
!if(is_col, TileVectorOpV64,
|
||||
TileVectorOpH64),
|
||||
ZPR64, sme_elm_idx0_1>;
|
||||
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
|
||||
!if(is_col, TileVectorOpV128,
|
||||
TileVectorOpH128),
|
||||
ZPR128, sme_elm_idx0_0>;
|
||||
}
|
||||
|
||||
// Entry point for vector-to-tile moves: horizontal (_H) and vertical (_V).
multiclass sme_vector_to_tile<string mnemonic> {
  defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
}
|
||||
|
||||
// Base encoding for MOVA (ZA tile slice to SVE vector). Distinguished from
// the vector-to-tile direction by bits {21-17} = 0b00001; subclasses encode
// the tile/immediate split in bits {8-5}.
class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<3> Pg;                    // governing predicate (p0-p7)
  bits<5> Zd;                    // destination SVE vector
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;          // element size
  let Inst{21-17} = 0b00001;
  let Inst{16}    = Q;           // set only for the 128-bit (q) form
  let Inst{15}    = V;           // 0 = horizontal slice, 1 = vertical slice
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9}     = 0b0;
  let Inst{4-0}   = Zd;
}
|
||||
|
||||
// Concrete tile-to-vector MOVA: binds the operand list and assembly string
// ("Zd, Pg/m, ZAn[Rv, imm]") to the base encoding.
class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
                              MatrixTileVectorOperand tile_ty,
                              bit is_col, Operand imm_ty, string mnemonic>
    : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
        (ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]">;
|
||||
|
||||
// "mov" alias for the tile-to-vector MOVA form.
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
                                      MatrixTileVectorOperand tile_ty,
                                      Operand imm_ty> {
  def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
                  (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
}
|
||||
|
||||
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
|
||||
def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
|
||||
TileVectorOpH8),
|
||||
is_col, sme_elm_idx0_15, mnemonic> {
|
||||
bits<4> imm;
|
||||
let Inst{8-5} = imm;
|
||||
}
|
||||
def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
|
||||
TileVectorOpH16),
|
||||
is_col, sme_elm_idx0_7, mnemonic> {
|
||||
bits<1> ZAn;
|
||||
bits<3> imm;
|
||||
let Inst{8} = ZAn;
|
||||
let Inst{7-5} = imm;
|
||||
}
|
||||
def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
|
||||
TileVectorOpH32),
|
||||
is_col, sme_elm_idx0_3, mnemonic> {
|
||||
bits<2> ZAn;
|
||||
bits<2> imm;
|
||||
let Inst{8-7} = ZAn;
|
||||
let Inst{6-5} = imm;
|
||||
}
|
||||
def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
|
||||
TileVectorOpH64),
|
||||
is_col, sme_elm_idx0_1, mnemonic> {
|
||||
bits<3> ZAn;
|
||||
bits<1> imm;
|
||||
let Inst{8-6} = ZAn;
|
||||
let Inst{5} = imm;
|
||||
}
|
||||
def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
|
||||
TileVectorOpH128),
|
||||
is_col, sme_elm_idx0_0, mnemonic> {
|
||||
bits<4> ZAn;
|
||||
let Inst{8-5} = ZAn;
|
||||
}
|
||||
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
|
||||
!if(is_col, TileVectorOpV8,
|
||||
TileVectorOpH8), sme_elm_idx0_15>;
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
|
||||
!if(is_col, TileVectorOpV16,
|
||||
TileVectorOpH16), sme_elm_idx0_7>;
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
|
||||
!if(is_col, TileVectorOpV32,
|
||||
TileVectorOpH32), sme_elm_idx0_3>;
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
|
||||
!if(is_col, TileVectorOpV64,
|
||||
TileVectorOpH64), sme_elm_idx0_1>;
|
||||
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
|
||||
!if(is_col, TileVectorOpV128,
|
||||
TileVectorOpH128), sme_elm_idx0_0>;
|
||||
}
|
||||
|
||||
// Entry point for tile-to-vector moves: horizontal (_H) and vertical (_V).
multiclass sme_tile_to_vector<string mnemonic> {
  defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// SME Zero
//===----------------------------------------------------------------------===//

// ZERO instruction: an 8-bit mask operand selects which ZA tiles to zero.
class sme_zero_inst<string mnemonic>
    : I<(outs MatrixTileList:$imm), (ins),
        mnemonic, "\t$imm", "", []>, Sched<[]> {
  bits<8> imm;                   // tile mask
  let Inst{31-8} = 0b110000000000100000000000;
  let Inst{7-0}  = imm;
}
|
||||
|
||||
// ZERO plus the preferred-disassembly aliases: each named tile-list form
// maps to the 8-bit mask covering the byte-tile slices it occupies.
multiclass sme_zero<string mnemonic> {
  def NAME : sme_zero_inst<mnemonic>;

  def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
  def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
  def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
  def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
  def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
  def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
  def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
  def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
// SVE2 Instructions
//===----------------------------------------------------------------------===//

// REVD: predicated, destructive reversal over 128-bit (q) elements —
// destination is tied to $_Zd and merged under $Pg.
class sve2_int_perm_revd<string asm>
    : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
      Sched<[]> {
  bits<5> Zd;
  bits<3> Pg;
  bits<5> Zn;
  let Inst{31-24} = 0b00000101;
  let Inst{23-22} = 0b00; // size
  let Inst{21-13} = 0b101110100;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveUnary;
  let ElementSize = ZPR128.ElementSize;
}
|
||||
|
||||
// SCLAMP/UCLAMP: destructive three-operand clamp; U selects the unsigned
// form, and the destination is tied to $_Zd.
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
        asm, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<5> Zn;
  bits<5> Zd;
  let Inst{31-24} = 0b01000100;
  let Inst{23-22} = sz;          // element size
  let Inst{21}    = 0b0;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b11000;
  let Inst{10}    = U;           // 0 = signed, 1 = unsigned
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveOther;
  let ElementSize = zpr_ty.ElementSize;
}
|
||||
|
||||
// Clamp variants for every element size (b/h/s/d).
multiclass sve2_clamp<string asm, bit U> {
  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
}
|
||||
|
||||
// PSEL: predicate select indexed by a slice register plus immediate.
// The immediate's encoding (bits 23-22 and 20-18) is element-size
// dependent and supplied by the subclasses in the multiclass below.
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
    : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm,
                            MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
      Sched<[]> {
  bits<2> Rv;                    // slice-select register (w12-w15)
  bits<4> Pn;
  bits<4> Pm;
  bits<4> Pd;
  let Inst{31-24} = 0b00100101;
  let Inst{21}    = 0b1;
  let Inst{17-16} = Rv;
  let Inst{15-14} = 0b01;
  let Inst{13-10} = Pn;
  let Inst{9}     = 0b0;
  let Inst{8-5}   = Pm;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = Pd;
}
|
||||
|
||||
// PSEL variants per element size. The immediate is scattered across
// bits {23-22} and {20-18}, with a size marker occupying the bits the
// immediate does not use (a size-encoded field, not a plain immediate).
multiclass sve2_int_perm_sel_p<string asm> {
  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
    bits<4> imm;
    let Inst{23-22} = imm{3-2};
    let Inst{20-19} = imm{1-0};
    let Inst{18}    = 0b1;
  }
  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
    bits<3> imm;
    let Inst{23-22} = imm{2-1};
    let Inst{20}    = imm{0};
    let Inst{19-18} = 0b10;
  }
  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
    bits<2> imm;
    let Inst{23-22} = imm{1-0};
    let Inst{20-18} = 0b100;
  }
  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
    bits<1> imm;
    let Inst{23}    = imm;
    let Inst{22}    = 0b1;
    let Inst{20-18} = 0b000;
  }
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue