mirror of
https://github.com/intel/llvm.git
synced 2026-02-05 04:46:27 +08:00
[AArch64] Further restricts when a dup(*ext) can be rearranged
In most cases, the dup(*ext) pattern can be rearranged to perform the extension on the vector side, allowing for further vector-specific optimisations to be made. However, the initial checks for this conversion were insufficient, allowing invalid encodings to be attempted (causing compilation to fail). Differential Revision: https://reviews.llvm.org/D94778
This commit is contained in:
@@ -11843,7 +11843,8 @@ static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
|
||||
|
||||
SDValue InsertVectorNode = DAG.getNode(
|
||||
InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
|
||||
Extend.getOperand(0), DAG.getConstant(0, DL, MVT::i64));
|
||||
DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
|
||||
DAG.getConstant(0, DL, MVT::i64));
|
||||
|
||||
std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
|
||||
|
||||
@@ -11851,9 +11852,8 @@ static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
|
||||
DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
|
||||
DAG.getUNDEF(PreExtendVT), ShuffleMask);
|
||||
|
||||
SDValue ExtendNode =
|
||||
DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, TargetType,
|
||||
VectorShuffleNode, DAG.getValueType(TargetType));
|
||||
SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
|
||||
DL, TargetType, VectorShuffleNode);
|
||||
|
||||
return ExtendNode;
|
||||
}
|
||||
|
||||
33
llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
Normal file
33
llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
Normal file
@@ -0,0 +1,33 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -o -| FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
; This test covers a case where an AArch64 DUP instruction is generated with an
|
||||
; invalid encoding, resulting in a crash. We don't care about the specific output
|
||||
; here, only that this case no longer causes said crash.
|
||||
define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
|
||||
; CHECK-LABEL: dupext_crashtest:
|
||||
for.body.lr.ph:
|
||||
%conv314 = zext i32 %e to i64
|
||||
br label %vector.memcheck
|
||||
|
||||
vector.memcheck: ; preds = %for.body.lr.ph
|
||||
br label %vector.ph
|
||||
|
||||
vector.ph: ; preds = %vector.memcheck
|
||||
%broadcast.splatinsert = insertelement <2 x i64> poison, i64 %conv314, i32 0
|
||||
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
|
||||
br label %vector.body
|
||||
|
||||
vector.body: ; preds = %vector.body, %vector.ph
|
||||
%wide.load = load <2 x i32>, <2 x i32>* undef, align 4
|
||||
%0 = zext <2 x i32> %wide.load to <2 x i64>
|
||||
%1 = mul nuw <2 x i64> %broadcast.splat, %0
|
||||
%2 = trunc <2 x i64> %1 to <2 x i32>
|
||||
%3 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> %2
|
||||
%4 = bitcast i32* undef to <2 x i32>*
|
||||
store <2 x i32> %3, <2 x i32>* %4, align 4
|
||||
br label %vector.body
|
||||
}
|
||||
Reference in New Issue
Block a user