mirror of
https://github.com/intel/llvm.git
synced 2026-01-25 01:07:04 +08:00
[X86][AVX] lowerV2X128Shuffle - attempt to recognise broadcastf128 subvector load
As noticed on PR50053, we were failing to recognise when a shuffle of a load was really a subvector broadcast load.
This commit is contained in:
@@ -16054,9 +16054,33 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
|
||||
const APInt &Zeroable,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
// With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
|
||||
if (Subtarget.hasAVX2() && V2.isUndef())
|
||||
return SDValue();
|
||||
if (V2.isUndef()) {
|
||||
// Attempt to match VBROADCAST*128 subvector broadcast load.
|
||||
bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1);
|
||||
bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
|
||||
if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
|
||||
MayFoldLoad(peekThroughOneUseBitcasts(V1))) {
|
||||
auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
|
||||
if (!Ld->isNonTemporal()) {
|
||||
MVT MemVT = VT.getHalfNumVectorElementsVT();
|
||||
unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
|
||||
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
|
||||
SDValue Ptr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
|
||||
TypeSize::Fixed(Ofs), DL);
|
||||
SDValue Ops[] = {Ld->getChain(), Ptr};
|
||||
SDValue BcastLd = DAG.getMemIntrinsicNode(
|
||||
X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, MemVT,
|
||||
DAG.getMachineFunction().getMachineMemOperand(
|
||||
Ld->getMemOperand(), Ofs, MemVT.getStoreSize()));
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
|
||||
return BcastLd;
|
||||
}
|
||||
}
|
||||
|
||||
// With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
|
||||
if (Subtarget.hasAVX2())
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
bool V2IsZero = !V2.isUndef() && ISD::isBuildVectorAllZeros(V2.getNode());
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_mem_shuffle
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -disable-peephole | FileCheck %s --check-prefixes=ALL,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefixes=ALL,AVX2
|
||||
|
||||
define <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
|
||||
; AVX1-LABEL: shuffle_v8f32_45670123:
|
||||
@@ -60,15 +60,10 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_01230123_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
|
||||
; AVX1-LABEL: shuffle_v8f32_01230123_mem:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vperm2f128 $34, (%rdi), %ymm0, %ymm0 # ymm0 = mem[0,1,0,1]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8f32_01230123_mem:
|
||||
; AVX2: # %bb.0: # %entry
|
||||
; AVX2-NEXT: vpermpd $68, (%rdi), %ymm0 # ymm0 = mem[0,1,0,1]
|
||||
; AVX2-NEXT: retq
|
||||
; ALL-LABEL: shuffle_v8f32_01230123_mem:
|
||||
; ALL: # %bb.0: # %entry
|
||||
; ALL-NEXT: vbroadcastf128 (%rdi), %ymm0 # ymm0 = mem[0,1,0,1]
|
||||
; ALL-NEXT: retq
|
||||
entry:
|
||||
%a = load <8 x float>, <8 x float>* %pa
|
||||
%b = load <8 x float>, <8 x float>* %pb
|
||||
@@ -92,15 +87,10 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_45674567_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
|
||||
; AVX1-LABEL: shuffle_v8f32_45674567_mem:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vperm2f128 $51, (%rdi), %ymm0, %ymm0 # ymm0 = mem[2,3,2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8f32_45674567_mem:
|
||||
; AVX2: # %bb.0: # %entry
|
||||
; AVX2-NEXT: vpermpd $238, (%rdi), %ymm0 # ymm0 = mem[2,3,2,3]
|
||||
; AVX2-NEXT: retq
|
||||
; ALL-LABEL: shuffle_v8f32_45674567_mem:
|
||||
; ALL: # %bb.0: # %entry
|
||||
; ALL-NEXT: vbroadcastf128 16(%rdi), %ymm0 # ymm0 = mem[0,1,0,1]
|
||||
; ALL-NEXT: retq
|
||||
entry:
|
||||
%a = load <8 x float>, <8 x float>* %pa
|
||||
%b = load <8 x float>, <8 x float>* %pb
|
||||
|
||||
Reference in New Issue
Block a user