mirror of
https://github.com/intel/llvm.git
synced 2026-01-21 04:14:03 +08:00
[X86] Fold CONCAT(VPERMV3(X,Y,M0),VPERMV3(Z,W,M1)) -> VPERMV3(CONCAT(X,Z),CONCAT(Y,W),CONCAT(M0,M1))
Further preparatory work toward supporting different subvector sizes in combineX86ShufflesRecursively.
This commit is contained in:
@@ -48813,6 +48813,38 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
|
||||
return DAG.getBitcast(VT, Res);
|
||||
}
|
||||
break;
|
||||
case X86ISD::VPERMV3:
|
||||
if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
|
||||
MVT OpVT = Op0.getSimpleValueType();
|
||||
int NumSrcElts = OpVT.getVectorNumElements();
|
||||
SmallVector<int, 64> ConcatMask;
|
||||
for (unsigned i = 0; i != NumOps; ++i) {
|
||||
bool IsUnary;
|
||||
SmallVector<int, 64> SubMask;
|
||||
SmallVector<SDValue, 2> SubOps;
|
||||
if (!getTargetShuffleMask(Ops[i].getNode(), OpVT, false, SubOps,
|
||||
SubMask, IsUnary))
|
||||
break;
|
||||
for (int M : SubMask) {
|
||||
if (0 <= M) {
|
||||
M += M < NumSrcElts ? 0 : NumSrcElts;
|
||||
M += i * NumSrcElts;
|
||||
}
|
||||
ConcatMask.push_back(M);
|
||||
}
|
||||
}
|
||||
if (ConcatMask.size() == (NumOps * NumSrcElts)) {
|
||||
SDValue Src0 = concatSubVectors(Ops[0].getOperand(0),
|
||||
Ops[1].getOperand(0), DAG, DL);
|
||||
SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
|
||||
Ops[1].getOperand(2), DAG, DL);
|
||||
MVT IntMaskSVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
|
||||
MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
|
||||
SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
|
||||
return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case X86ISD::VSHLI:
|
||||
case X86ISD::VSRAI:
|
||||
case X86ISD::VSRLI:
|
||||
|
||||
@@ -145,11 +145,10 @@ define <32 x i16> @concat_trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) nou
|
||||
; AVX512-NEXT: vpsrad $23, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
|
||||
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = ashr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
|
||||
%2 = ashr <16 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
|
||||
@@ -166,11 +165,10 @@ define <32 x i16> @concat_trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) nou
|
||||
; AVX512-NEXT: vpsrld $23, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
|
||||
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = lshr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
|
||||
%2 = lshr <16 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
|
||||
@@ -183,26 +181,25 @@ define <32 x i16> @concat_trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) nou
|
||||
define <64 x i8> @concat_trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
|
||||
; AVX512F-LABEL: concat_trunc_packsswb_512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
|
||||
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512F-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512F-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512F-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: concat_trunc_packsswb_512:
|
||||
@@ -211,11 +208,10 @@ define <64 x i8> @concat_trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512BW-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512BW-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%1 = ashr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
|
||||
%2 = and <32 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
@@ -228,26 +224,25 @@ define <64 x i8> @concat_trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
|
||||
define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
|
||||
; AVX512F-LABEL: concat_trunc_packuswb_512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $15, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
|
||||
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512F-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512F-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512F-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: concat_trunc_packuswb_512:
|
||||
@@ -256,11 +251,10 @@ define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [2,6,3,7]
|
||||
; AVX512BW-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,4,1,5]
|
||||
; AVX512BW-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
|
||||
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%1 = lshr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
|
||||
%2 = and <32 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
|
||||
Reference in New Issue
Block a user