[PowerPC] Lowering support for EVL type VP_LOAD/VP_STORE (#165910)
Map the EVL form of VP_LOAD/VP_STORE for fixed-length vectors to the PPC load/store-with-length instructions.
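
The explicit vector length (EVL) operand of llvm.vp.load / llvm.vp.store bounds how many leading lanes are actually read or written, which is what lets these intrinsics map directly onto the PPC load/store-with-length instructions (lxvl/stxvl, or lxvrl/stxvrl on ISA-Future). A rough C++ sketch of that memory contract for one fixed-length case; the helper names are made up for illustration and the sketch assumes 0 <= EVL <= 4 and an all-true mask:

// Illustrative sketch only (hypothetical helpers, not LLVM API): what a
// fixed-length vp.load/vp.store with an all-true mask and an explicit
// vector length (EVL) of N is allowed to touch in memory.
#include <cstdint>
#include <cstring>

struct V4I32 { int32_t Lane[4]; };

// Like: %v = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %p, <4 x i1> splat (i1 true), i32 %evl)
// Only the first EVL * 4 bytes may be read; trailing lanes carry no defined
// value (this sketch simply leaves them zeroed). Assumes EVL <= 4.
V4I32 vpLoadV4I32(const void *P, unsigned EVL) {
  V4I32 R = {};
  std::memcpy(R.Lane, P, EVL * sizeof(int32_t));
  return R;
}

// Like: call void @llvm.vp.store.v4i32.p0(<4 x i32> %v, ptr %p, <4 x i1> splat (i1 true), i32 %evl)
// Only the first EVL * 4 bytes are written. Assumes EVL <= 4.
void vpStoreV4I32(const V4I32 &V, void *P, unsigned EVL) {
  std::memcpy(P, V.Lane, EVL * sizeof(int32_t));
}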
@@ -657,6 +657,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  if (Subtarget.isISA3_0() && isPPC64) {
    setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
@@ -11917,6 +11928,62 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
  return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
}

// Adjust the length value for a load/store with length to account for the
// instructions requiring a left justified length, and for non-byte element
// types requiring scaling by element size.
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
                            SelectionDAG &DAG) {
  SDLoc dl(Val);
  EVT VT = Val->getValueType(0);
  unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
  unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
  SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
  return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
}
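
For reference, a minimal standalone sketch (not part of the patch) of the same arithmetic, assuming the length value lives in a 64-bit register as in the tests below. It reproduces the sldi amounts checked in vp-ld-st.ll: 56-59 for the pwr10 lxvl/stxvl path, which take the byte count left-justified in the top byte of the register, and 0-3 for the ISA-Future lxvrl/stxvrl path, where only the element-size scaling remains (a shift of 0 is simply not emitted):

// Standalone illustration of AdjustLength's arithmetic for a 64-bit length
// register; the helper name is hypothetical and not part of the patch.
#include <cstdio>

static unsigned shiftAmount(unsigned EltBits, bool Left) {
  unsigned LeftAdj = Left ? 64 - 8 : 0; // left-justify into the top byte
  unsigned TypeAdj = 0;                 // log2 of the element size in bytes
  for (unsigned Bytes = EltBits / 8; Bytes > 1; Bytes >>= 1)
    ++TypeAdj;
  return LeftAdj + TypeAdj;
}

int main() {
  const unsigned EltBits[] = {8, 16, 32, 64};
  // Prints 56/57/58/59 for the pwr10 path and 0/1/2/3 for the future path,
  // matching the sldi constants in the vp-ld-st.ll checks.
  for (unsigned Bits : EltBits)
    std::printf("elt=%2u  pwr10 sldi %2u  future sldi %u\n", Bits,
                shiftAmount(Bits, true), shiftAmount(Bits, false));
  return 0;
}
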
SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
  auto VPLD = cast<VPLoadSDNode>(Op);
  bool Future = Subtarget.isISAFuture();
  SDLoc dl(Op);
  assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
         "Mask predication not supported");
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
  unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
  unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
  Len = AdjustLength(Len, EltBits, !Future, DAG);
  SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
                   VPLD->getOperand(1), Len};
  SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
  SDValue VPL =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
                              VPLD->getMemoryVT(), VPLD->getMemOperand());
  return VPL;
}

SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
  auto VPST = cast<VPStoreSDNode>(Op);
  assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
         "Mask predication not supported");
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc dl(Op);
  SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
  unsigned EltBits =
      Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
  bool Future = Subtarget.isISAFuture();
  unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
  Len = AdjustLength(Len, EltBits, !Future, DAG);
  SDValue Ops[] = {
      VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
      DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
      VPST->getOperand(2), Len};
  SDVTList Tys = DAG.getVTList(MVT::Other);
  SDValue VPS =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
                              VPST->getMemoryVT(), VPST->getMemOperand());
  return VPS;
}

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
@@ -12771,6 +12838,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    if (Op->getFlags().hasNoFPExcept())
      return Op;
    return SDValue();
  case ISD::VP_LOAD:
    return LowerVP_LOAD(Op, DAG);
  case ISD::VP_STORE:
    return LowerVP_STORE(Op, DAG);
  }
}
@@ -1345,6 +1345,9 @@ namespace llvm {
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
@@ -24,6 +24,10 @@ using namespace llvm;

#define DEBUG_TYPE "ppctti"

static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
                             cl::desc("Allow vp.load and vp.store for pwr9"),
                             cl::init(false), cl::Hidden);

static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
    cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
@@ -1031,3 +1035,42 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
  return TLI->supportsTailCallFor(CB);
}

// Target hook used by CodeGen to decide whether to expand vector predication
// intrinsics into scalar operations or to use special ISD nodes to represent
// them. The Target will not see the intrinsics.
TargetTransformInfo::VPLegalization
PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
  using VPLegalization = TargetTransformInfo::VPLegalization;
  unsigned Directive = ST->getCPUDirective();
  VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI);
  if (Directive != PPC::DIR_PWR10 && Directive != PPC::DIR_PWR_FUTURE &&
      (!Pwr9EVL || Directive != PPC::DIR_PWR9))
    return DefaultLegalization;

  if (!ST->isPPC64())
    return DefaultLegalization;

  unsigned IID = PI.getIntrinsicID();
  if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store)
    return DefaultLegalization;

  bool IsLoad = IID == Intrinsic::vp_load;
  Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType();
  EVT VT = TLI->getValueType(DL, VecTy, true);
  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
      VT != MVT::v16i8)
    return DefaultLegalization;

  auto IsAllTrueMask = [](Value *MaskVal) {
    if (Value *SplattedVal = getSplatValue(MaskVal))
      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
        return ConstValue->isAllOnesValue();
    return false;
  };
  unsigned MaskIx = IsLoad ? 1 : 2;
  if (!IsAllTrueMask(PI.getOperand(MaskIx)))
    return DefaultLegalization;

  return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
}
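
Taken together with the Pwr9EVL option above, the gate is: 64-bit Power10 or Power-Future (or Power9 only when the hidden -ppc-pwr9-evl option is passed to llc), vp.load/vp.store only, a whole-register integer vector type (v16i8, v8i16, v4i32 or v2i64), and a constant all-true mask; anything else keeps the default expansion strategy. A compact standalone restatement of that decision, using simplified stand-in types rather than the real TTI/TLI interfaces:

// Simplified, self-contained restatement of getVPLegalizationStrategy's
// gating conditions; the enums and parameters are stand-ins for illustration
// and are not the real TargetTransformInfo API.
enum class Cpu { Pwr9, Pwr10, Future, Other };
enum class VecTy { V16I8, V8I16, V4I32, V2I64, Other };

bool keepsEVLFormLegal(Cpu Directive, bool IsPPC64, bool Pwr9EVLFlag,
                       bool IsVPLoadOrStore, VecTy Ty, bool MaskIsAllTrue) {
  bool CpuOk = Directive == Cpu::Pwr10 || Directive == Cpu::Future ||
               (Pwr9EVLFlag && Directive == Cpu::Pwr9);
  bool TyOk = Ty != VecTy::Other; // v16i8 / v8i16 / v4i32 / v2i64 only
  return CpuOk && IsPPC64 && IsVPLoadOrStore && TyOk && MaskIsAllTrue;
}
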
@@ -150,6 +150,9 @@ public:
                            ArrayRef<Type *> Types) const override;
  bool supportsTailCallFor(const CallBase *CB) const override;

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override;

private:
  // The following constant is used for estimating costs on power9.
  static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
llvm/test/CodeGen/PowerPC/vp-ld-st.ll (new file, 160 lines)
@@ -0,0 +1,160 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=future \
; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -check-prefix=FUTURE %s

; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=future \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck --check-prefix=FUTURE %s

; Function Attrs: nounwind readnone
define void @stxvl1(<16 x i8> %a, ptr %b, i64 %c) {
; CHECK-LABEL: stxvl1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sldi 3, 6, 56
; CHECK-NEXT:    stxvl 34, 5, 3
; CHECK-NEXT:    blr
;
; FUTURE-LABEL: stxvl1:
; FUTURE:       # %bb.0: # %entry
; FUTURE-NEXT:    stxvrl 34, 5, 6
; FUTURE-NEXT:    blr
entry:
  %cconv = trunc i64 %c to i32
  tail call void @llvm.vp.store.v16i8.p0(<16 x i8> %a, ptr %b, <16 x i1> splat (i1 true), i32 %cconv)
  ret void
}

; Function Attrs: nounwind readnone
define void @stxvl2(<8 x i16> %a, ptr %b, i64 %c) {
; CHECK-LABEL: stxvl2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sldi 3, 6, 57
; CHECK-NEXT:    stxvl 34, 5, 3
; CHECK-NEXT:    blr
;
; FUTURE-LABEL: stxvl2:
; FUTURE:       # %bb.0: # %entry
; FUTURE-NEXT:    sldi 3, 6, 1
; FUTURE-NEXT:    stxvrl 34, 5, 3
; FUTURE-NEXT:    blr
entry:
  %cconv = trunc i64 %c to i32
  tail call void @llvm.vp.store.v8i16.p0(<8 x i16> %a, ptr %b, <8 x i1> splat (i1 true), i32 %cconv)
  ret void
}

; Function Attrs: nounwind readnone
define void @stxvl4(<4 x i32> %a, ptr %b, i64 %c) {
; CHECK-LABEL: stxvl4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sldi 3, 6, 58
; CHECK-NEXT:    stxvl 34, 5, 3
; CHECK-NEXT:    blr
;
; FUTURE-LABEL: stxvl4:
; FUTURE:       # %bb.0: # %entry
; FUTURE-NEXT:    sldi 3, 6, 2
; FUTURE-NEXT:    stxvrl 34, 5, 3
; FUTURE-NEXT:    blr
entry:
  %cconv = trunc i64 %c to i32
  tail call void @llvm.vp.store.v4i32.p0(<4 x i32> %a, ptr %b, <4 x i1> splat (i1 true), i32 %cconv)
  ret void
}

; Function Attrs: nounwind readnone
define void @stxvl8(<2 x i64> %a, ptr %b, i64 %c) {
; CHECK-LABEL: stxvl8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sldi 3, 6, 59
; CHECK-NEXT:    stxvl 34, 5, 3
; CHECK-NEXT:    blr
;
; FUTURE-LABEL: stxvl8:
; FUTURE:       # %bb.0: # %entry
; FUTURE-NEXT:    sldi 3, 6, 3
; FUTURE-NEXT:    stxvrl 34, 5, 3
; FUTURE-NEXT:    blr
entry:
  %cconv = trunc i64 %c to i32
  tail call void @llvm.vp.store.v2i64.p0(<2 x i64> %a, ptr %b, <2 x i1> splat (i1 true), i32 %cconv)
  ret void
}

; Function Attrs: nounwind readnone
define <16 x i8> @lxvl1(ptr %a, i64 %b) {
; CHECK-LABEL: lxvl1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sldi 4, 4, 56
; CHECK-NEXT:    lxvl 34, 3, 4
; CHECK-NEXT:    blr
;
; FUTURE-LABEL: lxvl1:
; FUTURE:       # %bb.0: # %entry
; FUTURE-NEXT:    lxvrl 34, 3, 4
; FUTURE-NEXT:    blr
entry:
  %bconv = trunc i64 %b to i32
  %0 = tail call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %a, <16 x i1> splat (i1 true), i32 %bconv)
  ret <16 x i8> %0
}

; Function Attrs: nounwind readnone
define <8 x i16> @lxvl2(ptr %a, i64 %b) {
; CHECK-LABEL: lxvl2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sldi 4, 4, 57
; CHECK-NEXT:    lxvl 34, 3, 4
; CHECK-NEXT:    blr
;
; FUTURE-LABEL: lxvl2:
; FUTURE:       # %bb.0: # %entry
; FUTURE-NEXT:    sldi 4, 4, 1
; FUTURE-NEXT:    lxvrl 34, 3, 4
; FUTURE-NEXT:    blr
entry:
  %bconv = trunc i64 %b to i32
  %0 = tail call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %a, <8 x i1> splat (i1 true), i32 %bconv)
  ret <8 x i16> %0
}

; Function Attrs: nounwind readnone
define <4 x i32> @lxvl4(ptr %a, i64 %b) {
; CHECK-LABEL: lxvl4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sldi 4, 4, 58
; CHECK-NEXT:    lxvl 34, 3, 4
; CHECK-NEXT:    blr
;
; FUTURE-LABEL: lxvl4:
; FUTURE:       # %bb.0: # %entry
; FUTURE-NEXT:    sldi 4, 4, 2
; FUTURE-NEXT:    lxvrl 34, 3, 4
; FUTURE-NEXT:    blr
entry:
  %bconv = trunc i64 %b to i32
  %0 = tail call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %a, <4 x i1> splat (i1 true), i32 %bconv)
  ret <4 x i32> %0
}

; Function Attrs: nounwind readnone
define <2 x i64> @lxvl8(ptr %a, i64 %b) {
; CHECK-LABEL: lxvl8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sldi 4, 4, 59
; CHECK-NEXT:    lxvl 34, 3, 4
; CHECK-NEXT:    blr
;
; FUTURE-LABEL: lxvl8:
; FUTURE:       # %bb.0: # %entry
; FUTURE-NEXT:    sldi 4, 4, 3
; FUTURE-NEXT:    lxvrl 34, 3, 4
; FUTURE-NEXT:    blr
entry:
  %bconv = trunc i64 %b to i32
  %0 = tail call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %a, <2 x i1> splat (i1 true), i32 %bconv)
  ret <2 x i64> %0
}