[PowerPC] Update PC-Relative Load/Store Patterns to use the refactored Load/Store Implementation

This patch updates the PC-Relative load and store patterns to utilize the
refactored load/store implementation introduced in D93370.

A PC-Relative implementation has been added to PPCISelLowering.cpp, and the
patterns in PPCInstrPrefix.td have been updated so that they no longer require
AddedComplexity. All existing test cases pass with this update.

Differential Revision: https://reviews.llvm.org/D95116
This commit is contained in:
Amy Kwan
2021-09-09 15:37:02 -05:00
parent ee0ac45672
commit 351a0d8a90
7 changed files with 457 additions and 72 deletions

View File

@@ -262,6 +262,14 @@ namespace {
None) == PPC::AM_DForm;
}
/// SelectPCRelForm - Address-mode predicate for PC-Relative addressing.
/// Asks the target lowering for the optimal addressing mode of address N
/// (forwarding Parent, Disp, Base and the current DAG) and returns true only
/// when the mode it reports is PPC::AM_PCRel.
bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
                     SDValue &Base) {
  // Disp and Base are output references handed straight to the lowering.
  const auto ChosenMode = PPCLowering->SelectOptimalAddrMode(
      Parent, N, Disp, Base, *CurDAG, None);
  return ChosenMode == PPC::AM_PCRel;
}
/// SelectXForm - Returns true if address N can be represented by the
/// addressing mode of XForm instructions (an indexed [r+r] operation).
bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {

View File

@@ -17357,6 +17357,14 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
}
}
/// isPCRelNode - Returns true if N is a PPCISD::MAT_PCREL_ADDR node, or if
/// isValidPCRelNode accepts it as a constant-pool, global-address, jump-table
/// or block-address node.
static bool isPCRelNode(SDValue N) {
  // A MAT_PCREL_ADDR node qualifies directly.
  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
    return true;

  // Otherwise try each supported address node kind in turn; the first one
  // that validates is enough.
  if (isValidPCRelNode<ConstantPoolSDNode>(N))
    return true;
  if (isValidPCRelNode<GlobalAddressSDNode>(N))
    return true;
  if (isValidPCRelNode<JumpTableSDNode>(N))
    return true;
  return isValidPCRelNode<BlockAddressSDNode>(N);
}
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
@@ -17374,6 +17382,10 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
if (Subtarget.hasSPE())
FlagSet |= PPC::MOF_SubtargetSPE;
// Check if we have a PCRel node and return early.
if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
return FlagSet;
// Mark this as something we don't want to handle here if it is atomic
// or pre-increment instruction.
if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
@@ -17518,6 +17530,14 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
// Select an X-Form load if it is not.
setXFormForUnalignedFI(N, Flags, Mode);
// Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
assert(Subtarget.isUsingPCRelativeCalls() &&
"Must be using PC-Relative calls when a valid PC-Relative node is "
"present!");
Mode = PPC::AM_PCRel;
}
// Set Base and Disp accordingly depending on the address mode.
switch (Mode) {
case PPC::AM_DForm:
@@ -17589,6 +17609,12 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
Base = N;
break;
}
case PPC::AM_PCRel: {
// When selecting PC-Relative instructions, "Base" is not utilized as
// we select the address as [PC+imm].
Disp = N;
break;
}
case PPC::AM_None:
break;
default: { // By default, X-Form is always available to be selected.

View File

@@ -494,6 +494,11 @@ namespace llvm {
/// Constrained floating point add in round-to-zero mode.
STRICT_FADDRTZ,
// NOTE: The nodes below may require PC-Rel specific patterns if the
// address could be PC-Relative. When adding new nodes below, consider
// whether or not the address can be PC-Relative and add the corresponding
// PC-relative patterns and tests.
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -713,6 +718,7 @@ namespace llvm {
AM_DSForm,
AM_DQForm,
AM_XForm,
AM_PCRel
};
} // end namespace PPC

View File

@@ -1152,15 +1152,13 @@ def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
// PC Relative Address
def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", [], []>;
// Load and Store Instruction Selection addressing modes.
// Each ComplexPattern below names the instruction-selector routine that decides
// whether an address fits that addressing mode (for example, PCRelForm is
// matched through SelectPCRelForm and XForm through SelectXForm).
// NOTE(review): the selector routines visible in this change take the parent
// node first and return Disp/Base by reference, which appears to correspond to
// SDNPWantParent and the operand count of 2 here -- confirm against the
// ComplexPattern definition.
def DForm : ComplexPattern<iPTR, 2, "SelectDForm", [], [SDNPWantParent]>;
def DSForm : ComplexPattern<iPTR, 2, "SelectDSForm", [], [SDNPWantParent]>;
def DQForm : ComplexPattern<iPTR, 2, "SelectDQForm", [], [SDNPWantParent]>;
def XForm : ComplexPattern<iPTR, 2, "SelectXForm", [], [SDNPWantParent]>;
def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], [SDNPWantParent]>;
// PC-Relative addressing; selected when the optimal mode is PPC::AM_PCRel.
def PCRelForm : ComplexPattern<iPTR, 2, "SelectPCRelForm", [], [SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.

View File

@@ -1682,178 +1682,171 @@ let Predicates = [PairedVectorMemops] in {
}
}
// TODO: We have an added complexity of 500 here. This is only a temporary
// solution to have tablegen consider these patterns first. The way we do
// addressing for PowerPC is complex depending on available D form, X form, or
// aligned D form loads/stores like DS and DQ forms. The prefixed
// instructions in this file also add additional PC Relative loads/stores
// and D form loads/stores with 34 bit immediates. It is very difficult to force
// instruction selection to consistently pick these first without the current
// added complexity. Once pc-relative implementation is complete, a set of
// follow-up patches will address this refactoring and the AddedComplexity will
// be removed.
let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
let Predicates = [PCRelativeMemops] in {
// Load i32
def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
def : Pat<(i32 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i32 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
def : Pat<(i32 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i32 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHApc $ga, 0)>;
def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZpc $ga, 0)>;
def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i32 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZpc $ga, 0)>;
def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>;
def : Pat<(i32 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>;
// Store i32
def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTBpc $RS, $ga, 0)>;
def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTHpc $RS, $ga, 0)>;
def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTWpc $RS, $ga, 0)>;
// Load i64
def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
def : Pat<(i64 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
def : Pat<(i64 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHA8pc $ga, 0)>;
def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZ8pc $ga, 0)>;
def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZ8pc $ga, 0)>;
def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWZ8pc $ga, 0)>;
def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWA8pc $ga, 0)>;
def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
def : Pat<(i64 (extloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWZ8pc $ga, 0)>;
def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>;
def : Pat<(i64 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>;
// Store i64
def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTB8pc $RS, $ga, 0)>;
def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTH8pc $RS, $ga, 0)>;
def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTW8pc $RS, $ga, 0)>;
def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTDpc $RS, $ga, 0)>;
// Load f32
def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>;
def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>;
// Store f32
def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store f32:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTFSpc $FRS, $ga, 0)>;
// Load f64
def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))),
def : Pat<(f64 (extloadf32 (PPCmatpcreladdr PCRelForm:$addr))),
(COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>;
def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>;
def : Pat<(f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFDpc $addr, 0)>;
// Store f64
def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTFDpc $FRS, $ga, 0)>;
// Load f128
def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))),
def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))),
(COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>;
// Store f128
def : Pat<(store f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store f128:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>;
// Load v4i32
def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
def : Pat<(v4i32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v4i32
def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store v4i32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v2i64
def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
def : Pat<(v2i64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v2i64
def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store v2i64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v4f32
def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
def : Pat<(v4f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v4f32
def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store v4f32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v2f64
def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
def : Pat<(v2f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v2f64
def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Atomic Load
def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)),
(PLBZpc $ga, 0)>;
def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)),
(PLHZpc $ga, 0)>;
def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)),
(PLWZpc $ga, 0)>;
def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)),
def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)),
(PLDpc $ga, 0)>;
// Atomic Store
def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTBpc $RS, $ga, 0)>;
def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTHpc $RS, $ga, 0)>;
def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTWpc $RS, $ga, 0)>;
def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTB8pc $RS, $ga, 0)>;
def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTH8pc $RS, $ga, 0)>;
def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTW8pc $RS, $ga, 0)>;
def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTDpc $RS, $ga, 0)>;
// Special Cases For PPCstore_scal_int_from_vsr
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)),
(PPCmatpcreladdr pcreladdr:$dst), 8),
(PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)),
(PPCmatpcreladdr pcreladdr:$dst), 8),
(PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)),
(PPCmatpcreladdr pcreladdr:$dst), 8),
(PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)),
(PPCmatpcreladdr pcreladdr:$dst), 8),
(PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))),
(SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>;
// If the PPCmatpcreladdr node is not caught by any other pattern it should be
// caught here and turned into a paddi instruction to materialize the address.
def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
// PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize
// tls global address with paddi instruction.
def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
def : Pat<(PPCtlsdynamatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
// PPCtlslocalexecmataddr node is used for TLS local exec models to
// materialize tls global address with paddi instruction.
def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)),

View File

@@ -42,6 +42,7 @@
@GlobSt11 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
@GlobLd12 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
@GlobSt12 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
@GlobF128 = dso_local local_unnamed_addr global [20 x fp128] zeroinitializer, align 16
; Function Attrs: nofree norecurse nounwind uwtable willreturn
define dso_local void @testGlob1PtrPlus0() {
@@ -2270,3 +2271,323 @@ entry:
store <16 x i8> %0, <16 x i8>* %arrayidx1, align 16
ret void
}
; Function Attrs: nofree norecurse nounwind uwtable willreturn
define dso_local void @Atomic_LdSt_i8() {
; CHECK-P10-LE-LABEL: Atomic_LdSt_i8:
; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: plbz r3, GlobLd1@PCREL(0), 1
; CHECK-P10-LE-NEXT: pstb r3, GlobSt1@PCREL(0), 1
; CHECK-P10-LE-NEXT: blr
;
; CHECK-P10-BE-LABEL: Atomic_LdSt_i8:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: addis r3, r2, GlobLd1@toc@ha
; CHECK-P10-BE-NEXT: addis r4, r2, GlobSt1@toc@ha
; CHECK-P10-BE-NEXT: lbz r3, GlobLd1@toc@l(r3)
; CHECK-P10-BE-NEXT: stb r3, GlobSt1@toc@l(r4)
; CHECK-P10-BE-NEXT: blr
;
; CHECK-LABEL: Atomic_LdSt_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, GlobLd1@toc@ha
; CHECK-NEXT: addis r4, r2, GlobSt1@toc@ha
; CHECK-NEXT: lbz r3, GlobLd1@toc@l(r3)
; CHECK-NEXT: stb r3, GlobSt1@toc@l(r4)
; CHECK-NEXT: blr
entry:
%0 = load atomic i8, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @GlobLd1, i64 0, i64 0) monotonic, align 1
store atomic i8 %0, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @GlobSt1, i64 0, i64 0) monotonic, align 1
ret void
}
; Function Attrs: nofree norecurse nounwind uwtable willreturn
define dso_local void @Atomic_LdSt_i16() {
; CHECK-P10-LE-LABEL: Atomic_LdSt_i16:
; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: plhz r3, GlobLd3@PCREL(0), 1
; CHECK-P10-LE-NEXT: psth r3, GlobSt3@PCREL(0), 1
; CHECK-P10-LE-NEXT: blr
;
; CHECK-P10-BE-LABEL: Atomic_LdSt_i16:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: addis r3, r2, GlobLd3@toc@ha
; CHECK-P10-BE-NEXT: addis r4, r2, GlobSt3@toc@ha
; CHECK-P10-BE-NEXT: lhz r3, GlobLd3@toc@l(r3)
; CHECK-P10-BE-NEXT: sth r3, GlobSt3@toc@l(r4)
; CHECK-P10-BE-NEXT: blr
;
; CHECK-LABEL: Atomic_LdSt_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, GlobLd3@toc@ha
; CHECK-NEXT: addis r4, r2, GlobSt3@toc@ha
; CHECK-NEXT: lhz r3, GlobLd3@toc@l(r3)
; CHECK-NEXT: sth r3, GlobSt3@toc@l(r4)
; CHECK-NEXT: blr
entry:
%0 = load atomic i16, i16* getelementptr inbounds ([20 x i16], [20 x i16]* @GlobLd3, i64 0, i64 0) monotonic, align 2
store atomic i16 %0, i16* getelementptr inbounds ([20 x i16], [20 x i16]* @GlobSt3, i64 0, i64 0) monotonic, align 2
ret void
}
; Function Attrs: nofree norecurse nounwind uwtable willreturn
define dso_local void @Atomic_LdSt_i32() {
; CHECK-P10-LE-LABEL: Atomic_LdSt_i32:
; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: plwz r3, GlobLd5@PCREL(0), 1
; CHECK-P10-LE-NEXT: pstw r3, GlobSt5@PCREL(0), 1
; CHECK-P10-LE-NEXT: blr
;
; CHECK-P10-BE-LABEL: Atomic_LdSt_i32:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: addis r3, r2, GlobLd5@toc@ha
; CHECK-P10-BE-NEXT: addis r4, r2, GlobSt5@toc@ha
; CHECK-P10-BE-NEXT: lwz r3, GlobLd5@toc@l(r3)
; CHECK-P10-BE-NEXT: stw r3, GlobSt5@toc@l(r4)
; CHECK-P10-BE-NEXT: blr
;
; CHECK-LABEL: Atomic_LdSt_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, GlobLd5@toc@ha
; CHECK-NEXT: addis r4, r2, GlobSt5@toc@ha
; CHECK-NEXT: lwz r3, GlobLd5@toc@l(r3)
; CHECK-NEXT: stw r3, GlobSt5@toc@l(r4)
; CHECK-NEXT: blr
entry:
%0 = load atomic i32, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @GlobLd5, i64 0, i64 0) monotonic, align 4
store atomic i32 %0, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @GlobSt5, i64 0, i64 0) monotonic, align 4
ret void
}
; Function Attrs: nofree norecurse nounwind uwtable willreturn
define dso_local void @Atomic_LdSt_i64() {
; CHECK-P10-LE-LABEL: Atomic_LdSt_i64:
; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: pld r3, GlobLd7@PCREL(0), 1
; CHECK-P10-LE-NEXT: pstd r3, GlobSt7@PCREL(0), 1
; CHECK-P10-LE-NEXT: blr
;
; CHECK-P10-BE-LABEL: Atomic_LdSt_i64:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: addis r3, r2, GlobLd7@toc@ha
; CHECK-P10-BE-NEXT: addis r4, r2, GlobSt7@toc@ha
; CHECK-P10-BE-NEXT: ld r3, GlobLd7@toc@l(r3)
; CHECK-P10-BE-NEXT: std r3, GlobSt7@toc@l(r4)
; CHECK-P10-BE-NEXT: blr
;
; CHECK-P9-LABEL: Atomic_LdSt_i64:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, GlobLd7@toc@ha
; CHECK-P9-NEXT: addis r4, r2, GlobSt7@toc@ha
; CHECK-P9-NEXT: ld r3, GlobLd7@toc@l(r3)
; CHECK-P9-NEXT: std r3, GlobSt7@toc@l(r4)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: Atomic_LdSt_i64:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, GlobLd7@toc@ha
; CHECK-P8-NEXT: ld r3, GlobLd7@toc@l(r3)
; CHECK-P8-NEXT: addis r4, r2, GlobSt7@toc@ha
; CHECK-P8-NEXT: std r3, GlobSt7@toc@l(r4)
; CHECK-P8-NEXT: blr
entry:
%0 = load atomic i64, i64* getelementptr inbounds ([20 x i64], [20 x i64]* @GlobLd7, i64 0, i64 0) monotonic, align 8
store atomic i64 %0, i64* getelementptr inbounds ([20 x i64], [20 x i64]* @GlobSt7, i64 0, i64 0) monotonic, align 8
ret void
}
; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
define dso_local void @store_double_f64_to_uint(double %str) local_unnamed_addr #0 {
; CHECK-P10-LE-LABEL: store_double_f64_to_uint:
; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: xscvdpuxds v2, f1
; CHECK-P10-LE-NEXT: pstxsd v2, GlobSt10@PCREL(0), 1
; CHECK-P10-LE-NEXT: blr
;
; CHECK-P10-BE-LABEL: store_double_f64_to_uint:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: xscvdpuxds v2, f1
; CHECK-P10-BE-NEXT: addis r3, r2, GlobSt10@toc@ha
; CHECK-P10-BE-NEXT: addi r3, r3, GlobSt10@toc@l
; CHECK-P10-BE-NEXT: stxsd v2, 0(r3)
; CHECK-P10-BE-NEXT: blr
;
; CHECK-P9-LABEL: store_double_f64_to_uint:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xscvdpuxds v2, f1
; CHECK-P9-NEXT: addis r3, r2, GlobSt10@toc@ha
; CHECK-P9-NEXT: addi r3, r3, GlobSt10@toc@l
; CHECK-P9-NEXT: stxsd v2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: store_double_f64_to_uint:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xscvdpuxds f0, f1
; CHECK-P8-NEXT: addis r3, r2, GlobSt10@toc@ha
; CHECK-P8-NEXT: addi r3, r3, GlobSt10@toc@l
; CHECK-P8-NEXT: stxsdx f0, 0, r3
; CHECK-P8-NEXT: blr
entry:
%conv = fptoui double %str to i64
store i64 %conv, i64* bitcast ([20 x double]* @GlobSt10 to i64*), align 8
ret void
}
; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
define dso_local void @store_double_f64_to_sint(double %str) local_unnamed_addr #0 {
; CHECK-P10-LE-LABEL: store_double_f64_to_sint:
; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: xscvdpsxds v2, f1
; CHECK-P10-LE-NEXT: pstxsd v2, GlobSt10@PCREL(0), 1
; CHECK-P10-LE-NEXT: blr
;
; CHECK-P10-BE-LABEL: store_double_f64_to_sint:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: xscvdpsxds v2, f1
; CHECK-P10-BE-NEXT: addis r3, r2, GlobSt10@toc@ha
; CHECK-P10-BE-NEXT: addi r3, r3, GlobSt10@toc@l
; CHECK-P10-BE-NEXT: stxsd v2, 0(r3)
; CHECK-P10-BE-NEXT: blr
;
; CHECK-P9-LABEL: store_double_f64_to_sint:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xscvdpsxds v2, f1
; CHECK-P9-NEXT: addis r3, r2, GlobSt10@toc@ha
; CHECK-P9-NEXT: addi r3, r3, GlobSt10@toc@l
; CHECK-P9-NEXT: stxsd v2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: store_double_f64_to_sint:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: xscvdpsxds f0, f1
; CHECK-P8-NEXT: addis r3, r2, GlobSt10@toc@ha
; CHECK-P8-NEXT: addi r3, r3, GlobSt10@toc@l
; CHECK-P8-NEXT: stxsdx f0, 0, r3
; CHECK-P8-NEXT: blr
entry:
%conv = fptosi double %str to i64
store i64 %conv, i64* bitcast ([20 x double]* @GlobSt10 to i64*), align 8
ret void
}
; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
define dso_local void @store_f128_to_uint(fp128 %str) local_unnamed_addr #0 {
; CHECK-P10-LE-LABEL: store_f128_to_uint:
; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: xscvqpudz v2, v2
; CHECK-P10-LE-NEXT: pstxsd v2, GlobF128@PCREL(0), 1
; CHECK-P10-LE-NEXT: blr
;
; CHECK-P10-BE-LABEL: store_f128_to_uint:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: xscvqpudz v2, v2
; CHECK-P10-BE-NEXT: addis r3, r2, GlobF128@toc@ha
; CHECK-P10-BE-NEXT: addi r3, r3, GlobF128@toc@l
; CHECK-P10-BE-NEXT: stxsd v2, 0(r3)
; CHECK-P10-BE-NEXT: blr
;
; CHECK-P9-LABEL: store_f128_to_uint:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xscvqpudz v2, v2
; CHECK-P9-NEXT: addis r3, r2, GlobF128@toc@ha
; CHECK-P9-NEXT: addi r3, r3, GlobF128@toc@l
; CHECK-P9-NEXT: stxsd v2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LE-LABEL: store_f128_to_uint:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: mflr r0
; CHECK-P8-LE-NEXT: std r0, 16(r1)
; CHECK-P8-LE-NEXT: stdu r1, -32(r1)
; CHECK-P8-LE-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-LE-NEXT: .cfi_offset lr, 16
; CHECK-P8-LE-NEXT: bl __fixunskfdi
; CHECK-P8-LE-NEXT: nop
; CHECK-P8-LE-NEXT: addis r4, r2, GlobF128@toc@ha
; CHECK-P8-LE-NEXT: std r3, GlobF128@toc@l(r4)
; CHECK-P8-LE-NEXT: addi r1, r1, 32
; CHECK-P8-LE-NEXT: ld r0, 16(r1)
; CHECK-P8-LE-NEXT: mtlr r0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: store_f128_to_uint:
; CHECK-P8-BE: # %bb.0: # %entry
; CHECK-P8-BE-NEXT: mflr r0
; CHECK-P8-BE-NEXT: std r0, 16(r1)
; CHECK-P8-BE-NEXT: stdu r1, -112(r1)
; CHECK-P8-BE-NEXT: .cfi_def_cfa_offset 112
; CHECK-P8-BE-NEXT: .cfi_offset lr, 16
; CHECK-P8-BE-NEXT: bl __fixunskfdi
; CHECK-P8-BE-NEXT: nop
; CHECK-P8-BE-NEXT: addis r4, r2, GlobF128@toc@ha
; CHECK-P8-BE-NEXT: std r3, GlobF128@toc@l(r4)
; CHECK-P8-BE-NEXT: addi r1, r1, 112
; CHECK-P8-BE-NEXT: ld r0, 16(r1)
; CHECK-P8-BE-NEXT: mtlr r0
; CHECK-P8-BE-NEXT: blr
entry:
%conv = fptoui fp128 %str to i64
store i64 %conv, i64* bitcast ([20 x fp128]* @GlobF128 to i64*), align 16
ret void
}
; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
define dso_local void @store_f128_to_sint(fp128 %str) local_unnamed_addr #0 {
; CHECK-P10-LE-LABEL: store_f128_to_sint:
; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: xscvqpsdz v2, v2
; CHECK-P10-LE-NEXT: pstxsd v2, GlobF128@PCREL(0), 1
; CHECK-P10-LE-NEXT: blr
;
; CHECK-P10-BE-LABEL: store_f128_to_sint:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: xscvqpsdz v2, v2
; CHECK-P10-BE-NEXT: addis r3, r2, GlobF128@toc@ha
; CHECK-P10-BE-NEXT: addi r3, r3, GlobF128@toc@l
; CHECK-P10-BE-NEXT: stxsd v2, 0(r3)
; CHECK-P10-BE-NEXT: blr
;
; CHECK-P9-LABEL: store_f128_to_sint:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xscvqpsdz v2, v2
; CHECK-P9-NEXT: addis r3, r2, GlobF128@toc@ha
; CHECK-P9-NEXT: addi r3, r3, GlobF128@toc@l
; CHECK-P9-NEXT: stxsd v2, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LE-LABEL: store_f128_to_sint:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: mflr r0
; CHECK-P8-LE-NEXT: std r0, 16(r1)
; CHECK-P8-LE-NEXT: stdu r1, -32(r1)
; CHECK-P8-LE-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-LE-NEXT: .cfi_offset lr, 16
; CHECK-P8-LE-NEXT: bl __fixkfdi
; CHECK-P8-LE-NEXT: nop
; CHECK-P8-LE-NEXT: addis r4, r2, GlobF128@toc@ha
; CHECK-P8-LE-NEXT: std r3, GlobF128@toc@l(r4)
; CHECK-P8-LE-NEXT: addi r1, r1, 32
; CHECK-P8-LE-NEXT: ld r0, 16(r1)
; CHECK-P8-LE-NEXT: mtlr r0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: store_f128_to_sint:
; CHECK-P8-BE: # %bb.0: # %entry
; CHECK-P8-BE-NEXT: mflr r0
; CHECK-P8-BE-NEXT: std r0, 16(r1)
; CHECK-P8-BE-NEXT: stdu r1, -112(r1)
; CHECK-P8-BE-NEXT: .cfi_def_cfa_offset 112
; CHECK-P8-BE-NEXT: .cfi_offset lr, 16
; CHECK-P8-BE-NEXT: bl __fixkfdi
; CHECK-P8-BE-NEXT: nop
; CHECK-P8-BE-NEXT: addis r4, r2, GlobF128@toc@ha
; CHECK-P8-BE-NEXT: std r3, GlobF128@toc@l(r4)
; CHECK-P8-BE-NEXT: addi r1, r1, 112
; CHECK-P8-BE-NEXT: ld r0, 16(r1)
; CHECK-P8-BE-NEXT: mtlr r0
; CHECK-P8-BE-NEXT: blr
entry:
%conv = fptosi fp128 %str to i64
store i64 %conv, i64* bitcast ([20 x fp128]* @GlobF128 to i64*), align 16
ret void
}

View File

@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE
; Function Attrs: norecurse nounwind readonly
define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) {
@@ -75,3 +81,30 @@ entry:
%2 = fpext <2 x float> %sub to <2 x double>
ret <2 x double> %2
}
@G = dso_local local_unnamed_addr global <2 x float> <float 3.000000e+00, float 0x3FF3333340000000>, align 8
; Function Attrs: mustprogress nofree norecurse nosync nounwind readonly uwtable willreturn
define dso_local <2 x double> @test5(<2 x double> %a) {
; CHECK-P10-LABEL: test5:
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: plfd f0, G@PCREL(0), 1
; CHECK-P10-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-P10-NEXT: xvcvspdp vs0, vs0
; CHECK-P10-NEXT: xvadddp v2, vs0, v2
; CHECK-P10-NEXT: blr
;
; CHECK-P10-BE-LABEL: test5:
; CHECK-P10-BE: # %bb.0: # %entry
; CHECK-P10-BE-NEXT: addis r3, r2, G@toc@ha
; CHECK-P10-BE-NEXT: lfd f0, G@toc@l(r3)
; CHECK-P10-BE-NEXT: xxmrghw vs0, vs0, vs0
; CHECK-P10-BE-NEXT: xvcvspdp vs0, vs0
; CHECK-P10-BE-NEXT: xvadddp v2, vs0, v2
; CHECK-P10-BE-NEXT: blr
entry:
%0 = load <2 x float>, <2 x float>* @G, align 8
%1 = fpext <2 x float> %0 to <2 x double>
%add = fadd <2 x double> %1, %a
ret <2 x double> %add
}