[PowerPC][LLD] Extend R2 save stub to support offsets of more than 26 bits

The R2 save stub will now support offsets up to 64 bits.

There are three cases that will be used.
1) The offset fits in 26 bits.
```
b <26 bit offset>
```
2) The offset does not fit in 26 bits but fits in 34 bits.
```
paddi r12, 0, <34 bit offset>, 1
mtctr r12
bctr
```
3) The offset does not fit in 34 bits. Since this is an R2 save stub we can use
the TOC in R2. We are not loading the offset but the actual address we want to
branch to.
```
addis r12, r2, <offset of address entry from TOC base>@ha
ld r12, <offset of address entry from TOC base>@l(r12)
mtctr r12
bctr
```

Every form starts with the 4-byte `std r2, 24(r1)` that saves R2, which is not
shown in the sequences above. Including it, the stub is 8 bytes in case 1) and
20 bytes in cases 2) and 3).

Reviewed By: MaskRay, sfertile, NeHuang

Differential Revision: https://reviews.llvm.org/D87916
This commit is contained in:
Stefan Pintilie
2020-09-22 18:44:54 -05:00
parent 9d2ef5e74e
commit d224175230
2 changed files with 138 additions and 27 deletions

View File

@@ -289,10 +289,33 @@ public:
// 2) Tail calls the callee.
// R2 save stub: writeTo() emits `std r2,24(r1)` followed by a transfer of
// control to the destination. The stub is 8 bytes when a single `b` can
// reach the destination and 20 bytes otherwise (see size()).
class PPC64R2SaveStub final : public Thunk {
public:
// NOTE(review): this one-argument constructor and the fixed `return 8`
// size() right below look like the pre-change lines of the diff view (the
// two-argument constructor and the variable size() further down appear to
// replace them) -- confirm against the actual file.
PPC64R2SaveStub(Symbol &dest) : Thunk(dest, 0) {}
uint32_t size() override { return 8; }
// Forwards the destination symbol and addend to Thunk and sets the thunk's
// alignment to 16.
// NOTE(review): presumably the alignment exists because the long form
// contains an 8-byte prefixed paddi -- confirm the exact requirement.
PPC64R2SaveStub(Symbol &dest, int64_t addend) : Thunk(dest, addend) {
alignment = 16;
}
// To prevent oscillations in layout when moving from short to long thunks
// we make sure that once a thunk has been set to long it cannot go back.
//
// Returns true only while the short form is still permitted and the current
// offset fits in a signed 26-bit field. Not a pure query: the first time the
// offset does not fit, mayUseShortThunk is cleared and every later call
// returns false regardless of the offset.
bool getMayUseShortThunk() {
if (!mayUseShortThunk)
return false;
if (!isInt<26>(computeOffset())) {
mayUseShortThunk = false;
return false;
}
return true;
}
// 8 bytes for the short form, 20 bytes for either long form. Calls
// getMayUseShortThunk(), so it may latch the thunk into the long form.
uint32_t size() override { return getMayUseShortThunk() ? 8 : 20; }
void writeTo(uint8_t *buf) override;
void addSymbols(ThunkSection &isec) override;
private:
// Transitioning from long to short can create layout oscillations in
// certain corner cases which would prevent the layout from converging.
// This is similar to the handling for ARMThunk.
bool mayUseShortThunk = true;
// Signed distance from the second word of the thunk (thunk address + 4,
// where writeTo() places the branch or paddi) to the destination.
// NOTE(review): assumes getThunkTargetSym() is the symbol at the start of
// this thunk -- confirm.
int64_t computeOffset() const {
return destination.getVA() - (getThunkTargetSym()->getVA() + 4);
}
};
// PPC64 R12 Setup Stub
@@ -893,12 +916,25 @@ bool PPC64PltCallStub::isCompatibleWith(const InputSection &isec,
}
// Writes the stub into buf: first `std r2,24(r1)`, then one of three ways of
// reaching the destination, chosen by how far away it is:
//   - short form:      b <offset>
//   - 34-bit offset:   paddi r12, 0, <offset>, 1 ; mtctr r12 ; bctr
//   - anything larger: load-and-branch sequence written by
//                      writePPC64LoadAndBranch() using an entry in
//                      in.ppc64LongBranchTarget addressed from the TOC base
void PPC64R2SaveStub::writeTo(uint8_t *buf) {
// NOTE(review): the next four lines (local offset computation, the 26-bit
// range check and reportRangeError call) look like the pre-change lines of
// the diff view; they duplicate the `offset` declared below -- confirm
// against the actual file.
int64_t offset = destination.getVA() - (getThunkTargetSym()->getVA() + 4);
// The branch offset needs to fit in 26 bits.
if (!isInt<26>(offset))
reportRangeError(buf, offset, 26, destination, "R2 save stub offset");
// Offset is measured from buf + 4, i.e. from the instruction after the std.
const int64_t offset = computeOffset();
write32(buf + 0, 0xf8410018); // std r2,24(r1)
// NOTE(review): this unconditional branch write also looks like a pre-change
// line; the same write appears inside the short-form branch below.
write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b <offset>
// A single `b` is used only while the thunk is still allowed to be short
// (which implies the offset fits in 26 bits); otherwise fall through to one
// of the long forms.
if (getMayUseShortThunk()) {
write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b <offset>
} else if (isInt<34>(offset)) {
// Split the 34-bit offset across the prefixed instruction: bits 16..33
// (18 bits) are placed starting at bit 32 of the 64-bit value, bits 0..15
// stay in the low 16 bits.
const uint64_t paddi = PADDI_R12_NO_DISP |
(((offset >> 16) & 0x3ffff) << 32) |
(offset & 0xffff);
writePrefixedInstruction(buf + 4, paddi); // paddi r12, 0, func@pcrel, 1
write32(buf + 12, MTCTR_R12); // mtctr r12
write32(buf + 16, BCTR); // bctr
} else {
// Offset does not fit in 34 bits: go through in.ppc64LongBranchTarget
// and pass the entry's offset from the TOC base to the load-and-branch
// writer.
// NOTE(review): addEntry() is called here at write time; presumably it is
// a no-op when the entry already exists -- confirm.
in.ppc64LongBranchTarget->addEntry(&destination, addend);
const int64_t offsetFromTOC =
in.ppc64LongBranchTarget->getEntryVA(&destination, addend) -
getPPC64TocBase();
writePPC64LoadAndBranch(buf + 4, offsetFromTOC);
}
}
void PPC64R2SaveStub::addSymbols(ThunkSection &isec) {
@@ -1109,7 +1145,7 @@ static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
// then the callee clobbers the TOC and we need an R2 save stub when RelType
// is R_PPC64_REL14 or R_PPC64_REL24.
if ((type == R_PPC64_REL14 || type == R_PPC64_REL24) && (s.stOther >> 5) == 1)
return make<PPC64R2SaveStub>(s);
return make<PPC64R2SaveStub>(s, a);
if (type == R_PPC64_REL24_NOTOC)
return (s.stOther >> 5) > 1

View File

@@ -1,36 +1,111 @@
# REQUIRES: ppc
# RUN: echo 'SECTIONS { \
# RUN: .text_callee 0x10010000 : { *(.text_callee) } \
# RUN: .text_caller 0x20020000 : { *(.text_caller) } \
# RUN: }' > %t.script
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/asm -o %t.o
# RUN: ld.lld -T %t/lts %t.o -o %t_le
# RUN: llvm-objdump --mcpu=pwr10 --no-show-raw-insn -d %t_le | FileCheck %s
# RUN: llvm-readelf -s %t_le | FileCheck %s --check-prefix=SYM
# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o
# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64 %t/asm -o %t.o
# RUN: ld.lld -T %t/lts %t.o -o %t_be
# RUN: llvm-objdump --mcpu=pwr10 --no-show-raw-insn -d %t_be | FileCheck %s
# RUN: llvm-readelf -s %t_be | FileCheck %s --check-prefix=SYM
# CHECK: error: R2 save stub offset is out of range: -268501028 is not in [-33554432, 33554431]; references callee
# CHECK-NEXT: >>> defined in {{.*}}.o
# SYM: Symbol table '.symtab' contains 9 entries:
# SYM: 1: 0000000010010000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 1 callee
# SYM-NEXT: 2: 0000000020020008 0 NOTYPE LOCAL DEFAULT [<other: 0x60>] 3 caller
# SYM-NEXT: 3: 0000000010020008 0 NOTYPE LOCAL DEFAULT 2 caller_close
# SYM-NEXT: 4: 0000000520020008 0 NOTYPE LOCAL DEFAULT 4 caller_far
# SYM-NEXT: 5: 0000000520028038 0 NOTYPE LOCAL HIDDEN 6 .TOC.
# SYM-NEXT: 6: 0000000010020020 8 FUNC LOCAL DEFAULT 2 __toc_save_callee
# SYM-NEXT: 7: 0000000020020020 20 FUNC LOCAL DEFAULT 3 __toc_save_callee
# SYM-NEXT: 8: 0000000520020020 20 FUNC LOCAL DEFAULT 4 __toc_save_callee
# RUN: ld.lld -T %t.script %t.o -o /dev/null --noinhibit-exec
#--- lts
PHDRS {
callee PT_LOAD FLAGS(0x1 | 0x4);
close PT_LOAD FLAGS(0x1 | 0x4);
caller PT_LOAD FLAGS(0x1 | 0x4);
far PT_LOAD FLAGS(0x1 | 0x4);
}
SECTIONS {
.text_callee 0x10010000 : { *(.text_callee) } :callee
.text_caller_close 0x10020000 : { *(.text_caller_close) } :close
.text_caller 0x20020000 : { *(.text_caller) } :caller
.text_caller_far 0x520020000 : { *(.text_caller_far) } :far
}
#--- asm
# CHECK-LABEL: <callee>:
# CHECK: blr
.section .text_callee, "ax", %progbits
callee:
.localentry callee, 1
blr
.section .text_caller, "ax", %progbits
caller:
# CHECK-LABEL: <caller_close>:
# CHECK: bl 0x10020020
# CHECK-NEXT: ld 2, 24(1)
# CHECK-NEXT: blr
# CHECK-LABEL: <__toc_save_callee>:
# CHECK: std 2, 24(1)
# CHECK-NEXT: b 0x10010000
.section .text_caller_close, "ax", %progbits
.Lfunc_toc1:
.quad .TOC.-.Lfunc_gep1
caller_close:
.Lfunc_gep1:
addis 2, 12, .TOC.-.Lfunc_gep1@ha
addi 2, 2, .TOC.-.Lfunc_gep1@l
ld 2, .Lfunc_toc1-.Lfunc_gep1(12)
add 2, 2, 12
.Lfunc_lep1:
.localentry caller, .Lfunc_lep1-.Lfunc_gep1
addis 30, 2, global@toc@ha
lwz 3, global@toc@l(30)
bl callee
nop
blr
global:
.long 0
# CHECK-LABEL: <caller>:
# CHECK: bl 0x20020020
# CHECK-NEXT: ld 2, 24(1)
# CHECK-NEXT: blr
# CHECK-LABEL: <__toc_save_callee>:
# CHECK: std 2, 24(1)
# CHECK-NEXT: paddi 12, 0, -268501028, 1
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr
.section .text_caller, "ax", %progbits
.Lfunc_toc2:
.quad .TOC.-.Lfunc_gep2
caller:
.Lfunc_gep2:
ld 2, .Lfunc_toc2-.Lfunc_gep2(12)
add 2, 2, 12
.Lfunc_lep2:
.localentry caller, .Lfunc_lep2-.Lfunc_gep2
bl callee
nop
blr
# CHECK-LABEL: <caller_far>:
# CHECK: ld 2, -8(12)
# CHECK-NEXT: add 2, 2, 12
# CHECK-NEXT: bl 0x520020020
# CHECK-NEXT: ld 2, 24(1)
# CHECK-NEXT: blr
# CHECK-LABEL: <__toc_save_callee>:
# CHECK: std 2, 24(1)
# CHECK-NEXT: addis 12, 2, 0
# CHECK-NEXT: ld 12, -32760(12)
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr
.section .text_caller_far, "ax", %progbits
.Lfunc_toc3:
.quad .TOC.-.Lfunc_gep3
caller_far:
.Lfunc_gep3:
ld 2, .Lfunc_toc3-.Lfunc_gep3(12)
add 2, 2, 12
.Lfunc_lep3:
.localentry caller, .Lfunc_lep3-.Lfunc_gep3
bl callee
nop
blr