Work around missing sc 1 traps on pHyp
When running a pseries guest in PR KVM on top of pHyp, sc 1 instructions are handled directly by pHyp, so we don't get to see them. That means we need to get inventive. Invent a new instruction that behaves like sc 1, but really is a reserved instruction that traps. This instruction can be used by KVM to emulate sc 1 behavior. This patch adds the SLOF support for it. With this, SLOF detects whether it's running on such a broken setup and if so patches itself to execute the fake sc 1 instruction instead of the real one. Furthermore, we also hook into "quiesce" which Linux calls when it boots. This gives us the chance to also patch Linux when it boots up, so it uses the fake sc 1 too. Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
This commit is contained in:
parent
69c6fc492c
commit
dd53579ae8
|
@ -1,6 +1,6 @@
|
|||
BOARD=qemu
|
||||
TARG=ppc64
|
||||
export FLAG=-DRTAS_NVRAM
|
||||
export FLAG="-DRTAS_NVRAM -DBROKEN_SC1"
|
||||
export CPUARCH=ppcp7
|
||||
export CPUARCHDEF=-DCPU_PPCP7
|
||||
#export SNK_BIOSEMU_APPS=1
|
||||
|
|
|
@ -27,7 +27,8 @@ STG1OBJ += stage2_head.o stage2.o comlib.o romfs_wrap.o nvramlog.o
|
|||
|
||||
all: stage1.bin Cboot.o
|
||||
|
||||
stage1.bin: $(STG1OBJ) $(LIBCMNDIR)/libelf.a $(LIBCMNDIR)/libc.a
|
||||
stage1.bin: $(STG1OBJ) $(LIBCMNDIR)/libelf.a $(LIBCMNDIR)/libc.a \
|
||||
$(LIBCMNDIR)/libhvcall.a
|
||||
$(LD) $(LDFLAGS1) -o stage1.elf $^
|
||||
$(OBJCOPY) -O binary stage1.elf $@
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <cpu.h>
|
||||
#include <libelf.h>
|
||||
#include <string.h>
|
||||
#include "../lib/libhvcall/libhvcall.h"
|
||||
|
||||
#define DEBUG(fmt...)
|
||||
//#define DEBUG(fmt...) printf(fmt)
|
||||
|
@ -101,6 +102,8 @@ static void load_file(uint64_t destAddr, char *name, uint64_t maxSize,
|
|||
flush_cache((void *) destAddr, fileInfo.size_data);
|
||||
}
|
||||
|
||||
extern void print_version(void);
|
||||
|
||||
/***************************************************************************
|
||||
* Function: early_c_entry
|
||||
* Input : start_addr
|
||||
|
@ -118,6 +121,20 @@ void early_c_entry(uint64_t start_addr, uint64_t fdt_addr)
|
|||
// uint64_t flashlen = header->flashlen;
|
||||
unsigned long ofw_addr[2];
|
||||
int rc;
|
||||
extern char __executable_start;
|
||||
extern char __etext;
|
||||
|
||||
/*
|
||||
* If we run on a broken environment, we need to patch our own sc 1
|
||||
* calls to be able to trap hypercalls. This does not cover RTAS or
|
||||
* any payload we will load yet.
|
||||
*/
|
||||
if (patch_broken_sc1(&__executable_start, &__etext, NULL)) {
|
||||
/* We are running in PR KVM on top of pHyp. Print all output
|
||||
we missed to print so far again to fake identical behavior */
|
||||
printf("\n\r\nSLOF");
|
||||
print_version();
|
||||
}
|
||||
|
||||
if (fdt_addr == 0) {
|
||||
puts("ERROR: Flatten device tree not available!");
|
||||
|
|
|
@ -17,10 +17,14 @@ OUTPUT_ARCH(powerpc:common64)
|
|||
ENTRY ( __start )
|
||||
|
||||
SECTIONS {
|
||||
__executable_start = .;
|
||||
|
||||
.text : {
|
||||
*(.text)
|
||||
}
|
||||
|
||||
__etext = .;
|
||||
|
||||
. = ALIGN(8);
|
||||
|
||||
.data : {
|
||||
|
|
|
@ -66,6 +66,12 @@ rtas-cb /rtas-control-block erase
|
|||
|
||||
\ ." RTAS found, base=" rtas-base . ." size=" rtas-size . cr
|
||||
|
||||
\ Patch the RTAS blob with our sc1 patcher if necessary
|
||||
0
|
||||
rtas-base
|
||||
dup rtas-size +
|
||||
check-and-patch-sc1
|
||||
|
||||
device-end
|
||||
;
|
||||
find-qemu-rtas
|
||||
|
|
|
@ -124,6 +124,30 @@ populate-pci-busses
|
|||
|
||||
600 cp
|
||||
|
||||
: check-patch-kernel-sc1 ( -- )
|
||||
\ At this point we can try our best to patch the kernel. This function
|
||||
\ gets called from the "quiesce" call that kernels execute before they
|
||||
\ take over the system.
|
||||
\
|
||||
\ Here we know that ciregs->r4 contains the return address that gets us
|
||||
\ back into enter_prom inside the guest kernel.
|
||||
\ We assume that within a range of +- 16MB of that pointer all sc 1
|
||||
\ instructions inside of that kernel reside.
|
||||
|
||||
\ test_ins (instruction that tells us the kernel's endianness; we use the
|
||||
\ return address back into the kernel here.)
|
||||
ciregs >r4 @ \ stack: ( test_ins )
|
||||
\ test_ins + 16MB (end of search range)
|
||||
dup 1000000 + \ stack: ( test_ins end )
|
||||
\ MAX(test_ins - 16MB, 0) (start of search range)
|
||||
\ Note: the compare and subtract below act on "end" (test_ins + 1000000),
|
||||
\ which is why they use 2000000: end - 2000000 = test_ins - 1000000.
|
||||
dup 2000000 < IF 0 ELSE dup 2000000 - THEN \ stack: ( test_ins end start )
|
||||
swap \ stack: ( test_ins start end )
|
||||
check-and-patch-sc1 \ consumes all three cells
|
||||
;
|
||||
|
||||
\ Add sc 1 patching
|
||||
' check-patch-kernel-sc1 add-quiesce-xt
|
||||
|
||||
\ Add rtas cleanup last
|
||||
' rtas-quiesce add-quiesce-xt
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ TARGET = ../libhvcall.a
|
|||
|
||||
all: $(TARGET)
|
||||
|
||||
SRCS =
|
||||
SRCS = brokensc1.c
|
||||
SRCSS = hvcall.S
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,162 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <cpu.h>
|
||||
#include "libhvcall.h"
|
||||
#include "byteorder.h"
|
||||
|
||||
// #define DEBUG_PATCHERY
|
||||
|
||||
#define H_SET_DABR 0x28
|
||||
|
||||
/* Cached result of probing whether "sc 1" hypercalls work on this host. */
enum broken_sc1 {
|
||||
SC1_UNKNOWN, /* sc 1 has not been probed yet */
|
||||
SC1_BROKEN, /* probe hcall failed: sc 1 must be patched */
|
||||
SC1_WORKS, /* probe hcall succeeded: nothing to patch */
|
||||
};
|
||||
|
||||
static unsigned long hcall(unsigned long arg0, unsigned long arg1)
|
||||
{
|
||||
register unsigned long r3 asm("r3") = arg0;
|
||||
register unsigned long r4 asm("r4") = arg1;
|
||||
asm volatile("sc 1"
|
||||
: "=r" (r3)
|
||||
: "r" (r3), "r" (r4)
|
||||
: "ctr", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
|
||||
"r12", "r13", "r31", "lr", "cc");
|
||||
return r3;
|
||||
}
|
||||
|
||||
static enum broken_sc1 check_broken_sc1(void)
|
||||
{
|
||||
long r;
|
||||
|
||||
/*
|
||||
* Check if we can do a simple hcall. If it works, we are running in
|
||||
* a sane environment and everything's fine. If it doesn't, we need
|
||||
* to patch the hypercall instruction to something that traps into
|
||||
* supervisor mode.
|
||||
*/
|
||||
r = hcall(H_SET_DABR, 0);
|
||||
if (r == H_SUCCESS || r == H_HARDWARE) {
|
||||
/* All is fine */
|
||||
return SC1_WORKS;
|
||||
}
|
||||
|
||||
/* We found a broken sc1 host! */
|
||||
return SC1_BROKEN;
|
||||
}
|
||||
|
||||
int patch_broken_sc1(void *start, void *end, uint32_t *test_ins)
|
||||
{
|
||||
static enum broken_sc1 is_broken_sc1 = SC1_UNKNOWN;
|
||||
uint32_t *p;
|
||||
/* The sc 1 instruction */
|
||||
uint32_t sc1 = 0x44000022;
|
||||
/* An illegal instruction that KVM interprets as sc 1 */
|
||||
uint32_t sc1_replacement = 0x7c000268;
|
||||
int is_le = (test_ins && *test_ins == 0x48000008);
|
||||
#ifdef DEBUG_PATCHERY
|
||||
int cnt = 0;
|
||||
#endif
|
||||
|
||||
switch (is_broken_sc1) {
|
||||
case SC1_UNKNOWN:
|
||||
/* If we never probed sc1 before, let's do so now! */
|
||||
is_broken_sc1 = check_broken_sc1();
|
||||
return patch_broken_sc1(start, end, test_ins);
|
||||
case SC1_WORKS:
|
||||
/* If we know that sc1 works fine, no need to check */
|
||||
return 0;
|
||||
case SC1_BROKEN:
|
||||
/* Handled below */
|
||||
break;
|
||||
}
|
||||
|
||||
/* We only get here with a broken sc1 implementation */
|
||||
|
||||
/* Trim the range we scan to not cover the data section */
|
||||
if (test_ins) {
|
||||
/* This is the cpu table matcher for 970FX */
|
||||
uint32_t end_bytes[] = { 0xffff0000, 0x3c0000 };
|
||||
/*
|
||||
* The .__start symbol contains a trap instruction followed
|
||||
* by lots of zeros.
|
||||
*/
|
||||
uint32_t start_bytes[] = { 0x7fe00008, 0, 0, 0, 0 };
|
||||
|
||||
if (is_le) {
|
||||
end_bytes[0] = bswap_32(end_bytes[0]);
|
||||
end_bytes[1] = bswap_32(end_bytes[1]);
|
||||
start_bytes[1] = bswap_32(start_bytes[1]);
|
||||
}
|
||||
|
||||
/* Find the start of the text section */
|
||||
for (p = test_ins; (long)p > (long)start; p--) {
|
||||
if (p[0] == start_bytes[0] &&
|
||||
p[1] == start_bytes[1] &&
|
||||
p[2] == start_bytes[2] &&
|
||||
p[3] == start_bytes[3] &&
|
||||
p[4] == start_bytes[4]) {
|
||||
/*
|
||||
* We found a match of the instruction sequence
|
||||
* trap
|
||||
* .long 0
|
||||
* .long 0
|
||||
* .long 0
|
||||
* .long 0
|
||||
* which marks the beginning of the .text
|
||||
* section on all Linux kernels I've checked.
|
||||
*/
|
||||
#ifdef DEBUG_PATCHERY
|
||||
printf("Shortened start from %p to %p\n", end, p);
|
||||
#endif
|
||||
start = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find the end of the text section */
|
||||
for (p = start; (long)p < (long)end; p++) {
|
||||
if (p[0] == end_bytes[0] && p[1] == end_bytes[1]) {
|
||||
/*
|
||||
* We found a match of the PPC970FX entry in the
|
||||
* guest kernel's CPU table. That table is
|
||||
* usually found early in the .data section and
|
||||
* thus marks the end of the .text section for
|
||||
* us which we need to patch.
|
||||
*/
|
||||
#ifdef DEBUG_PATCHERY
|
||||
printf("Shortened end from %p to %p\n", end, p);
|
||||
#endif
|
||||
end = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (is_le) {
|
||||
/*
|
||||
* The kernel was built for LE mode, so our sc1 and replacement
|
||||
* opcodes are in the wrong byte order. Reverse them.
|
||||
*/
|
||||
sc1 = bswap_32(sc1);
|
||||
sc1_replacement = bswap_32(sc1_replacement);
|
||||
}
|
||||
|
||||
/* Patch all sc 1 instructions to reserved instruction 31/308 */
|
||||
for (p = start; (long)p < (long)end; p++) {
|
||||
if (*p == sc1) {
|
||||
*p = sc1_replacement;
|
||||
flush_cache(p, sizeof(*p));
|
||||
#ifdef DEBUG_PATCHERY
|
||||
cnt++;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_PATCHERY
|
||||
printf("Patched %d instructions (%p - %p)\n", cnt, start, end);
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
|
@ -115,3 +115,11 @@ PRIM(get_X2d_print_X2d_version)
|
|||
unsigned long addr = TOS.u; POP;
|
||||
get_print_banner(addr);
|
||||
MIRP
|
||||
|
||||
/*
 * Forth primitive check-and-patch-sc1 ( test_ins start end -- )
 * Pops three cells and hands them to patch_broken_sc1(); nothing is pushed
 * back, so the function's return value is discarded.
 */
PRIM(check_X2d_and_X2d_patch_X2d_sc1)
|
||||
unsigned long end = TOS.u; POP; /* top of stack: end of range */
|
||||
unsigned long start = TOS.u; POP; /* next: start of range */
|
||||
unsigned long patch_ins = TOS.u; POP; /* last: passed on as test_ins (may be 0) */
|
||||
|
||||
patch_broken_sc1((void*)start, (void*)end, (void*)patch_ins);
|
||||
MIRP
|
||||
|
|
|
@ -17,6 +17,7 @@ cod(hv-reg-crq)
|
|||
cod(hv-free-crq)
|
||||
cod(hv-send-crq)
|
||||
cod(hv-put-tce)
|
||||
cod(check-and-patch-sc1)
|
||||
|
||||
cod(RB@)
|
||||
cod(RB!)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define __LIBHVCALL_H__
|
||||
|
||||
#define H_SUCCESS 0
|
||||
#define H_HARDWARE -1
|
||||
|
||||
#define H_GET_TCE 0x1C
|
||||
#define H_PUT_TCE 0x20
|
||||
|
@ -94,6 +95,7 @@ extern unsigned long hv_logical_ci_store(unsigned long size, unsigned long addr,
|
|||
extern unsigned long hv_logical_memop(unsigned long dst, unsigned long src,
|
||||
unsigned long esize, unsigned long count,
|
||||
unsigned long op);
|
||||
extern int patch_broken_sc1(void *start, void *end, uint32_t *test_ins);
|
||||
|
||||
extern unsigned long hv_cas(unsigned long vec, unsigned long buf,
|
||||
unsigned long size);
|
||||
|
|
10
slof/ofw.S
10
slof/ofw.S
|
@ -45,5 +45,15 @@
|
|||
ld r3, 0(r3)
|
||||
std r3, XVECT_M_HANDLER(0)
|
||||
|
||||
#ifdef BROKEN_SC1
|
||||
/* Patch potentially broken sc 1 instructions */
|
||||
lis r3, _slof_text@h
|
||||
ori r3, r3, _slof_text@l
|
||||
lis r4, _slof_text_end@h
|
||||
ori r4, r4, _slof_text_end@l
|
||||
li r5, 0
|
||||
bl .patch_broken_sc1
|
||||
#endif
|
||||
|
||||
/* GO! */
|
||||
ba 0x100
|
||||
|
|
Loading…
Reference in New Issue