diff --git a/board-qemu/config b/board-qemu/config
index a381f19..5a625c5 100644
--- a/board-qemu/config
+++ b/board-qemu/config
@@ -1,6 +1,6 @@
 BOARD=qemu
 TARG=ppc64
-export FLAG=-DRTAS_NVRAM
+export FLAG="-DRTAS_NVRAM -DBROKEN_SC1"
 export CPUARCH=ppcp7
 export CPUARCHDEF=-DCPU_PPCP7
 #export SNK_BIOSEMU_APPS=1
diff --git a/board-qemu/llfw/Makefile b/board-qemu/llfw/Makefile
index 89f17f8..c83f21e 100644
--- a/board-qemu/llfw/Makefile
+++ b/board-qemu/llfw/Makefile
@@ -27,7 +27,8 @@ STG1OBJ += stage2_head.o stage2.o comlib.o romfs_wrap.o nvramlog.o
 
 all: stage1.bin Cboot.o
 
-stage1.bin: $(STG1OBJ) $(LIBCMNDIR)/libelf.a $(LIBCMNDIR)/libc.a
+stage1.bin: $(STG1OBJ) $(LIBCMNDIR)/libelf.a $(LIBCMNDIR)/libc.a \
+		$(LIBCMNDIR)/libhvcall.a
 	$(LD) $(LDFLAGS1) -o stage1.elf $^
 	$(OBJCOPY) -O binary stage1.elf $@
 
diff --git a/board-qemu/llfw/stage2.c b/board-qemu/llfw/stage2.c
index 54f2b8d..ef6ea35 100644
--- a/board-qemu/llfw/stage2.c
+++ b/board-qemu/llfw/stage2.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include "../lib/libhvcall/libhvcall.h"
 
 #define DEBUG(fmt...)
 //#define DEBUG(fmt...) printf(fmt)
@@ -101,6 +102,8 @@ static void load_file(uint64_t destAddr, char *name, uint64_t maxSize,
 	flush_cache((void *) destAddr, fileInfo.size_data);
 }
 
+extern void print_version(void);
+
 /***************************************************************************
  * Function: early_c_entry
  * Input   : start_addr
@@ -118,6 +121,20 @@ void early_c_entry(uint64_t start_addr, uint64_t fdt_addr)
 	// uint64_t flashlen = header->flashlen;
 	unsigned long ofw_addr[2];
 	int rc;
+	extern char __executable_start;
+	extern char __etext;
+
+	/*
+	 * If we run on a broken environment, we need to patch our own sc 1
+	 * calls to be able to trap hypercalls. This does not cover RTAS or
+	 * any payload we will load yet.
+	 */
+	if (patch_broken_sc1(&__executable_start, &__etext, NULL)) {
+		/* We are running in PR KVM on top of pHyp. Print all output
+		   we missed to print so far again to fake identical behavior */
+		printf("\n\r\nSLOF");
+		print_version();
+	}
 
 	if (fdt_addr == 0) {
 		puts("ERROR: Flatten device tree not available!");
diff --git a/board-qemu/llfw/stage2.lds b/board-qemu/llfw/stage2.lds
index 4012ea5..e060dd1 100644
--- a/board-qemu/llfw/stage2.lds
+++ b/board-qemu/llfw/stage2.lds
@@ -17,10 +17,14 @@ OUTPUT_ARCH(powerpc:common64)
 ENTRY ( __start )
 
 SECTIONS {
+	__executable_start = .;
+
 	.text : {
 		*(.text)
 	}
 
+	__etext = .;
+
 	. = ALIGN(8);
 
 	.data : {
diff --git a/board-qemu/slof/rtas.fs b/board-qemu/slof/rtas.fs
index 5de3b8a..41e30c2 100644
--- a/board-qemu/slof/rtas.fs
+++ b/board-qemu/slof/rtas.fs
@@ -66,6 +66,12 @@ rtas-cb /rtas-control-block erase
 
     \ ." RTAS found, base=" rtas-base . ." size=" rtas-size . cr
 
+    \ Patch the RTAS blob with our sc1 patcher if necessary
+    0
+    rtas-base
+    dup rtas-size +
+    check-and-patch-sc1
+
     device-end
 ;
 find-qemu-rtas
diff --git a/board-qemu/slof/tree.fs b/board-qemu/slof/tree.fs
index dbf6b09..4aba4c5 100644
--- a/board-qemu/slof/tree.fs
+++ b/board-qemu/slof/tree.fs
@@ -124,6 +124,30 @@ populate-pci-busses
 
 600 cp
 
+: check-patch-kernel-sc1 ( -- )
+    \ At this point we can try our best to patch the kernel. This function
+    \ gets called from the "quiesce" call that kernels execute before they
+    \ take over the system.
+    \
+    \ Here we know that ciregs->r4 contains the return address that gets us
+    \ back into enter_prom inside the guest kernel.
+    \ We assume that within a range of +- 16MB of that pointer all sc 1
+    \ instructions inside of that kernel reside.
+
+    \ test_ins (instruction that tells us the kernel's endianness; we use the
+    \           return address back into the kernel here.)
+    ciregs >r4 @
+    \ test_ins + 16MB (end of search range)
+    dup 1000000 +
+    \ MAX(end - 32MB, 0) = MAX(test_ins - 16MB, 0) (start of search range)
+    dup 2000000 < IF 0 ELSE dup 2000000 - THEN
+    swap
+    check-and-patch-sc1
+;
+
+\ Add sc 1 patching
+' check-patch-kernel-sc1 add-quiesce-xt
+
 \ Add rtas cleanup last
 ' rtas-quiesce add-quiesce-xt
 
diff --git a/lib/libhvcall/Makefile b/lib/libhvcall/Makefile
index 53ed98f..af7fbc3 100644
--- a/lib/libhvcall/Makefile
+++ b/lib/libhvcall/Makefile
@@ -22,7 +22,7 @@ TARGET = ../libhvcall.a
 
 all: $(TARGET)
 
-SRCS =
+SRCS = brokensc1.c
 
 SRCSS = hvcall.S
 
diff --git a/lib/libhvcall/brokensc1.c b/lib/libhvcall/brokensc1.c
new file mode 100644
index 0000000..c37a0f1
--- /dev/null
+++ b/lib/libhvcall/brokensc1.c
@@ -0,0 +1,179 @@
+#include
+#include
+#include
+#include "libhvcall.h"
+#include "byteorder.h"
+
+// #define DEBUG_PATCHERY
+
+#define H_SET_DABR 0x28
+
+enum broken_sc1 {
+	SC1_UNKNOWN,
+	SC1_BROKEN,
+	SC1_WORKS,
+};
+
+/*
+ * Execute "sc 1" with arg0 in r3 and arg1 in r4 and return the value
+ * found in r3 afterwards.
+ */
+static unsigned long hcall(unsigned long arg0, unsigned long arg1)
+{
+	register unsigned long r3 asm("r3") = arg0;
+	register unsigned long r4 asm("r4") = arg1;
+	asm volatile("sc 1"
+		     : "=r" (r3)
+		     : "r" (r3), "r" (r4)
+		     : "ctr", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
+		       "r12", "r13", "r31", "lr", "cc");
+	return r3;
+}
+
+/* Probe with an H_SET_DABR hcall whether "sc 1" works on this host. */
+static enum broken_sc1 check_broken_sc1(void)
+{
+	long r;
+
+	/*
+	 * Check if we can do a simple hcall. If it works, we are running in
+	 * a sane environment and everything's fine. If it doesn't, we need
+	 * to patch the hypercall instruction to something that traps into
+	 * supervisor mode.
+	 */
+	r = hcall(H_SET_DABR, 0);
+	if (r == H_SUCCESS || r == H_HARDWARE) {
+		/* All is fine */
+		return SC1_WORKS;
+	}
+
+	/* We found a broken sc1 host! */
+	return SC1_BROKEN;
+}
+
+/*
+ * Replace all "sc 1" instructions in [start, end) with an instruction that
+ * KVM interprets as sc 1, unless the probe hcall shows that sc 1 works.
+ *
+ * start, end: range of 32-bit words to scan and patch
+ * test_ins:   optional pointer to an instruction inside the code that gets
+ *             patched. If set, it is used to detect a little endian kernel
+ *             and to trim the range to the kernel's text section.
+ *
+ * Returns 0 if sc 1 works (nothing is patched), 1 if the range was patched.
+ */
+int patch_broken_sc1(void *start, void *end, uint32_t *test_ins)
+{
+	static enum broken_sc1 is_broken_sc1 = SC1_UNKNOWN;
+	uint32_t *p;
+	/* The sc 1 instruction */
+	uint32_t sc1 = 0x44000022;
+	/* An illegal instruction that KVM interprets as sc 1 */
+	uint32_t sc1_replacement = 0x7c000268;
+	int is_le = (test_ins && *test_ins == 0x48000008);
+#ifdef DEBUG_PATCHERY
+	int cnt = 0;
+#endif
+
+	switch (is_broken_sc1) {
+	case SC1_UNKNOWN:
+		/* If we never probed sc1 before, let's do so now! */
+		is_broken_sc1 = check_broken_sc1();
+		return patch_broken_sc1(start, end, test_ins);
+	case SC1_WORKS:
+		/* If we know that sc1 works fine, no need to check */
+		return 0;
+	case SC1_BROKEN:
+		/* Handled below */
+		break;
+	}
+
+	/* We only get here with a broken sc1 implementation */
+
+	/* Trim the range we scan to not cover the data section */
+	if (test_ins) {
+		/* This is the cpu table matcher for 970FX */
+		uint32_t end_bytes[] = { 0xffff0000, 0x3c0000 };
+		/*
+		 * The .__start symbol contains a trap instruction followed
+		 * by lots of zeros.
+		 */
+		uint32_t start_bytes[] = { 0x7fe00008, 0, 0, 0, 0 };
+
+		if (is_le) {
+			end_bytes[0] = bswap_32(end_bytes[0]);
+			end_bytes[1] = bswap_32(end_bytes[1]);
+			/* Only the trap opcode needs swapping, the rest is 0 */
+			start_bytes[0] = bswap_32(start_bytes[0]);
+		}
+
+		/* Find the start of the text section */
+		for (p = test_ins; (long)p > (long)start; p--) {
+			if (p[0] == start_bytes[0] &&
+			    p[1] == start_bytes[1] &&
+			    p[2] == start_bytes[2] &&
+			    p[3] == start_bytes[3] &&
+			    p[4] == start_bytes[4]) {
+				/*
+				 * We found a match of the instruction sequence
+				 *     trap
+				 *     .long 0
+				 *     .long 0
+				 *     .long 0
+				 *     .long 0
+				 * which marks the beginning of the .text
+				 * section on all Linux kernels I've checked.
+				 */
+#ifdef DEBUG_PATCHERY
+				printf("Shortened start from %p to %p\n", start, p);
+#endif
+				start = p;
+				break;
+			}
+		}
+
+		/* Find the end of the text section */
+		for (p = start; (long)p < (long)end; p++) {
+			if (p[0] == end_bytes[0] && p[1] == end_bytes[1]) {
+				/*
+				 * We found a match of the PPC970FX entry in the
+				 * guest kernel's CPU table. That table is
+				 * usually found early in the .data section and
+				 * thus marks the end of the .text section for
+				 * us which we need to patch.
+				 */
+#ifdef DEBUG_PATCHERY
+				printf("Shortened end from %p to %p\n", end, p);
+#endif
+				end = p;
+				break;
+			}
+		}
+	}
+
+	if (is_le) {
+		/*
+		 * The kernel was built for LE mode, so our sc1 and replacement
+		 * opcodes are in the wrong byte order. Reverse them.
+		 */
+		sc1 = bswap_32(sc1);
+		sc1_replacement = bswap_32(sc1_replacement);
+	}
+
+	/* Patch all sc 1 instructions to reserved instruction 31/308 */
+	for (p = start; (long)p < (long)end; p++) {
+		if (*p == sc1) {
+			*p = sc1_replacement;
+			flush_cache(p, sizeof(*p));
+#ifdef DEBUG_PATCHERY
+			cnt++;
+#endif
+		}
+	}
+
+#ifdef DEBUG_PATCHERY
+	printf("Patched %d instructions (%p - %p)\n", cnt, start, end);
+#endif
+
+	return 1;
+}
diff --git a/lib/libhvcall/hvcall.code b/lib/libhvcall/hvcall.code
index 6d70b3c..744469f 100644
--- a/lib/libhvcall/hvcall.code
+++ b/lib/libhvcall/hvcall.code
@@ -115,3 +115,11 @@ PRIM(get_X2d_print_X2d_version)
 	unsigned long addr = TOS.u; POP;
 	get_print_banner(addr);
 MIRP
+
+PRIM(check_X2d_and_X2d_patch_X2d_sc1)
+	unsigned long end = TOS.u; POP;
+	unsigned long start = TOS.u; POP;
+	unsigned long patch_ins = TOS.u; POP;
+
+	patch_broken_sc1((void*)start, (void*)end, (void*)patch_ins);
+MIRP
diff --git a/lib/libhvcall/hvcall.in b/lib/libhvcall/hvcall.in
index 1f9ed6b..e99d6d1 100644
--- a/lib/libhvcall/hvcall.in
+++ b/lib/libhvcall/hvcall.in
@@ -17,6 +17,7 @@ cod(hv-reg-crq)
 cod(hv-free-crq)
 cod(hv-send-crq)
 cod(hv-put-tce)
+cod(check-and-patch-sc1)
 
 cod(RB@)
 cod(RB!)
diff --git a/lib/libhvcall/libhvcall.h b/lib/libhvcall/libhvcall.h
index 03813cd..6356a62 100644
--- a/lib/libhvcall/libhvcall.h
+++ b/lib/libhvcall/libhvcall.h
@@ -2,6 +2,7 @@
 #define __LIBHVCALL_H__
 
 #define H_SUCCESS		0
+#define H_HARDWARE		-1
 
 #define H_GET_TCE		0x1C
 #define H_PUT_TCE		0x20
@@ -94,6 +95,7 @@ extern unsigned long hv_logical_ci_store(unsigned long size, unsigned long addr,
 extern unsigned long hv_logical_memop(unsigned long dst, unsigned long src,
 				      unsigned long esize, unsigned long count,
 				      unsigned long op);
+extern int patch_broken_sc1(void *start, void *end, uint32_t *test_ins);
 
 extern unsigned long hv_cas(unsigned long vec, unsigned long buf,
 			unsigned long size);
diff --git a/slof/ofw.S b/slof/ofw.S
index b9f78e4..14e1e9d 100644
--- a/slof/ofw.S
+++ b/slof/ofw.S
@@ -45,5 +45,15 @@
 	ld	r3, 0(r3)
 	std	r3, XVECT_M_HANDLER(0)
 
+#ifdef BROKEN_SC1
+	/* Patch potentially broken sc 1 instructions */
+	lis	r3, _slof_text@h
+	ori	r3, r3, _slof_text@l
+	lis	r4, _slof_text_end@h
+	ori	r4, r4, _slof_text_end@l
+	li	r5, 0
+	bl	.patch_broken_sc1
+#endif
+
 	/* GO! */
 	ba	0x100