fdt: Pass the resulting device tree to QEMU

This creates a flattened device tree and passes it to QEMU via a custom
hypercall right before jumping to RTAS.

This preloads the strings block with 44 property names from CPU and PCI
device nodes; the string lookup only searches within these preloaded names.

Test results on a guest with 256 CPUs and 256 virtual Intel E1000 devices
running on a POWER8 box:
FDTsize=366024 Strings=15888 Struct=350080 Reused str=12457 242 ms

A simple guest (one CPU, no PCI) with this patch as is:
FDTsize=15940 Strings=3148 Struct=12736 Reused str=84 7 ms

While we are here, fix the version handling in fdt-init. It only matters
a little for the fdt-debug==1 case though.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v6:
* fix memory sizes for free-mem
* store correct chosen-cpu to the header (used to be just 0)
* fdt-skip-string uses zcount now and works 30% faster
* moved to a new file - fdt-fl.fs

v5:
* applied latest comments from Segher
* s/fdt-property/fdt-copy-property/, s/fdt-properties/fdt-copy-properties/
* reduced the temporary buffers to 1MB each as the guest uses 1MB in total
anyway
* do not pass root phandle to fdt-flatten-tree, it fetches it from
device-tree itself
* reworked fdt-copy-properties to use for-all-words proposed by Segher

v4:
* reworked fdt-properties, works a lot faster
* do not store "name" properties as nodes have names already

v3:
* fixed stack handling after hcall returned
* fixed format versions in both rendering and parsing paths
* rebased on top of removed unused hvcalls
* renamed used variables to have fdtfl- prefixes as there are already
some for parsing the initial dt

v2:
* fixed comments from review
* added strings cache
* changed last_compat_vers from 0x17 to 0x16 as suggested by dwg

---

I tested the blob by storing it from QEMU to a file and decompiling it.
This commit is contained in:
Alexey Kardashevskiy 2017-10-19 14:58:30 +11:00
parent 7b61ea3e5c
commit e6fc84652c
6 changed files with 282 additions and 1 deletions

View File

@ -119,6 +119,8 @@ check-boot-menu
380 cp 380 cp
#include "fdt-fl.fs"
\ Grab rtas from qemu \ Grab rtas from qemu
#include "rtas.fs" #include "rtas.fs"

266
board-qemu/slof/fdt-fl.fs Normal file
View File

@ -0,0 +1,266 @@
\ *****************************************************************************
\ * Copyright (c) 2017 IBM Corporation
\ * All rights reserved.
\ * This program and the accompanying materials
\ * are made available under the terms of the BSD License
\ * which accompanies this distribution, and is available at
\ * http://www.opensource.org/licenses/bsd-license.php
\ ****************************************************************************/
\ State used while rendering the flattened device tree.
\ fdtfl-debug: non-zero makes fdt-flatten-tree print size/timing statistics;
\ a value greater than 1 also makes fdt-copy-node print every node path.
0 VALUE fdtfl-debug
\ Start of the temporary struct buffer and the current write position in it.
VARIABLE fdtfl-struct
VARIABLE fdtfl-struct-here
\ Start of the temporary strings buffer, the end of the preloaded names
\ (fdt-find-string only searches below this address), and the current write
\ position in the strings buffer.
VARIABLE fdtfl-strings
VARIABLE fdtfl-strings-cache
VARIABLE fdtfl-strings-here
\ Count of property names that were found among the preloaded strings.
VARIABLE fdtfl-strings-reused \ debug only
\ Value of "milliseconds" taken when fdt-flatten-tree starts.
VARIABLE fdlfl-ms \ debug only
\ Step from the start of one zero-terminated string in the strings block to
\ the start of the next one; the result is rounded up to a multiple of 4.
: fdt-skip-string ( cur -- cur )
    zcount +        \ presumably zcount yields ( addr len ): now at the NUL
    char+           \ step over the terminator
    4 #aligned      \ fdt-add-string pads every entry with fdt-str-align
;
\ Compare the string str/len with the zero-terminated string at zstr.
\ The result is true only when zstr has a zero byte at offset len and comp
\ returns 0 for the first len bytes of both strings.
: zstring= ( str len zstr -- flag )
    2dup + c@ IF                \ zstr does not end at offset len
        3drop false
    ELSE
        swap comp 0=            ( str zstr len -- flag )
    THEN
;
\ Look name/namelen up among the strings stored between fdtfl-strings and
\ fdtfl-strings-cache. On a hit return the offset of the match from
\ fdtfl-strings and true; otherwise return just false.
: fdt-find-string ( name namelen -- nameoff true | false )
    fdtfl-strings @                         ( name namelen cur )
    BEGIN dup fdtfl-strings-cache @ < WHILE
        3dup zstring= IF
            -rot 2drop                      ( cur )
            fdtfl-strings @ - true          ( nameoff true )
            EXIT
        THEN
        fdt-skip-string                     ( name namelen next )
    REPEAT
    3drop false
;
\ Advance the strings-buffer write position by len bytes.
\ fdtfl-strings-here is a VARIABLE (it is read with @ and initialised with !
\ elsewhere in this file), so it has to be updated through its address;
\ TO is only defined for VALUEs.
: fdt-str-allot ( len -- ) fdtfl-strings-here +! ;
\ Append one byte to the strings buffer.
: fdt-str-c, ( char -- )
    fdtfl-strings-here @        ( char dst )
    1 fdt-str-allot             \ reserve the byte before storing it
    c!
;
\ Pad the strings buffer up to the next multiple of 4: the padding bytes are
\ erased and the write position is moved past them.
: fdt-str-align ( -- )
    fdtfl-strings-here @ dup 4 #aligned     ( here aligned )
    over -                                  ( here pad-len )
    dup -rot erase                          ( pad-len )
    fdt-str-allot
;
\ Append len bytes starting at data to the strings buffer.
: fdt-str-bytes, ( data len -- )
    fdtfl-strings-here @        ( data len dst )
    over fdt-str-allot          \ reserve len bytes
    swap move                   ( data dst len -- )
;
\ Append the string str/len followed by a zero byte to the strings buffer.
: fdt-str-ztr, ( str len -- )
    fdt-str-bytes,
    0 fdt-str-c,
;
\ Unconditionally append name/namelen (zero-terminated, then padded with
\ fdt-str-align) to the strings buffer and return the offset of the new
\ entry from fdtfl-strings.
: fdt-add-string ( name namelen -- nameoff )
    fdtfl-strings-here @ >r     \ remember where the new entry starts
    fdt-str-ztr,
    fdt-str-align
    r> fdtfl-strings @ -        ( nameoff )
;
\ Return the strings-buffer offset for name/namelen: the offset of an
\ existing entry when fdt-find-string locates one, otherwise the offset of a
\ newly appended entry. With fdtfl-debug set, hits are counted in
\ fdtfl-strings-reused.
: fdt-get-string ( name namelen -- nameoff )
    2dup fdt-find-string IF             ( name namelen nameoff )
        nip nip                         ( nameoff )
        fdtfl-debug IF 1 fdtfl-strings-reused +! THEN
    ELSE
        fdt-add-string
    THEN
;
\ Advance the struct-buffer write position by len bytes.
\ fdtfl-struct-here is a VARIABLE (it is read with @ and initialised with !
\ elsewhere in this file), so it has to be updated through its address;
\ TO is only defined for VALUEs.
: fdt-allot ( len -- ) fdtfl-struct-here +! ;
\ Append one byte to the struct buffer.
: fdt-c, ( char -- )
    fdtfl-struct-here @         ( char dst )
    1 fdt-allot                 \ reserve the byte before storing it
    c!
;
\ Pad the struct buffer up to the next multiple of 4: the padding bytes are
\ erased and the write position is moved past them.
: fdt-align ( -- )
    fdtfl-struct-here @ dup 4 #aligned      ( here aligned )
    over -                                  ( here pad-len )
    dup -rot erase                          ( pad-len )
    fdt-allot
;
\ Append len bytes starting at data to the struct buffer.
: fdt-bytes, ( data len -- )
    fdtfl-struct-here @         ( data len dst )
    over fdt-allot              \ reserve len bytes
    swap move                   ( data dst len -- )
;
\ Append the string str/len followed by a zero byte to the struct buffer.
: fdt-ztr, ( str len -- )
    fdt-bytes,
    0 fdt-c,
;
\ Store token with l! at the struct write position and advance by /l.
: fdt-l, ( token -- )
    fdtfl-struct-here @ l!
    /l fdt-allot
;
\ Open a node in the struct buffer: emit OF_DT_BEGIN_NODE followed by the
\ zero-terminated node name, padded with fdt-align. The node that equals
\ "device-tree @" is written with an empty name; every other node uses the
\ string returned by node>qname.
: fdt-begin-node ( phandle -- )
    OF_DT_BEGIN_NODE fdt-l,
    dup device-tree @ <> IF
        node>qname
    ELSE
        drop s" "
    THEN                        ( str len )
    fdt-ztr,
    fdt-align
;
\ Close the current node by emitting OF_DT_END_NODE into the struct buffer.
: fdt-end-node ( -- )
    OF_DT_END_NODE fdt-l,
;
\ Emit one property into the struct buffer: OF_DT_PROP, the value length,
\ the offset of the property name in the strings buffer, then the value
\ bytes padded with fdt-align.
: fdt-prop ( prop len name namelen -- )
    OF_DT_PROP fdt-l,
    fdt-get-string              ( prop len nameoff )
    >r dup fdt-l,               \ value length goes first
    r> fdt-l,                   \ then the name offset    ( prop len )
    fdt-bytes,
    fdt-align
;
\ Terminate the struct buffer contents by emitting OF_DT_END.
: fdt-end ( -- )
    OF_DT_END fdt-l,
;
\ Copy the property behind one word-list link into the struct buffer.
\ Executing the word is expected to leave the value as ( prop len ); the
\ word's own name is used as the property name. A property called "name"
\ is dropped instead of being copied.
: fdt-copy-property ( link -- )
    dup link> execute           ( link prop len )
    rot link>name name>string   ( prop len name namelen )
    2dup s" name" str= IF
        4drop                   \ skipping useless "name"
    ELSE
        fdt-prop
    THEN
;
\ Walk the chain of links that starts in the cell after wid and call xt,
\ which has the signature ( lfa -- ), once for every non-zero link. The next
\ link is fetched from the current one after xt returns.
: for-all-words ( wid xt -- )
    >r                          ( wid  r: xt )
    cell+ @                     ( link )
    BEGIN ?dup WHILE
        dup r@ execute          ( link )
        @                       ( next-link )
    REPEAT
    r> drop
;
\ Emit the properties of one node: first a "phandle" property holding the
\ encoded phandle itself, then every entry of the node's property word list
\ via fdt-copy-property.
: fdt-copy-properties ( phandle -- )
    dup encode-int              ( phandle prop len )
    s" phandle" fdt-prop        ( phandle )
    node>properties @
    ['] fdt-copy-property
    for-all-words
;
\ Render node and all of its descendants into the struct buffer: begin-node
\ token, properties, each child recursively, end-node token. With
\ fdtfl-debug greater than 1 the node path is printed first.
: fdt-copy-node ( node -- )
    fdtfl-debug 1 > IF
        dup node>path type cr
    THEN
    dup fdt-begin-node
    dup fdt-copy-properties
    child                       ( first-child | 0 )
    BEGIN dup WHILE
        dup recurse
        peer                    ( next-sibling | 0 )
    REPEAT
    drop
    fdt-end-node
;
\ Fill the strings buffer with a fixed list of property names.
\ fdt-flatten-tree calls this before rendering and then records the end of
\ these entries in fdtfl-strings-cache, so they are the only names that
\ fdt-find-string can match. Each offset returned by fdt-add-string is
\ dropped. The order of the entries determines their offsets in the buffer.
: fdtfl-strings-preload ( -- )
s" reg" fdt-add-string drop
s" status" fdt-add-string drop
s" 64-bit" fdt-add-string drop
s" phandle" fdt-add-string drop
s" ibm,vmx" fdt-add-string drop
s" ibm,dfp" fdt-add-string drop
s" slb-size" fdt-add-string drop
s" ibm,purr" fdt-add-string drop
s" vendor-id" fdt-add-string drop
s" device-id" fdt-add-string drop
s" min-grant" fdt-add-string drop
s" class-code" fdt-add-string drop
s" compatible" fdt-add-string drop
s" interrupts" fdt-add-string drop
s" cpu-version" fdt-add-string drop
s" #size-cells" fdt-add-string drop
s" ibm,req#msi" fdt-add-string drop
s" revision-id" fdt-add-string drop
s" device_type" fdt-add-string drop
s" max-latency" fdt-add-string drop
s" ibm,chip-id" fdt-add-string drop
s" ibm,pft-size" fdt-add-string drop
s" ibm,slb-size" fdt-add-string drop
s" devsel-speed" fdt-add-string drop
s" ibm,loc-code" fdt-add-string drop
s" subsystem-id" fdt-add-string drop
s" d-cache-size" fdt-add-string drop
s" i-cache-size" fdt-add-string drop
s" #address-cells" fdt-add-string drop
s" clock-frequency" fdt-add-string drop
s" cache-line-size" fdt-add-string drop
s" ibm,pa-features" fdt-add-string drop
s" ibm,my-drc-index" fdt-add-string drop
s" d-cache-line-size" fdt-add-string drop
s" i-cache-line-size" fdt-add-string drop
s" assigned-addresses" fdt-add-string drop
s" d-cache-block-size" fdt-add-string drop
s" i-cache-block-size" fdt-add-string drop
s" timebase-frequency" fdt-add-string drop
s" subsystem-vendor-id" fdt-add-string drop
s" ibm,segment-page-sizes" fdt-add-string drop
s" ibm,ppc-interrupt-server#s" fdt-add-string drop
s" ibm,processor-segment-sizes" fdt-add-string drop
s" ibm,ppc-interrupt-gserver#s" fdt-add-string drop
;
\ Copy bytes bytes from blob to cur and return the address just past the
\ copied data (cur + bytes).
: fdt-append-blob ( bytes cur blob -- cur )
    over 3 pick                 ( bytes cur blob cur bytes )
    move                        ( bytes cur )
    +
;
\ Render the device tree rooted at "device-tree @" into a newly allocated
\ flattened device tree blob and return its address.
\ Blob layout as written below: header (/fdth bytes), an empty reserved
\ block, the strings, then the struct data. The blob size is stored in the
\ header's tsize field, which fdt-flatten-tree-free reads to release it.
\ Numeric literals appear to be hexadecimal (the existing comment describes
\ "10 +" as reserving 16 bytes).
: fdt-flatten-tree ( -- tree )
\ Allocate the two temporary buffers; both the start and the write position
\ of each buffer begin at the allocated address.
100000 alloc-mem dup fdtfl-struct-here ! fdtfl-struct !
100000 alloc-mem dup fdtfl-strings-here ! fdtfl-strings !
\ Debug only: reset the reuse counter and take the start timestamp
fdtfl-debug IF
0 fdtfl-strings-reused !
milliseconds fdlfl-ms !
THEN
\ Preload the strings cache and remember where the preloaded names end
fdtfl-strings-preload
fdtfl-strings-here @ fdtfl-strings-cache !
\ Render the struct and strings data for the whole tree
device-tree @ fdt-copy-node
fdt-end
\ Calculate strings and struct sizes
fdtfl-struct-here @ fdtfl-struct @ -
fdtfl-strings-here @ fdtfl-strings @ - ( struct-len strings-len )
\ Total size = struct + strings + header + reserved block
2dup + /fdth +
10 + \ Reserve 16 bytes for an empty reserved block
\ Debug only: print total, strings and struct sizes (top of stack first),
\ the reuse counter and the elapsed time
fdtfl-debug IF
3dup
." FDTsize=" .d ." Strings=" .d ." Struct=" .d
." Reused str=" fdtfl-strings-reused @ .d
milliseconds fdlfl-ms @ - .d ." ms"
cr
THEN
\ Allocate the flattened DT blob and keep its address on the return stack
dup alloc-mem ( struct-len strings-len total-len fdt )
>r ( struct-len strings-len total-len r: fdt )
\ Write header. The struct offset is header + reserved block + strings-len
\ ("2 pick" fetches strings-len here); the strings start right after the
\ reserved block, and the reserved block right after the header.
OF_DT_HEADER r@ >fdth_magic l!
dup r@ >fdth_tsize l!
/fdth 10 + 2 pick + r@ >fdth_struct_off l!
/fdth 10 + r@ >fdth_string_off l!
/fdth r@ >fdth_rsvmap_off l!
11 r@ >fdth_version l!
10 r@ >fdth_compat_vers l!
chosen-cpu-unit r@ >fdth_boot_cpu l!
over r@ >fdth_string_size l!
2 pick r@ >fdth_struct_size l!
( struct-len strings-len total-len r: fdt )
drop ( struct-len strings-len r: fdt )
r@ /fdth + ( struct-len strings-len cur r: fdt )
\ Write the reserved entry: two zero cells
\ (assumes 8-byte cells so that they fill the 16 reserved bytes - TODO confirm)
0 over ! cell+ 0 over ! cell+
\ Write strings and struct blobs; each call consumes one length and
\ returns the advanced write address
fdtfl-strings @ fdt-append-blob
fdtfl-struct @ fdt-append-blob
drop
\ Free temporary blobs
fdtfl-struct @ 100000 free-mem
fdtfl-strings @ 100000 free-mem
\ Return fdt
r>
;
\ Release a blob: its size is read from the tsize field of its own header
\ and passed to free-mem together with the blob address.
: fdt-flatten-tree-free ( tree -- )
    dup >fdth_tsize l@          ( tree size )
    free-mem
;

View File

@ -98,6 +98,12 @@ find-qemu-rtas
; ;
: rtas-quiesce ( -- ) : rtas-quiesce ( -- )
fdt-flatten-tree
dup hv-update-dt ?dup IF
\ Ignore hcall not implemented error, print error otherwise
dup -2 <> IF ." HV-UPDATE-DT error: " . cr ELSE drop THEN
THEN
fdt-flatten-tree-free
" quiesce" rtas-get-token rtas-cb rtas>token l! " quiesce" rtas-get-token rtas-cb rtas>token l!
0 rtas-cb rtas>nargs l! 0 rtas-cb rtas>nargs l!
0 rtas-cb rtas>nret l! 0 rtas-cb rtas>nret l!

View File

@ -123,3 +123,8 @@ PRIM(check_X2d_and_X2d_patch_X2d_sc1)
patch_broken_sc1((void*)start, (void*)end, (void*)patch_ins); patch_broken_sc1((void*)start, (void*)end, (void*)patch_ins);
MIRP MIRP
PRIM(hv_X2d_update_X2d_dt)
unsigned long dt = TOS.u;
TOS.u = hv_generic(KVMPPC_H_UPDATE_DT, dt);
MIRP

View File

@ -30,4 +30,5 @@ cod(RX!)
cod(hv-logical-memop) cod(hv-logical-memop)
cod(hv-cas) cod(hv-cas)
cod(hv-update-dt)
cod(get-print-version) cod(get-print-version)

View File

@ -24,7 +24,8 @@
#define KVMPPC_H_LOGICAL_MEMOP (KVMPPC_HCALL_BASE + 0x1) #define KVMPPC_H_LOGICAL_MEMOP (KVMPPC_HCALL_BASE + 0x1)
/* Client Architecture support */ /* Client Architecture support */
#define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2) #define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2)
#define KVMPPC_HCALL_MAX KVMPPC_H_CAS #define KVMPPC_H_UPDATE_DT (KVMPPC_HCALL_BASE + 0x3)
#define KVMPPC_HCALL_MAX KVMPPC_H_UPDATE_DT
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__