Stubs for shared libraries on amd64-linux.elf and arm64-linux.elf

modified:   stub/Makefile
	modified:   stub/src/amd64-linux.elf-so_entry.S
	modified:   stub/src/amd64-linux.elf-so_fold.S
	modified:   stub/src/arm64-linux.elf-so_entry.S
	modified:   stub/src/arm64-linux.elf-so_fold.S
	modified:   stub/src/upxfd_linux.c
This commit is contained in:
John Reiser 2024-12-10 16:49:35 -08:00 committed by Markus F.X.J. Oberhumer
parent baee96347d
commit e2f1e6697a
6 changed files with 214 additions and 86 deletions

View File

@ -429,10 +429,19 @@ amd64-linux.elf-entry.h: $(srcdir)/src/$$T.S
$(call tc,f-embed_objinfo,tmp/$T.bin)
$(call tc,bin2h) tmp/$T.bin $@
amd64-linux.elf-so_entry.h: $(srcdir)/src/$$T.S
@echo; echo TARGET=$@ PATH=$(PATH); echo
$(call tc,gcc) -c -x assembler-with-cpp $< -o tmp/$T.bin
$(call tc,f-embed_objinfo,tmp/$T.bin)
# Build the header for the amd64 shared-library entry stub:
#   1. assemble $T.S into tmp/$T.o;
#   2. relocatable (-r) link of that object with the help_umf and upxfd_*
#      objects under the so_entry linker script, writing a link map;
#   3. run f-embed_objinfo_without_xstrip on the result, then bin2h to
#      produce $@.
# The linker script is referenced through $(srcdir) in the recipe as well as
# in the prerequisite list, so both name the same file when srcdir is not ".".
amd64-linux.elf-so_entry.h : $(srcdir)/src/amd64-linux.elf-so_entry.lds \
		$(srcdir)/src/$$T.S \
		tmp/amd64-linux.elf-help_umf.o \
		tmp/amd64-linux.elf-upxfd_android.o \
		tmp/amd64-linux.elf-upxfd_linux.o
	$(call tc,gcc) -c $(srcdir)/src/$T.S -o tmp/$T.o
	multiarch-ld-2.17 -r -Map tmp/$T.map -o tmp/$T.bin \
		-T $(srcdir)/src/amd64-linux.elf-so_entry.lds \
		tmp/$T.o \
		tmp/amd64-linux.elf-help_umf.o \
		tmp/amd64-linux.elf-upxfd_android.o \
		tmp/amd64-linux.elf-upxfd_linux.o
	$(call tc,f-embed_objinfo_without_xstrip,tmp/$T.bin)
	$(call tc,bin2h) tmp/$T.bin $@
amd64-linux.elf-fold.h : $(srcdir)/src/$$T.lds \
@ -449,6 +458,7 @@ amd64-linux.elf-fold.h : $(srcdir)/src/$$T.lds \
amd64-linux.elf-so_fold.h : $(srcdir)/src/$$T.lds \
tmp/$$T.o \
tmp/amd64-expand.o \
tmp/amd64-linux.elf-help_umf.o \
tmp/amd64-linux.elf-upxfd_linux.o \
tmp/amd64-linux.elf-so_main.o
@echo; echo TARGET=$@ PATH=$(PATH); echo
@ -467,6 +477,19 @@ tmp/amd64-linux.elf-so_fold.o : $(srcdir)/src/$$T.S
tmp/amd64-expand.o: $(srcdir)/src/$$T.S
$(call tc,gcc) -c $< -o $@
tmp/amd64-linux.elf-help_umf.o: $(srcdir)/src/$$T.S
$(call tc,gcc) -c $< -o $@
# Compile upxfd_android.c (with -O) for the amd64 stub, then post-process
# the object in place: rename .text to UMF_ANDROID and remove the .comment,
# .data, .bss and .note.GNU-stack sections.  A disassembly listing is
# written next to the object as $@.disasm.
tmp/amd64-linux.elf-upxfd_android.o : $(srcdir)/src/upxfd_android.c
$(call tc,gcc) -c -O $< -o $@
$(call tc,objcopy) --rename-section .text=UMF_ANDROID -R .comment -R .data -R .bss -R .note.GNU-stack $@
$(call tc,objdump) -Dr $(tc_objdump_disasm_options) $@ | $(RTRIM) > $@.disasm
# Compile upxfd_linux.c (with -O) for the amd64 stub, then post-process the
# object in place: rename .text to UMF_LINUX and remove the .comment, .data,
# .bss and .note.GNU-stack sections.  A disassembly listing is written next
# to the object as $@.disasm.
tmp/amd64-linux.elf-upxfd_linux.o : $(srcdir)/src/upxfd_linux.c
$(call tc,gcc) -c -O $< -o $@
$(call tc,objcopy) --rename-section .text=UMF_LINUX -R .comment -R .data -R .bss -R .note.GNU-stack $@
$(call tc,objdump) -Dr $(tc_objdump_disasm_options) $@ | $(RTRIM) > $@.disasm
tmp/amd64-linux.elf-fold.o : $(srcdir)/src/$$T.S
$(call tc,gcc) -c $< -o $@
$(call tc,f-objstrip,$@)
@ -810,7 +833,8 @@ tc.arm64-linux.elf.gcc = arm64-linux-gcc-4.9.2 -nostdinc -DDAISY_CHAIN=1 -MMD -
tc.arm64-linux.elf.gcc += -fno-exceptions -fno-asynchronous-unwind-tables
tc.arm64-linux.elf.gcc += -Wall -W -Wcast-align -Wcast-qual -Wstrict-prototypes -Wwrite-strings -Werror
+tc.arm64-expand.gcc = $(tc.arm64-linux.elf.gcc)
tc.arm64-expand.gcc = $(tc.arm64-linux.elf.gcc)
tc.arm64-linux.elf-upxfd_linux.gcc = $(tc.arm64-linux.elf.gcc)
tc.arm64-linux.elf-fold.ld = arm64-linux-ld-2.25
tc.arm64-linux.elf-so_fold.ld = arm64-linux-ld-2.25
@ -821,6 +845,7 @@ tc.arm64-linux.elf-fold.objcopy = arm64-linux-objcopy-2.25 -F elf64-littlea
tc.arm64-linux.elf-so_fold.objcopy = arm64-linux-objcopy-2.25 -F elf64-littleaarch64
tc.arm64-linux.elf-main2.objcopy = arm64-linux-objcopy-2.25 -F elf64-littleaarch64
tc.arm64-linux.elf-so_main.objcopy = arm64-linux-objcopy-2.25 -F elf64-littleaarch64
tc.arm64-linux.elf-upxfd_linux.objcopy = arm64-linux-objcopy-2.25 -F elf64-littleaarch64
tc.arm64-linux.elf-entry.objdump = arm64-linux-objdump-2.25
tc.arm64-linux.elf-so_entry.objdump = arm64-linux-objdump-2.25
@ -828,6 +853,7 @@ tc.arm64-linux.elf-fold.objdump = arm64-linux-objdump-2.25
tc.arm64-linux.elf-so_fold.objdump = arm64-linux-objdump-2.25
tc.arm64-linux.elf-main2.objdump = arm64-linux-objdump-2.25
tc.arm64-linux.elf-so_main.objdump = arm64-linux-objdump-2.25
tc.arm64-linux.elf-upxfd_linux.objdump = arm64-linux-objdump-2.25
tc.arm64-expand.objdump = arm64-linux-objdump-2.25
arm64-linux.elf-entry.h : $(srcdir)/src/$$T.S
@ -849,8 +875,17 @@ arm64-linux.elf-fold.h : $(srcdir)/src/$$T.lds \
$(call tc,f-embed_objinfo_without_xstrip_keep_dot_text,tmp/$T.bin)
$(call tc,bin2h) tmp/$T.bin $@
arm64-linux.elf-so_fold.h : tmp/$$T.o tmp/arm64-linux.elf-so_main.o $(srcdir)/src/$$T.lds tmp/arm64-expand.o
$(call tc,ld) -r -T $(srcdir)/src/$T.lds -Map tmp/$T.map $(filter %.o,$^) -o tmp/$T.bin
# Build the header for the arm64 shared-library fold stub: relocatable (-r)
# link of every .o prerequisite under $T.lds (writing tmp/$T.map), then
# f-embed_objinfo_without_xstrip and bin2h to produce $@.
arm64-linux.elf-so_fold.h : $(srcdir)/src/$$T.lds \
tmp/$$T.o \
tmp/arm64-expand.o \
tmp/arm64-linux.elf-help_umf.o \
tmp/arm64-linux.elf-upxfd_linux.o \
tmp/arm64-linux.elf-so_main.o
# Log which target is being built and the PATH used to find the tools.
@echo; echo TARGET=$@ PATH=$(PATH); echo
# FIXME: multiarch-ld-2.18 creates a huge file here; multiarch-ld-2.17 was the
# workaround (kept below, commented out).
# NOTE(review): the active link uses $(call tc,ld), not multiarch-ld-2.17 --
# confirm whether this FIXME still applies to that linker.
$(call tc,ld) -r -T $(srcdir)/src/$T.lds -Map tmp/$T.map $(filter %.o,$^) -o tmp/$T.bin
# multiarch-ld-2.17 -r -T $(srcdir)/src/$T.lds -Map tmp/$T.map $(filter %.o,$^) -o tmp/$T.bin
# Show the size of the linked output (see the FIXME above).
ls -l tmp/$T.bin
$(call tc,f-embed_objinfo_without_xstrip,tmp/$T.bin)
$(call tc,bin2h) tmp/$T.bin $@
@ -867,7 +902,10 @@ tmp/arm64-expand.o: $(srcdir)/src/$$T.S
$(call tc,gcc) -c $< -o $@
arm64-linux-objdump-2.25 -Dr $(tc_objdump_disasm_options) tmp/$T.o | $(RTRIM) > tmp/$T.o.disasm
tmp/amd64-linux.elf-upxfd_linux.o : $(srcdir)/src/upxfd_linux.c
tmp/arm64-linux.elf-help_umf.o: $(srcdir)/src/$$T.S
$(call tc,gcc) -c $< -o $@
# Compile upxfd_linux.c (with -O) for the arm64 stub, then post-process the
# object in place: rename .text to UMF_LINUX and remove the .comment, .data,
# .bss and .note.GNU-stack sections.  A disassembly listing is written next
# to the object as $@.disasm.
tmp/arm64-linux.elf-upxfd_linux.o : $(srcdir)/src/upxfd_linux.c
$(call tc,gcc) -c -O $< -o $@
$(call tc,objcopy) --rename-section .text=UMF_LINUX -R .comment -R .data -R .bss -R .note.GNU-stack $@
$(call tc,objdump) -Dr $(tc_objdump_disasm_options) $@ | $(RTRIM) > $@.disasm

View File

@ -54,15 +54,16 @@ MAP_PRIVATE= 2
MAP_FIXED= 0x10
MAP_ANONYMOUS= 0x20
__NR_close= 3
__NR_exit= 60
__NR_mmap= 9 // 64-bit mode only! /usr/include/asm/unistd_64.h
__NR_mprotect= 10
__NR_munmap= 11
__NR_memfd_create= 0x13f // 319
MFD_EXEC= 0x10
__NR_openat= 257
__NR_read= 0
__NR_write= 1
__NR_close= 3
__NR_exit= 60
PAGE_SHIFT= 12
PAGE_MASK= (~0<<PAGE_SHIFT)
@ -92,7 +93,50 @@ _start:
push %rbx // MATCH_03 saved register
push %rbp // MATCH_02 saved register
mov %rsp,%rbp
call L70 // MATCH_08 push $&getbit
lea fold_info(%rip),%rbx
lea _start - 4*4(%rip),%rax // &so_info
push %rax // MATCH_14 &so_info
// cmpw $M_NRV2B_LE32|(0<<8),b_method(%rbx); je 0f; hlt; 0: // check method and filter bytes
// De-compress folded code onto the stack
movl /*sz_unc*/(%rbx),%eax; push %rax // MATCH_40 len unfolded code
sub %rax,%rsp; and $-2*NBPW,%rsp
// Find the run-time page size: read up to 512 bytes of /proc/self/auxv into
// the stack space just above the current top, then scan the (tag,value)
// pairs for AT_PAGESZ.  On exit the negated page size (i.e. the page mask)
// has been pushed; 4KiB is assumed when AT_PAGESZ is not seen.
// Stack bookkeeping relies on do_sys ending with "ret $NBPW", which discards
// the pushed syscall number.
AT_PAGESZ= 6
O_RDONLY= 0
#define fd_psa r9l
#define end_aux r8
	sub %arg3l,%arg3l  // 0 == O_RDONLY
	lea str_psa(%rip),%arg2
	sub %arg1l,%arg1l  // 0 == impostor for FD_CWD
	push $__NR_openat; call do_sys; push %rax  // fd_psa
	mov $512,%arg3l  // len
	lea 1*NBPW(%rsp),%arg2  // buffer: first word above the saved fd_psa
	pop %arg1; push %arg1  // fd_psa
	// NOTE(review): assumes do_sys leaves %arg2 (buffer) intact -- confirm.
	push $__NR_read; call do_sys; lea (%rax,%arg2),%end_aux  // end of data read
	pop %arg1  // fd_psa
	push $__NR_close; call do_sys
	// fd_psa is no longer on the stack, so the buffer that was at
	// 1*NBPW(%rsp) during the read is now at 0*NBPW(%rsp).  Starting the
	// scan one word later would pair each value with the following tag.
	lea 0*NBPW(%rsp),%rsi  // buffer
0:
	lodsq; xchg %rax,%rcx  // tag
	lodsq  // value
	cmp $AT_PAGESZ,%rcx; je 1f
	cmp %end_aux,%rsi; jl 0b; mov $1<<12,%eax  // default 4KiB
1:
	neg %rax; push %rax  // MATCH_61 PAGE_MASK
// This is nrv2b_d32, inlined and optimized for small space (about 160 bytes).
// The task is to de-compress the folded pieces for shared library init:
// the de-compressor(s) of the PT_LOAD pieces, and the C-code supervisor
// which adjusts the placement and mapping of the address space.
// The output length is a couple KB for NRV, a few KB for Lzma, 64KB for Zstd.
// This is motivated by the possibility of using multiple de-compressors
// depending on the characteristics of each PT_LOAD, and by the increased size
// and compressibility of C-coded de-compressors for Lzma and Zstd
// in contrast to the simple and small assembly-coded NRV.
/* Working registers */
#define off %eax /* XXX: 2GB */
#define bits %ebx
@ -105,40 +149,8 @@ _start:
#define jnextb0 GETBIT; jnc
#define jnextb1 GETBIT; jc
/* rotate next bit into bottom bit of reg */
#define getnextb(reg) GETBIT; adcl reg,reg
getbit:
endbr64
addl bits,bits; jz refill // Carry= next bit
rep; ret // rep: stop instruction pipeline (spend 1 byte for speed)
refill:
movl (%rsi),bits; subq $-4,%rsi // next 32 bits; set Carry
adcl bits,bits // LSB= 1 (CarryIn); CarryOut= next bit
ret // infrequent (1/32)
L20: // %rdx == &getbit
pop %rbx // MATCH_09 &fold_info
lea _start - 4*4 - getbit(%rdx),%rax // &so_info
push %rax // MATCH_14 &so_info
// cmpw $M_NRV2B_LE32|(0<<8),b_method(%rbx); je 0f; hlt; 0: // check method and filter bytes
// De-compress folded code onto the stack
movl /*sz_unc*/(%rbx),%eax; push %rax // MATCH_40 len unfolded code
sub %rax,%rsp; and $-2*NBPW,%rsp
// This is nrv2b_d32, inlined and optimized for small space (about 160 bytes).
// The task is to de-compress the folded pieces for shared library init:
// the de-compressor(s) of the PT_LOAD pieces, and the C-code supervisor
// which adjusts the placement and mapping of the address space.
// The output length is a couple KB for NRV, a few KB for Lzma, 64KB for Zstd.
// This is motivated by the possibility of using multiple de-compressors
// depending on the characteristics of each PT_LOAD, and by the increased size
// and compressibility of C-coded de-compressors for Lzma and Zstd
// in contrast to the simple and small assembly-coded NRV.
push %rsp; pop dst // &unfolded_code
lea getbit(%rip),%rdx
lea 1*NBPW(%rsp),dst // &unfolded_code
push %rbp // MATCH_45
movl sz_cpr(%rbx),len // lsrc
lea sz_b_info(%rbx),src
@ -161,7 +173,19 @@ decompress: // inlined: (uchar const *src, uint len, uchar *dst /*, u32 &ldst,
xor len,len // create loop invariant
push $~0; pop dispq // -1: initial displacement
cld // paranoia
.byte 0xa8 // "testb $... ,%al" ==> "jmp top_n2b"
jmp top_n2b
/* rotate next bit into bottom bit of reg */
#define getnextb(reg) GETBIT; adcl reg,reg
getbit:
endbr64
addl bits,bits; jz refill // Carry= next bit
rep; ret // rep: stop instruction pipeline (spend 1 byte for speed)
refill: // infrequent (1/32)
movl (%rsi),bits; subq $-4,%rsi // next 32 bits; set Carry
adcl bits,bits // LSB= 1 (CarryIn); CarryOut= next bit
ret
lit_n2b:
movsb // *dst++ = *src++;
top_n2b:
@ -219,7 +243,7 @@ no_memfd: // so try /dev/shm
hlt // FIXME /dev/shm
ok_memfd:
pop %rcx // MATCH_22 discard "upx"
pop /*0*NBPW*/(%rsp) // actual PAGE_MASK replaces compiled-in default
push %rax; pop %arg1 // mfd
push %rsp; pop %arg2 // buffer
push %rax // MATCH_47 save mfd
@ -241,6 +265,7 @@ ok_memfd:
push $__NR_close; call do_sys
// %rsp:
// MATCH_42 ? FIXME
// MATCH_11 ptr unfolded_code; for escape hatch
// MATCH_10 len unfolded code; for escape hatch
// MATCH_14 &so_info
@ -252,6 +277,7 @@ ok_memfd:
// MATCH_07 envp
pop %rax; push %rax // MATCH_11 ptr unfolded code
add $2*NBPW,%rax
jmp *%rax // enter C code
do_sys: // on-stack parameter: hint on error
@ -259,12 +285,12 @@ do_sys: // on-stack parameter: hint on error
cmp $-4096,%rax; jb 0f; int3; 0:
ret $NBPW
str_psa:
.asciz "/proc/self/auxv"
// IDENTSTR goes here
section ELFMAINZ
L70:
pop %rdx // MATCH_08 &getbit
call L20 // MATCH_09 push $&fold_info
fold_info:
// b_info (sz_unc, sz_cpr, method) of folded code (C-language, etc.)

View File

@ -23,6 +23,8 @@ NBPW= 8
// MATCH_07 envp
section SO_HEAD
PAGE_MASK: .quad 0xfffffffffffff000
.quad 0 // ? FIXME
fold:
pop %rbx // MATCH_11 ptr unfolded code
pop %rbp // MATCH_10 len unfolded code
@ -45,6 +47,10 @@ fold:
// pop %arg3 // MATCH_07 envp
// ret // ==> user_DT_INIT
// unsigned long get_page_mask(void)
// Return, in %rax, the quadword stored at label PAGE_MASK.
get_page_mask: .globl get_page_mask
mov PAGE_MASK(%rip),%rax
ret
section ptr_NEXT
// pop %rax; call *%rax
// "lea f_exp(%rip)," addressing on x86_64 subsumes the need for code,

View File

@ -53,10 +53,6 @@ MAP_PRIVATE= 2
MAP_FIXED= 0x10
MAP_ANONYMOUS= 0x20
PAGE_SHIFT= 12
PAGE_MASK= (~0<<PAGE_SHIFT)
PAGE_SIZE= -PAGE_MASK
// /usr/include/asm-generic/unistd.h
__NR_close = 0x39 // 57
__NR_exit = 0x5d // 93
@ -65,6 +61,7 @@ __NR_mmap = 0xde // 222
__NR_mprotect = 0xe2 // 226
__NR_munmap = 0xd7 // 215
__NR_openat = 0x38 // 56
__NR_read = 0x3f // 63
__NR_write = 0x40 // 64
arg1 .req x0
@ -111,6 +108,30 @@ L70_ret:
sub x0,sp,x0 // alloca
and sp,x0,#-2*NBPW // align stack
// Find the run-time page size: read up to 512 bytes of /proc/self/auxv into
// the buffer at sp, then scan the (tag,value) pairs for AT_PAGESZ.
// On exit page_m = -page_size (the page mask); 4KiB is assumed when
// AT_PAGESZ is not seen.
psa_fd .req w5
aux_end .req x4
page_m .req x7
AT_PAGESZ= 6
O_RDONLY= 0
	mov w2,#O_RDONLY
	adr x1,str_psa
	mov w0,#0  // dirfd; str_psa is an absolute path
	do_sys __NR_openat; mov psa_fd,w0
	mov x1,sp  // buffer
	mov x2,#512  // len
	// NOTE(review): assumes do_sys leaves x1 (buffer) intact -- confirm.
	do_sys __NR_read; add aux_end,x1,x0  // end
	mov w0,psa_fd; do_sys __NR_close
	.unreq psa_fd
0:
	// The value must land in x0: it is negated into page_m at 1: below.
	// (Loading it into s0, a SIMD register, would leave x0 holding the
	// result of the preceding close.)
	ldr x0,[x1,#NBPW]  // value
	ldr x2,[x1],#2*NBPW  // tag; post-increment to the next pair
	cmp x2,#AT_PAGESZ; beq 1f
	cmp x1,aux_end; blo 0b; mov x0,#1<<12  // default 4KiB
	.unreq aux_end
1:
	sub page_m,xzr,x0  // PAGE_MASK
add arg4,old_sp,#F_LENU // &dstlen
mov arg3,sp // dst for decompress
ldr arg2w,[foldi,#sz_cpr] // srclen
@ -145,11 +166,16 @@ L70_ret:
adr arg1,_start - 4*4 // &SO_INFO
add arg2,sp,#F_ARGC // &{argc, argv, envp}
str page_m, [u_ptr,#0]
add u_ptr,u_ptr,2*NBPW
br u_ptr
.unreq u_ptr
str_upx:
.asciz "upx"
str_psa:
.asciz "/proc/self/auxv"
.balign 4
//%esp:
// MATCH_04 ptr unfolded_code
@ -157,8 +183,15 @@ str_upx:
// MATCH_00 argc,argv,envp,lr(_start)
f_decompress:
// nrv2b code is hard-wired here
#define NO_METHOD_CHECK 1
#undef NO_SYNC_CACHE
// only one de-compressor; build 'eof' return
#undef DAISY_CHAIN
// use of mmap() forces implicit cache sync
#define NO_SYNC_CACHE 1
off .req w5
#include "arch/arm64/v8/nrv2b_d32.S"

View File

@ -43,6 +43,49 @@ MAP_FIXED= 0x10
#define call bl
section SO_HEAD
// Start of section SO_HEAD: two data quadwords, then the branch to 'fold'.
// NOTE(review): the entry stub appears to store the run-time page mask into
// the first quadword and to enter 2*NBPW past the start, i.e. at fold_begin
// -- confirm against arm64-linux.elf-so_entry.S.
ZERO:
PAGE_MASK: .quad 0xfffffffffffff000 // default
upxfn_path:.quad 0 // displacement from "zero"
// NOTE(review): the .globl below names 'fold', not 'fold_begin' -- confirm intent.
fold_begin: .globl fold
b fold
// unsigned long get_page_mask(void)
// Return, in x0, the quadword stored at label PAGE_MASK.
get_page_mask: .globl get_page_mask
ldr x0,PAGE_MASK
ret
// Punmap: munmap for a possibly unaligned address.
//   x0 = address, x1 = length.
// Rounds the address down to a page boundary and grows the length by the
// same amount, then falls into the plain munmap system call.
// PAGE_MASK has the high bits set (default 0xfffffffffffff000), so the
// in-page fragment is address & ~PAGE_MASK: that is BIC, not AND.
Punmap: .globl Punmap
	ldr x8,PAGE_MASK
	bic x8,x0,x8  // x8= address & ~PAGE_MASK  (fragment within page)
	sub x0,x0,x8  // page-aligned address
	add x1,x1,x8  // length extended back to the page boundary
// munmap: the system call with the arguments as given.
munmap: .globl munmap
	do_sys __NR_munmap; ret
// Sometimes Linux enforces page-aligned address
// Pprotect / mprotect: both labels share one body.
//   x0 = address, x1 = length; remaining arguments pass through unchanged.
// Rounds the address down to a page boundary and grows the length by the
// same amount before the system call.
// PAGE_MASK has the high bits set (default 0xfffffffffffff000), so the
// in-page fragment is address & ~PAGE_MASK: that is BIC, not AND.
Pprotect: .globl Pprotect
mprotect: .globl mprotect
	ldr x8,PAGE_MASK
	bic x8,x0,x8  // x8= address & ~PAGE_MASK  (fragment within page)
	sub x0,x0,x8  // page-aligned address
	add x1,x1,x8  // length extended back to the page boundary
	do_sys __NR_mprotect; ret
// mmap_privanon: private anonymous mapping.
//   Adds MAP_PRIVATE|MAP_ANONYMOUS to the caller's flags in w3, forces
//   fd= -1 and offset= 0, then continues into Pmap.
mmap_privanon: .globl mmap_privanon
	mov w4,#MAP_PRIVATE|MAP_ANONYMOUS
	orr w3,w3,w4  // combine with input (such as MAP_FIXED)
	mov w4,#-1  // fd= -1
	mov x5,#0  // offset= 0
	// FALL THROUGH to mmap
// Pmap: mmap for a possibly unaligned address.
//   x0 = address, x1 = length; x2..x5 pass through unchanged.
// Rounds the address down to a page boundary and grows the length by the
// same amount.  PAGE_MASK has the high bits set (default
// 0xfffffffffffff000), so the in-page fragment is address & ~PAGE_MASK:
// that is BIC, not AND.
Pmap: .globl Pmap
	ldr x8,PAGE_MASK
	bic x8,x0,x8  // x8= address & ~PAGE_MASK  (fragment within page)
	sub x0,x0,x8  // page-aligned address
	add x1,x1,x8  // length extended back to the page boundary
// mmap: the system call with the arguments as given.
mmap: .globl mmap
	do_sys __NR_mmap; ret
fold: // enter here (x0= &so_info; x1= &{argc,argv,envp,lr}
//%esp:
// MATCH_04 ptr unfolded_code
@ -124,6 +167,14 @@ Pwrite: .globl Pwrite
write:
do_sys __NR_write; ret
// int open(char const *pathname, int flags, unsigned mode)
// Implemented on top of openat: shift the three arguments up by one
// register and supply FD_CWD as the directory fd.
.globl open
FD_CWD= -100
open:
mov w3,w2 // mode
mov w2,w1 // flags
mov x1,x0 // pathname
mov w0,#FD_CWD // dirfd
// fall into 'openat'
.globl openat
// openat: the system call with the arguments as given.
openat:
do_sys __NR_openat; ret
@ -148,43 +199,14 @@ brk:
readlink:
do_sys __NR_readlink; ret
Punmap: .globl Punmap
and x8,x0,#-1+ (1<<12) // FIXME: variable PAGE_MASK
sub x0,x0,x8
add x1,x1,x8
munmap: .globl munmap
do_sys __NR_munmap; ret
msync: .globl msync
do_sys __NR_msync; ret
// Sometimes Linux enforces page-aligned address
Pprotect: .globl Pprotect
mprotect: .globl mprotect
and x8,x0,#-1+ (1<<12) // FIXME: variable PAGE_MASK
sub x0,x0,x8
add x1,x1,x8
do_sys __NR_mprotect; ret
.globl __sync_cache_range
__sync_cache_range: // (void *lo, void *hi)
#include "arm64-sync-cache-range.S"
ret
mmap_privanon: .globl mmap_privanon
mov w4,#MAP_PRIVATE|MAP_ANONYMOUS
orr w3,w3,w4 // combine with input (such as MAP_FIXED)
mov w4,#-1 // fd= -1
mov x5,#0 // offset= 0
// FALL THROUGH to mmap
Pmap: .globl Pmap
and x8,x0,#-1+ (1<<12) // FIXME: variable PAGE_MASK
sub x0,x0,x8
add x1,x1,x8
mmap: .globl mmap
do_sys __NR_mmap; ret
get_sys_munmap: .globl get_sys_munmap // r0= system call instruction
#if defined(ARMEL_DARWIN) /*{*/
ldr w0,4*1 + munmap

View File

@ -100,6 +100,8 @@ extern int open(char const *pathname, int flags, unsigned mode);
extern int memfd_create(char const *, unsigned);
extern int ftruncate(int, size_t);
extern unsigned long get_page_mask(void);
// Implementation for Linux-native, where memfd_create
// (or /dev/shm) works. Saves space in contrast to
// upxfd_android (or Android emulator), which must
@ -132,7 +134,8 @@ unsigned long upx_mmap_and_fd_linux( // returns (mapped_addr | (1+ fd))
}
ptr = mmap(ptr, datlen, PROT_READ|PROT_WRITE,
(ptr ? MAP_FIXED : 0)|MAP_SHARED, fd, 0);
if (PAGE_MASK <= (unsigned long)ptr) {
unsigned long const page_mask = get_page_mask();
if (page_mask <= (unsigned long)ptr) {
return (unsigned long)ptr; // errno
}
return (unsigned long)ptr + (1+ (unsigned)fd);