Tool for snipping load commands out of Mach-O files on Apple M1 (aarch64) running macOS Big Sur.

experimental!
https://github.com/upx/upx/issues/424
	new file:   macho-snip/Makefile
	new file:   macho-snip/macho-snip.c
	new file:   macho-snip/udf.s
This commit is contained in:
John Reiser 2022-02-28 19:53:25 -08:00 committed by Markus F.X.J. Oberhumer
parent e5aeea9ed2
commit 972c76eb42
3 changed files with 298 additions and 0 deletions

View File

@ -0,0 +1,21 @@
# Required: Apple Command Line Developer Tools: "xcode-select --install"
# Downloads a few megabytes. Installs in a couple minutes.
# Required: Apple Xcode Developer Tools from Apple App Store.
# Must accept the license: Launch Xcode app,
# or run "xcodebuild -license" from Terminal after install.
# Downloads about 13GB (~20 minutes at 1Gb/s). Install takes
# ~45 minutes more on SSD, and ~40GB of space in the filesystem.

CFLAGS += -g

.PHONY: clean

# The tool itself.  Also builds 'udf', the sample input to experiment on.
macho-snip: macho-snip.o udf
	$(CC) $(CFLAGS) -o macho-snip macho-snip.o

udf.o: udf.s
	$(CC) -c -o $@ $<

# shortest main program; used as an example
udf: udf.o
	$(CC) --verbose -nostartfiles -o udf $<
	codesign --remove-signature udf
	strip -N udf # removes all symbols

# Remove everything this Makefile builds.
clean:
	rm -f macho-snip macho-snip.o udf udf.o

View File

@ -0,0 +1,273 @@
// SPDX-License-Identifier: GPL-2.0+
// Copyright 2022 BitWagon Software LLC. All rights reserved.
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
// /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/mach-o/loader.h
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
// Printable names of Mach-O load commands, indexed by the low byte of
// load_command.cmd (that is, with the LC_REQ_DYLD bit 0x80000000 ignored).
// Indices with no initializer are zero-filled: .val == 0 and .name == "".
// .name must be large enough for the longest name PLUS its terminating NUL;
// a field that is exactly as long as the string silently drops the NUL.
struct Cmd_names {
    unsigned char val;
    char name[31];
} const cmd_names[] = {
    [0x01] = {0x01, "LC_SEGMENT"},
    [0x02] = {0x02, "LC_SYMTAB"},
    [0x04] = {0x04, "LC_THREAD"},
    [0x05] = {0x05, "LC_UNIXTHREAD"},
    [0x0b] = {0x0b, "LC_DYSYMTAB"},
    [0x0c] = {0x0c, "LC_LOAD_DYLIB"},
    [0x0d] = {0x0d, "LC_ID_DYLIB"},
    [0x0e] = {0x0e, "LC_LOAD_DYLINKER"},
    [0x0f] = {0x0f, "LC_ID_DYLINKER"},
    [0x11] = {0x11, "LC_ROUTINES"},
    [0x16] = {0x16, "LC_TWOLEVEL_HINTS"},
    [0x18] = {0x18, "LC_LOAD_WEAK_DYLIB"},      // |LC_REQ_DYLD
    [0x19] = {0x19, "LC_SEGMENT_64"},
    [0x1a] = {0x1a, "LC_ROUTINES_64"},
    [0x1b] = {0x1b, "LC_UUID"},
    [0x1c] = {0x1c, "LC_RPATH"},
    [0x1d] = {0x1d, "LC_CODE_SIGNATURE"},
    [0x1e] = {0x1e, "LC_SEGMENT_SPLIT_INFO"},
    [0x1f] = {0x1f, "LC_REEXPORT_DYLIB"},       // |LC_REQ_DYLD
    [0x20] = {0x20, "LC_LAZY_LOAD_DYLIB"},
    [0x21] = {0x21, "LC_ENCRYPTION_INFO"},
    // 0x22 without LC_REQ_DYLD is LC_DYLD_INFO: compressed dyld information
    // (10.6.x).  Both variants share this slot and print as the _ONLY form.
    [0x22] = {0x22, "LC_DYLD_INFO_ONLY"},       // |LC_REQ_DYLD
    [0x23] = {0x23, "LC_LOAD_UPWARD_DYLIB"},    // |LC_REQ_DYLD
    [0x24] = {0x24, "LC_VERSION_MIN_MACOSX"},
    [0x25] = {0x25, "LC_VERSION_MIN_IPHONEOS"},
    [0x26] = {0x26, "LC_FUNCTION_STARTS"},
    [0x27] = {0x27, "LC_DYLD_ENVIRONMENT"},     // string as environment variable
    [0x28] = {0x28, "LC_MAIN"},                 // |LC_REQ_DYLD
    [0x29] = {0x29, "LC_DATA_IN_CODE"},
    [0x2a] = {0x2a, "LC_SOURCE_VERSION"},
    [0x2b] = {0x2b, "LC_DYLIB_CODE_SIGN_DRS"},
    [0x2c] = {0x2c, "LC_ENCRYPTION_INFO_64"},
    [0x2d] = {0x2d, "LC_LINKER_OPTION"},
    [0x2e] = {0x2e, "LC_LINKER_OPTIMIZATION_HINT"},
    [0x2f] = {0x2f, "LC_VERSION_MIN_TVOS"},
    [0x30] = {0x30, "LC_VERSION_MIN_WATCHOS"},
    [0x31] = {0x31, "LC_NOTE"},
    [0x32] = {0x32, "LC_BUILD_VERSION"},
    [0x33] = {0x33, "LC_DYLD_EXPORTS_TRIE"},    // |LC_REQ_DYLD
    [0x34] = {0x34, "LC_DYLD_CHAINED_FIXUPS"},  // |LC_REQ_DYLD
    [0x35] = {0x35, "LC_FILESET_ENTRY"},        // |LC_REQ_DYLD
};
// Remove (cut out, "snip") named loader_commands from a Macho-O file.
// Try to enable success of running "codesign -s - file" afterwards.
// Note that LC_CODE_SIGNATURE should be removed before snipping:
// codesign --remove-signature file
int
main(int argc, char const * /*const*/ *const argv, char const *const *const envp)
{
struct stat st;
int fd;
int err1;
int res2;
int err2;
int prot = PROT_READ | PROT_WRITE;
int flags = MAP_FIXED | MAP_SHARED;
void *const awant = (void *)(0x18L << 28); // above user, below dylibs
char *addr;
if (argc < 3) {
fprintf(stderr, "Usage: macho-snip file loader_cmd...\n");
exit(1);
}
fd = open(argv[1], O_RDWR, 0);
err1 = errno;
if (fd < 0) {
perror(argv[1]);
fprintf(stderr, "Trying readonly...\n");
flags = MAP_FIXED | MAP_PRIVATE;
fd = open(argv[1], O_RDONLY, 0);
}
res2 = fstat(fd, &st);
err2 = errno;
if (fd < 0) {
errno = err1;
perror(argv[1]);
exit(1);
}
if (0!=res2) {
errno = err2;
perror(argv[1]);
exit(1);
}
addr = mmap(awant, st.st_size, prot, flags, fd, 0);
if (awant!=addr) {
perror(argv[1]);
exit(1);
}
fprintf(stderr,"%zd (%#zx) bytes at %p\n", (long)st.st_size, (long)st.st_size, addr);
unsigned long argv_done = 0; // set of bits
struct segment_command_64 *linkedit = 0;
struct mach_header_64 *const mhdr = (struct mach_header_64 *)addr;
unsigned ncmds = mhdr->ncmds;
unsigned headway = mhdr->sizeofcmds;
struct load_command *cmd = (struct load_command *)(1+ mhdr);
for (; ncmds; --ncmds) {
unsigned end_dataoff = 0;
unsigned end_datasize = 0;
struct load_command *cmd_next;
again: ;
fprintf(stderr, "cmd@%p %s %d(%#x)\n",
cmd, cmd_names[cmd->cmd&0xFF].name, cmd->cmd&0xFF, cmd->cmd);
unsigned const cmdsize = cmd->cmdsize;
if (headway < cmdsize ) {
}
else {
headway -= cmdsize;
cmd_next = (struct load_command *)(cmdsize + (void *)cmd);
}
switch (cmd->cmd &~ LC_REQ_DYLD) {
int jargv;
case LC_SEGMENT_64: {
struct segment_command_64 *seg = (struct segment_command_64 *)cmd;
if (!strcmp("__LINKEDIT", seg->segname)) {
linkedit = seg;
}
} break;
case LC_CODE_SIGNATURE: {
fprintf(stderr, "macho-snip: LC_CODE_SIGNATURE not implemented\n");
continue;
struct linkedit_data_command *cmd_LED = (struct linkedit_data_command *)cmd;
if (( cmd_LED->dataoff + cmd_LED->datasize )
== (linkedit->fileoff + linkedit->filesize)) {
linkedit->filesize -= cmd_LED->datasize;
}
memset(addr + linkedit->fileoff, 0, cmdsize);
goto snip;
} break;
//struct nlist_64 {
// union {
// uint32_t n_strx; /* index into the string table */
// } n_un;
// uint8_t n_type; /* type flag, see below */
// uint8_t n_sect; /* section number or NO_SECT */
// uint16_t n_desc; /* see <mach-o/stab.h> */
// uint64_t n_value; /* value of this symbol (or stab offset) */
//};
//
// The string table has an extra entry at the front: " " (one space)
// so that the first actual string has an index of 2.
// See the comment which follows the definition of struct nlist_64 in
// /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/mach-o/nlist.h
//
// The string table must be zero-padded to a multiple of 8 bytes.
// codesign requires that the string table must be last in __LINKEDIT:
// (__LINKEDIT.filesize + .fileoff) == (LC_SYMTAB.strsize + .stroff)
// [The LC_CODE_SIGNATURE.dataoff also aligns itself to (0 mod 16),
// which is peculiar because the data array of sha256 values (one 32-byte
// value per 4KB page) has offset (4 mod 16) instead of (0 mod 16).]
case LC_SYMTAB: {
struct symtab_command *symcmd = (struct symtab_command *)cmd;
if (( symcmd->strsize + symcmd->stroff)
!= (linkedit->filesize + linkedit->fileoff)) {
fprintf(stderr,"macho-snip: bad LC_SYMTAB string table\n");
}
// find beginning of last name string
unsigned j;
struct nlist_64 *const symp = (struct nlist_64 *)(symcmd->symoff + addr);
char *const namp0 = symcmd->stroff + addr;
char *namp;
for (j=0, namp = namp0; j < symcmd->nsyms; ++j) {
namp += 1+ strlen(namp);
if (symp[j].n_un.n_strx != (namp - namp0)) {
fprintf(stderr, "macho-snip: bad .n_strx\n");
}
}
unsigned pad = 7& -(unsigned long)namp;
memset(namp, 0, pad); namp += pad; // zero pad to (0 mod 8)
symcmd->strsize = namp - namp0;
linkedit->filesize = (namp - addr) - linkedit->fileoff;
symcmd->nsyms -= 1; // lop last symbol FIXME: generalize
} break;
case LC_DYSYMTAB: {
struct dysymtab_command *dysym = (struct dysymtab_command *)cmd;
if (0==(dysym->nundefsym -= 1)) { // FIXME: generalize
dysym->iundefsym = 0;
}
} break;
case LC_BUILD_VERSION:
case LC_LOAD_DYLIB:
case LC_LOAD_DYLINKER:
case LC_MAIN:
case LC_SOURCE_VERSION: {
for (jargv = 2; jargv < argc; ++jargv) {
if (argv[jargv] && !strcmp(cmd_names[cmd->cmd & 0xFF].name, argv[jargv])) {
argv_done |= 1uL << jargv;
fprintf(stderr, "macho-snip: %#x, %s\n",
cmd_names[cmd->cmd & 0xFF].val, cmd_names[cmd->cmd & 0xFF].name);
goto snip;
}
}
} break;
case LC_DATA_IN_CODE: {
case LC_DYLD_EXPORTS_TRIE:
case LC_DYLD_CHAINED_FIXUPS:
case LC_DYLIB_CODE_SIGN_DRS:
case LC_FUNCTION_STARTS:
case LC_LINKER_OPTIMIZATION_HINT:
case LC_SEGMENT_SPLIT_INFO: {
for (jargv = 2; jargv < argc; ++jargv) {
if (argv[jargv] && !strcmp(cmd_names[cmd->cmd & 0xFF].name, argv[jargv])) {
argv_done |= 1uL << jargv;
fprintf(stderr, "macho-snip: %#x, %s\n",
cmd_names[cmd->cmd & 0xFF].val, cmd_names[cmd->cmd & 0xFF].name);
goto snip_linkedit_data_command;
}
}
} break;
}
cmd = (struct load_command *)(cmdsize + (void *)cmd);
continue;
snip_linkedit_data_command: ;
struct linkedit_data_command *ldc = (struct linkedit_data_command *)cmd;
end_datasize = ldc->datasize;
end_dataoff = ldc->datasize + ldc->dataoff;
memset(addr + ldc->dataoff, 0, end_datasize); // the linkedit_data
if ((linkedit->fileoff + linkedit->filesize) == end_dataoff) {
linkedit->filesize -= end_datasize; // trim
}
snip: ;
memmove(cmd, cmd_next, headway);
memset(headway + (char *)cmd, 0, cmdsize); // space that was vacated
cmd_next = cmd; // we moved *cmd_next to *cmd
mhdr->sizeofcmds -= cmdsize;
mhdr->ncmds -= 1;
argv[jargv] = 0; // snip only once per argv[]
} // switch
cmd = cmd_next;
} // ncmds
argv_done |= (1<<1) | (1<<0); // argv[0,1] do not name linker_commands
if (~(~0uL << argc) != argv_done) {
int j;
for (j=2; j < argc; ++j) {
if (!((1uL << j) & argv_done)) {
fprintf(stderr, "macho-snip warning: %s not processed\n", argv[j]);
}
}
}
if (!(MAP_SHARED & flags)) {
write(1, addr, st.st_size);
}
return 0; // success
}
/* vim:set ts=4 sw=4 et: */

View File

@ -0,0 +1,4 @@
// Smallest possible "main": a single instruction that is not meant to return.
        .globl  _main                   // for -lc
        .align  4
// start: .globl start // for standalone
_main:
        udf     123                     // AArch64 "undefined" instruction, immediate 123